# 4_Density plots for data spread analysis

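This script tests whether the negative-control measurements in each Excel file are normally distributed (Shapiro–Wilk test) and visualizes how many files fall into each category.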

## 1) Import packages

In [None]:
import json
import os
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import shapiro

### 1a) Define the functions

In [None]:
def process_excel_file(file_path, experiment_data, alpha=0.05):
    """Classify the negative-control measurements of one Excel file.

    The file is read with ``pd.read_excel``; rows whose ``"Well"`` value is
    one of the keys of ``experiment_data["negative_controls"]`` are selected
    and their ``"Measurement"`` values are tested with the Shapiro-Wilk test.

    Args:
        file_path: Path of the Excel file to read. The sheet must contain
            the columns ``"Well"`` and ``"Measurement"``.
        experiment_data: Mapping with a ``"negative_controls"`` entry whose
            keys are the well identifiers of the negative controls.
        alpha: Significance level of the test. A p-value above ``alpha``
            is reported as ``"Normal"``.

    Returns:
        ``"Normal"`` if the p-value is greater than ``alpha``, otherwise
        ``"Non-Normal"``.

    Raises:
        KeyError: If ``"negative_controls"`` or one of the required columns
            is missing.
        ValueError: If no negative-control wells are defined, or fewer than
            three non-missing control measurements are found in the file.
    """
    negative_control_wells = list(experiment_data["negative_controls"])
    if not negative_control_wells:
        # Fail before touching the file: nothing could be selected anyway.
        raise ValueError(
            "experiment_data defines no negative control wells; "
            "cannot test their distribution."
        )

    measurement_data = pd.read_excel(file_path)
    is_negative_control = measurement_data["Well"].isin(negative_control_wells)

    # Missing measurements (NaN) would propagate into the test and make its
    # result meaningless, so they are removed first.
    control_values = measurement_data.loc[is_negative_control, "Measurement"].dropna()

    # The Shapiro-Wilk test needs at least three observations; report the
    # offending file instead of surfacing an opaque error from scipy.
    if len(control_values) < 3:
        raise ValueError(
            f"{file_path}: found {len(control_values)} negative control "
            "measurement(s); the Shapiro-Wilk test needs at least 3."
        )

    _, p_value = shapiro(control_values)
    return "Normal" if p_value > alpha else "Non-Normal"

## 2) Create JSON file with information on different treatments and negative controls

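You can create a JSON file in Python to store information about the different treatments and negative controls in a 96-well plate experiment. The keys are well identifiers; the keys under `negative_controls` must match the values in the `Well` column of the Excel files, because they are used to select the negative-control rows.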

In [None]:
# Example plate layout: each well identifier is mapped to a label,
# grouped into treatments and negative controls.
data = dict(
    treatments=dict(
        well_1="Treatment A",
        well_2="Treatment B",
        well_3="Treatment C",
    ),
    negative_controls=dict(
        well_10="Control 1",
        well_20="Control 2",
        well_30="Control 3",
    ),
)

# Serialize the layout first, then write the resulting text in a single call.
serialized_layout = json.dumps(data, indent=4)
with open("experiment_data.json", "w") as handle:
    handle.write(serialized_layout)

print("JSON file created successfully.")

## 3) Analyze the distribution

In [None]:
# Classify every Excel file in `directory_path` with process_excel_file and
# plot how many files were classified as each distribution type.
distribution_types = []

directory_path = "path_to_directory_containing_excel_files"

# The experiment layout does not change between files, so it is loaded once
# up front instead of being re-read inside the loop.
with open("experiment_data.json", "r") as json_file:
    experiment_data = json.load(json_file)

# os.listdir returns names in arbitrary order; sorting makes runs reproducible.
for filename in sorted(os.listdir(directory_path)):
    # Skip the "~$..." lock files Excel creates next to an open workbook:
    # they carry the .xlsx suffix but are not readable workbooks.
    if filename.startswith("~$") or not filename.lower().endswith(".xlsx"):
        continue

    file_path = os.path.join(directory_path, filename)
    distribution_types.append(process_excel_file(file_path, experiment_data))

# Iterate the labels in sorted order so the bars appear in the same order on
# every run (plain set iteration order is not stable for strings).
distribution_counts = {
    distribution: distribution_types.count(distribution)
    for distribution in sorted(set(distribution_types))
}

# Pass plain lists rather than dict views to the plotting call.
plt.bar(list(distribution_counts.keys()), list(distribution_counts.values()))
plt.xlabel("Distribution Type")
plt.ylabel("Count")
plt.title("Overall Distribution of Negative Controls")
plt.show()