# Retinal Organoid SpontaneousBurst

- Spontaneous bursts have been extracted with NeuroExplorer and exported to an Excel file

In [None]:
## Read excel file of Nex-export
import os
import pandas as pd

# Step 1: read every NeuroExplorer export workbook in `directory_path`, drop
# the rows whose second column equals 'nr_001', and write the result to
# `output_directory_path` (a leading "01" in the file name becomes "02").
directory_path = r'C:\Baseline'
# Skip "~$..." entries: Excel creates these lock files next to any workbook
# that is currently open, and they cannot be parsed as spreadsheets.
excel_files = [
    f for f in os.listdir(directory_path)
    if f.endswith('.xlsx') and not f.startswith('~$')
]
all_cleaned_data = {}

for file in excel_files:
    file_path = os.path.join(directory_path, file)
    cleaned_data = {}
    # Open the workbook once and close it deterministically: re-opening it
    # for every sheet is slow, and an unclosed handle keeps the file locked
    # on Windows.
    with pd.ExcelFile(file_path) as workbook:
        sheet_names = workbook.sheet_names
        for sheet in sheet_names:
            if sheet == "Sheet1":
                continue
            df = workbook.parse(sheet)
            if df.shape[1] < 2:
                # No second column to test against, so nothing to remove.
                cleaned_df = df
            else:
                cleaned_df = df[df.iloc[:, 1] != 'nr_001']
            cleaned_data[sheet] = cleaned_df
    all_cleaned_data[file] = cleaned_data

output_directory_path = r'C:\Baseline_cleaned'
os.makedirs(output_directory_path, exist_ok=True)

for file, cleaned_data in all_cleaned_data.items():
    if not cleaned_data:
        # A workbook needs at least one sheet; a file that only contained
        # "Sheet1" would otherwise make the writer fail on save.
        print(f"Skipping {file}: no sheets other than 'Sheet1'")
        continue
    if file.startswith("01"):
        new_file_name = "02" + file[2:]
    else:
        new_file_name = file
    output_file_path = os.path.join(output_directory_path, new_file_name)
    with pd.ExcelWriter(output_file_path) as writer:
        for sheet, cleaned_df in cleaned_data.items():
            cleaned_df.to_excel(writer, sheet_name=sheet, index=False)

In [None]:
import pandas as pd
import glob
import os

# Step 2: in every cleaned workbook keep only the rows whose 'Mean Freq.'
# value is at least `threshold`, then write the result to
# `output_directory_path` ("02_Burst..." file names become "03_SF_Burst...").
directory_path = r'C:\Baseline_cleaned'
threshold = 0.1
column_name = 'Mean Freq.'
# Skip "~$..." entries: Excel creates these lock files next to any workbook
# that is currently open, and they cannot be parsed as spreadsheets.
excel_files = [
    f for f in os.listdir(directory_path)
    if f.endswith('.xlsx') and not f.startswith('~$')
]
all_cleaned_data = {}

for file in excel_files:
    file_path = os.path.join(directory_path, file)
    cleaned_data = {}
    # Open the workbook once and close it deterministically: re-opening it
    # for every sheet is slow, and an unclosed handle keeps the file locked
    # on Windows.
    with pd.ExcelFile(file_path) as workbook:
        sheet_names = workbook.sheet_names
        for sheet in sheet_names:
            df = workbook.parse(sheet)
            if column_name in df.columns:
                # Rows with a missing value are dropped too, because
                # NaN >= threshold evaluates to False.
                cleaned_df = df[df[column_name] >= threshold]
            else:
                # Sheets without the column are passed through unchanged.
                cleaned_df = df
            cleaned_data[sheet] = cleaned_df
    all_cleaned_data[file] = cleaned_data

output_directory_path = r'C:\Baseline_SFFilter'
os.makedirs(output_directory_path, exist_ok=True)
for file, cleaned_data in all_cleaned_data.items():
    if not cleaned_data:
        # A workbook needs at least one sheet, so there is nothing to save.
        print(f"Skipping {file}: workbook has no sheets to write")
        continue
    if file.startswith("02_Burst"):
        new_file_name = "03_SF" + file[2:]
    else:
        new_file_name = file
    output_file_path = os.path.join(output_directory_path, new_file_name)
    with pd.ExcelWriter(output_file_path) as writer:
        for sheet, cleaned_df in cleaned_data.items():
            cleaned_df.to_excel(writer, sheet_name=sheet, index=False)

In [None]:
import pandas as pd
import numpy as np

# Step 3: for each variable in `variables`, gather that column from every
# sheet of the input workbook and lay the sheets side by side (one column
# per sheet, shorter columns padded with NaN). One output sheet per variable.
excel_file_path = r'C:\Baseline_SFFilter.xlsx'
variables = ['Mean Freq.']
data = {var: {} for var in variables}
# Parse through the already-open workbook instead of re-opening the file for
# every sheet, and release the handle as soon as reading is done.
with pd.ExcelFile(excel_file_path) as xls:
    for sheet_name in xls.sheet_names:
        df = xls.parse(sheet_name)
        for var in variables:
            if var in df.columns:
                data[var][sheet_name] = df[var].values

final_data_frames = {}
for var in variables:
    if not data[var]:
        # No sheet carried this column; there is nothing to tabulate.
        print(f"No sheet contains column {var!r}; skipping it")
        continue
    # Building the frame from Series aligns them on a shared 0..n-1 index,
    # which pads the shorter columns with NaN automatically.
    final_data_frames[var] = pd.DataFrame({
        sheet: pd.Series(values.astype(float))
        for sheet, values in data[var].items()
    })

if not final_data_frames:
    # Fail with a clear message rather than writing a workbook with no sheets.
    raise ValueError(
        f"None of the columns {variables} were found in {excel_file_path}"
    )

output_file_path = r'C:\Baseline_Avg.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for var, df in final_data_frames.items():
        df.to_excel(writer, sheet_name=var, index=False)

In [None]:
import pandas as pd
import numpy as np

# Step 4: for every sheet of the input workbook, compute summary statistics
# of each column (one column = one sample) and write one row per sample to a
# sheet of the same name in the output workbook.
excel_file_path = r'C:\Baseline_Avg.xlsx'
output_file_path = r'C:\Baseline_Avg2.xlsx'
# Fixed column order, so a sheet without samples still gets a header row.
stat_columns = ['Sample Name', 'Average', 'Count', 'Median', 'Max', 'Min', 'SD', 'SEM']
# The input is opened first, so a missing input file does not leave an empty
# output workbook behind; both handles are closed when the block exits.
with pd.ExcelFile(excel_file_path) as xls, pd.ExcelWriter(output_file_path) as writer:
    for sheet_name in xls.sheet_names:
        df = xls.parse(sheet_name, header=0)
        # Non-numeric cells become NaN and are ignored by the statistics.
        df = df.apply(pd.to_numeric, errors='coerce')
        # Collect plain rows and build the frame once; growing a DataFrame
        # with pd.concat inside the loop re-copies all previous rows each time.
        rows = [
            {
                'Sample Name': sample_name,
                'Average': values.mean(),
                'Count': values.count(),
                'Median': values.median(),
                'Max': values.max(),
                'Min': values.min(),
                'SD': values.std(),
                'SEM': values.sem(),
            }
            for sample_name, values in df.items()
        ]
        stats_data = pd.DataFrame(rows, columns=stat_columns)
        stats_data.to_excel(writer, sheet_name=f'{sheet_name}', index=False)

In [None]:
# Grouping: derive the Week and Group labels from each sample name
import pandas as pd

# Step 5: tag every sample row with a 'Week' (first three characters of the
# sample name) and a 'Group' ('Vascularized' when the name contains 'VASCU',
# otherwise 'Non_Vascularized'), and move those two columns to the front.
file_path = r"C:\Baseline_Avg2.xlsx"
xls = pd.ExcelFile(file_path)
processed_sheets = {}
for sheet_name in xls.sheet_names:
    sheet = pd.read_excel(xls, sheet_name)
    # Every column except the two label columns, in their existing order.
    other_columns = [name for name in sheet.columns if name not in ('Group', 'Week')]
    sheet['Week'] = sheet['Sample Name'].str.slice(stop=3)
    sheet['Group'] = [
        'Vascularized' if 'VASCU' in sample else 'Non_Vascularized'
        for sample in sheet['Sample Name']
    ]
    processed_sheets[sheet_name] = sheet[['Group', 'Week'] + other_columns]

output_file_path = r"C:\Baseline_Groupped.xlsx"
with pd.ExcelWriter(output_file_path) as writer:
    for name, labelled in processed_sheets.items():
        labelled.to_excel(writer, sheet_name=name, index=False)

In [None]:
## Remove outliers, Plot, save for Prism import
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Workbook holding the labelled per-sample statistics (one sheet per variable).
file_path = r"C:\Baseline_Groupped.xlsx"
xls = pd.ExcelFile(file_path)

# Plot colour used for each value of the 'Group' column.
colors = dict(Non_Vascularized='gray', Vascularized='lightcoral')

def remove_outliers(df, column, lower_k=3, upper_k=5):
    """Drop rows whose `column` value is an outlier within its Group/Week cell.

    For every ('Group', 'Week') combination the quartiles Q1 and Q3 of
    `column` are computed, and a row is removed when its value lies below
    ``Q1 - lower_k * IQR`` or above ``Q3 + upper_k * IQR``.

    Args:
        df: DataFrame with 'Group' and 'Week' columns plus `column`.
        column: Name of the numeric column to screen.
        lower_k: IQR multiplier for the lower fence.
        upper_k: IQR multiplier for the upper fence.
            NOTE(review): the defaults (3 below, 5 above) keep the original
            asymmetric fences - confirm the asymmetry is intended.

    Returns:
        A new DataFrame with all original columns, rows ordered by
        ('Group', 'Week') and a fresh 0..n-1 index. Rows with a missing
        'Group' or 'Week' are dropped; rows with a missing `column` value
        are kept.
    """
    keys = ['Group', 'Week']
    # groupby ignores rows whose key is NaN, so they never reach the result.
    kept = df.dropna(subset=keys)
    if kept.empty:
        return kept.reset_index(drop=True)

    # Broadcast the per-cell quartiles back onto the rows and filter with a
    # mask. groupby.apply is avoided on purpose: newer pandas no longer
    # passes the grouping columns to the applied function, so together with
    # reset_index(drop=True) 'Group' and 'Week' would vanish from the result.
    per_cell = kept.groupby(keys)[column]
    q1 = per_cell.transform(lambda values: values.quantile(0.25))
    q3 = per_cell.transform(lambda values: values.quantile(0.75))
    iqr = q3 - q1
    too_low = kept[column] < (q1 - lower_k * iqr)
    too_high = kept[column] > (q3 + upper_k * iqr)

    # Sorting by the keys reproduces the group-by-group row order.
    return kept[~(too_low | too_high)].sort_values(keys).reset_index(drop=True)

def calculate_group_statistics(df):
    """Summarise the 'Average' column per Week/Group combination.

    Args:
        df: DataFrame with 'Week', 'Group' and 'Average' columns.

    Returns:
        DataFrame with one row per (Week, Group) and the columns 'Week',
        'Group', 'mean', 'std', 'sem', 'max', 'min', 'count' (non-missing
        values in that Week/Group cell) and 'Count' (non-missing values of
        the row's Group across all weeks).
    """
    group_stats = (
        df.groupby(['Week', 'Group'])['Average']
        .agg(['mean', 'std', 'sem', 'max', 'min', 'count'])
        .reset_index()
    )
    # Look the per-Group total up by the row's Group value. Assigning the
    # per-Group table directly would align on the positional index, filling
    # only the first rows (with unrelated totals) and leaving the rest NaN.
    group_totals = df.groupby('Group')['Average'].count()
    group_stats['Count'] = group_stats['Group'].map(group_totals)
    return group_stats

# Step 6: for every sheet (variable) compute group statistics before and
# after outlier removal, draw a box/strip plot with mean and SEM overlays,
# save it as PDF, and finally export the statistics and the cleaned data.
import os  # local to this cell: only needed to build and create the PDF folder

processed_data = {}
variable_stats_raw = {}
variable_stats_cleaned = {}

# Loop invariants, defined once instead of once per sheet.
group_order = ['Non_Vascularized', 'Vascularized']
week_order = ['w17', 'w20', 'w23']
output_directory = r'C:\007_Stats'
# savefig does not create missing folders, so make sure the target exists.
os.makedirs(output_directory, exist_ok=True)

for sheet_name in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name)
    # Infinite values are treated like missing ones; rows without a week or
    # an average cannot be plotted or summarised.
    df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=['Week', 'Average'])
    group_stats_raw = calculate_group_statistics(df)
    variable_stats_raw[sheet_name] = group_stats_raw
    # --------------------- Remove outliers --------------------
    df_cleaned = remove_outliers(df, 'Average')
    processed_data[sheet_name] = df_cleaned
    group_stats = calculate_group_statistics(df_cleaned)
    variable_stats_cleaned[sheet_name] = group_stats

    plt.figure(figsize=(7, 6))
    ax = sns.boxplot(data=df_cleaned, x='Week', y='Average', hue='Group',
                     order=week_order,
                     hue_order=group_order, palette=colors, showfliers=False, width=0.5)
    sns.stripplot(data=df_cleaned, x='Week', y='Average', hue='Group',
                  order=week_order,
                  hue_order=group_order, dodge=True, jitter=True, edgecolor='black', linewidth=0.5, palette=colors,
                  alpha=0.7, size=4, ax=ax)

    # Per Week/Group mean and SEM, computed once per sheet and shared by the
    # line overlay and the error bars below.
    means = df_cleaned.groupby(['Week', 'Group'])['Average'].mean().reset_index()
    sems = df_cleaned.groupby(['Week', 'Group'])['Average'].sem().reset_index()

    # Black line connecting the weekly means of each group.
    for group in colors:
        group_means = means[means['Group'] == group]
        plt.plot(group_means['Week'], group_means['Average'],
                 color='black', linestyle='-', linewidth=1, marker='o', markersize=3, markeredgecolor='black',
                 markeredgewidth=2)

    # Group-coloured mean marker with a SEM error bar for every week.
    for group, color in colors.items():
        group_sems = sems[sems['Group'] == group]
        for dpi in week_order:
            dpi_sems = group_sems[group_sems['Week'] == dpi]
            if dpi_sems.empty:
                # No data for this group in this week: nothing to draw.
                continue
            plt.errorbar(dpi_sems['Week'], means[(means['Group'] == group) & (means['Week'] == dpi)]['Average'],
                         yerr=dpi_sems['Average'], fmt='o', color=color, markersize=10, capsize=5,
                         markeredgecolor='black', linewidth=1, ecolor='black')

    plt.xticks(rotation=45, fontsize=16)

    # Set the y-axis to log scale and fixed maximum
    #ax.set_yscale('log')
    #ax.set_ylim(1e0, 1e2)  # Adjust the limits according to your data

    plt.legend(title='Group', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xlabel('Week', fontsize=16)
    plt.ylabel('Average', fontsize=18)
    plt.title(f'{sheet_name}', fontsize=18)
    plt.yticks(fontsize=16)
    plt.tight_layout()

    pdf_filename = os.path.join(output_directory, f"Ex02Ex04_{sheet_name}.pdf")
    plt.savefig(pdf_filename, format='pdf')
    plt.show()
    # Release the figure so open figures do not pile up across sheets.
    plt.close()

# Statistics before ("_raw") and after ("_clean") outlier removal.
output_excel_path = r"C:\Baseline_Stat.xlsx"
with pd.ExcelWriter(output_excel_path) as writer:
    for variable, stats in variable_stats_raw.items():
        stats.to_excel(writer, sheet_name=f"{variable}_raw", index=False)
    for variable, stats in variable_stats_cleaned.items():
        stats.to_excel(writer, sheet_name=f"{variable}_clean", index=False)

# Per-sample data with the outliers removed, one sheet per variable.
output_excel_path_cleaned = r"C:\Baseline_OutliersRemoved.xlsx"
with pd.ExcelWriter(output_excel_path_cleaned) as writer_cleaned:
    for sheet_name, data in processed_data.items():
        data.to_excel(writer_cleaned, sheet_name=sheet_name, index=False)