In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
from scipy import stats
import warnings
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import matplotlib.pyplot as plt
import seaborn as sns
import re
import glob
!pip install xlsxwriter
pd.options.mode.chained_assignment = None  # default='warn'



In [2]:
def custom_sort(item):
    """Sort key for dose folder names.

    Names ending in 'basal' map to 0 so they sort first. For any other name,
    the text before the first 'uM' (or the whole name when 'uM' is absent) is
    split on '_' and its last piece is parsed as an integer, e.g.
    'Div19_10uMCNO' -> 10.

    Raises:
        ValueError: if that last piece is not an integer literal.
    """
    if not item.endswith('basal'):
        before_unit, _, _ = item.partition('uM')
        return int(before_unit.rsplit('_', 1)[-1])
    return 0

In [6]:
# Locate the project folders relative to the notebook's working directory.
currdir = os.getcwd()
parent = os.path.dirname(currdir)
gparent = os.path.dirname(parent)

# Layout of the exported CSV files -- adjust to match the export.
lines_to_skip = 10  # leading lines to skip when reading each CSV
header_lines = 3    # number of header lines

EXPERIMENT_TIME = "1_MIN"
plate_type = 'DIV19'

PERTURBATION = "CNO"
firstTableHeading = "Number of Spikes"  # name given to the first (untitled) table of each file

FOLDER_PATH = f"{parent}/example_data/Div19_CNO/Div19_1minute"
folder_dict = {}

# Walk through the folder of interest. Every directory below FOLDER_PATH is
# registered under its own name; if FOLDER_PATH has no sub-directories at all,
# the folder itself is registered as the 'basal' condition.
for dirpath, dirnames, filenames in os.walk(FOLDER_PATH):
    if dirpath != FOLDER_PATH:
        # basename() instead of split("/") so the key is also correct where
        # os.walk joins paths with a different separator.
        folder_dict[os.path.basename(dirpath)] = dirpath
    elif not dirnames:
        folder_dict['basal'] = dirpath

# Dose folder names in increasing dose order (basal folders excluded).
dose_names = [name for name in folder_dict.keys() if 'basal' not in name]
dose_names.sort(key=custom_sort)

# Map generic slots ('basal', 'dose1', 'dose2', ...) to the dose labels, i.e.
# the part of each folder name after its last underscore.
balances = {"basal": "basal"}
for i, dose_name in enumerate(dose_names, start=1):
    balances[f"dose{i}"] = dose_name.split("_")[-1]

# Output the dose mapping and the discovered folders.
print("Balances:", balances)
folder_dict

Balances: {'basal': 'basal', 'dose1': '5uMCNO', 'dose2': '10uMCNO', 'dose3': '15uMCNO', 'dose4': '20uMCNO'}


{'Div19_5uMCNO': '/home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/example_data/Div19_CNO/Div19_1minute/Div19_5uMCNO',
 'Div19_10uMCNO': '/home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/example_data/Div19_CNO/Div19_1minute/Div19_10uMCNO',
 'Div19_basal': '/home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/example_data/Div19_CNO/Div19_1minute/Div19_basal',
 'Div19_20uMCNO': '/home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/example_data/Div19_CNO/Div19_1minute/Div19_20uMCNO',
 'Div19_15uMCNO': '/home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/example_data/Div19_CNO/Div19_1minute/Div19_15uMCNO'}

In [7]:
def read_csvs_to_dict(folder, lines_to_skip=10):
    """Read every plate CSV directly inside `folder` into a dict of DataFrames.

    A file is loaded only when its *file name* contains 'plate' followed by an
    optional whitespace character and a number (case-insensitive). The number
    is taken from the file name rather than the full path, so a directory
    called e.g. 'plate 2' cannot hijack the key of every file inside it.

    Args:
        folder: directory that is searched (non-recursively) for '*.csv'.
        lines_to_skip: number of leading lines passed to `pd.read_csv` as
            `skiprows`.

    Returns:
        dict mapping 'plate_<number>' to the loaded DataFrame, ordered by
        plate number. If two files carry the same plate number, the later one
        in that order replaces the earlier one.
    """
    matches = []
    for file in glob.glob(os.path.join(folder, "*.csv")):
        name = os.path.basename(file)
        plate_num = re.search(r'plate\s?(\d+)', name, re.IGNORECASE)
        if plate_num:
            matches.append((int(plate_num.group(1)), name, plate_num.group(1), file))

    # glob returns files in filesystem order; sorting makes the plate order,
    # and therefore the row order after concatenation, reproducible.
    matches.sort(key=lambda match: match[:2])

    data_dict = {}
    for _, _, plate_label, file in matches:
        df = pd.read_csv(file, skiprows=lines_to_skip)
        # 'Unnamed: 9' is dropped when present; files without that column are
        # loaded as-is instead of raising.
        data_dict[f"plate_{plate_label}"] = df.drop(columns=['Unnamed: 9'], errors='ignore')
    return data_dict
# Load the plate CSVs of every discovered folder, keyed by folder name.
all_files_dict = {}

for dose, folder in folder_dict.items():
    # Pass the configured skip count explicitly so that adjusting
    # `lines_to_skip` in the configuration cell actually takes effect.
    all_files_dict[dose] = read_csvs_to_dict(folder, lines_to_skip=lines_to_skip)

# Show which plates were found for each folder.
for dose, files_dict in all_files_dict.items():
    print(dose, files_dict.keys())

Div19_5uMCNO dict_keys(['plate_1', 'plate_3'])
Div19_10uMCNO dict_keys(['plate_1', 'plate_3'])
Div19_basal dict_keys(['plate_1', 'plate_3'])
Div19_20uMCNO dict_keys(['plate_3', 'plate_1'])
Div19_15uMCNO dict_keys(['plate_3', 'plate_1'])


In [8]:
# Inspect the slot -> dose-label mapping built in the configuration cell.
balances

{'basal': 'basal',
 'dose1': '5uMCNO',
 'dose2': '10uMCNO',
 'dose3': '15uMCNO',
 'dose4': '20uMCNO'}

In [9]:
def concat_plates(plates_dict, firstHeading):
    """Split each plate sheet into its stacked tables and concatenate them per table name.

    Each plate DataFrame is treated as a vertical stack of tables: the first
    table occupies rows 0-13 and has no title row, so it is named
    `firstHeading`. Every following table starts at row 14, 30, 46, ... (a
    stride of 16 rows); the first cell of that row is used as the table name
    and the table's data starts two rows below it.

    Args:
        plates_dict: mapping of plate key -> DataFrame. Each frame must have
            an 'Unnamed: 0' column, which becomes the index of every table.
        firstHeading: name to use for the first (untitled) table.

    Returns:
        dict mapping the whitespace-stripped table name to the row-wise
        concatenation of that table across all plates (plate order follows
        `plates_dict`).
    """
    first_table_rows = 14  # rows occupied by the first, untitled table
    table_stride = 16      # distance between the title rows of later tables
    rows_before_data = 2   # title row plus the row after it (presumably a repeated column header -- confirm)

    experiment = {}
    for df in plates_dict.values():
        maxrows = df.shape[0]
        boundaries = [0] + list(range(first_table_rows, maxrows + table_stride, table_stride))

        for ind, (start, stop) in enumerate(zip(boundaries, boundaries[1:])):
            if ind == 0:
                key = firstHeading
            else:
                # Explicit positional lookup; indexing the row Series with
                # `[0]` relies on a deprecated positional fallback.
                key = df.iloc[start, 0]
                start += rows_before_data

            table = df.iloc[start:stop, :].set_index('Unnamed: 0')
            # Strip the name before grouping so plates whose headings differ
            # only in surrounding whitespace land in the same group instead
            # of overwriting each other afterwards.
            experiment.setdefault(key.strip(), []).append(table)

    return {
        testparam: pd.concat(df_list, ignore_index=False)
        for testparam, df_list in experiment.items()
    }
# For every dose folder, split its plates into tables and merge them per
# table name; the result is keyed by folder name.
doses = {
    dose_label: concat_plates(plate_frames, firstTableHeading)
    for dose_label, plate_frames in all_files_dict.items()
}

doses.keys()

dict_keys(['Div19_5uMCNO', 'Div19_10uMCNO', 'Div19_basal', 'Div19_20uMCNO', 'Div19_15uMCNO'])

In [10]:
def remove_mean_SEM(test_dict, chemical_type):
    """Drop the 'B Mean' / 'B SEM' rows from every table and tag it with its condition.

    `test_dict` is updated in place and also returned. A table that contains
    summary rows is replaced by a copy without them; a table without summary
    rows is kept as the same object. In both cases a 'Chemical_Type' column
    holding `chemical_type` is added to the table stored in the dict.
    """
    summary_labels = ('B Mean', 'B SEM')
    for name in list(test_dict):
        frame = test_dict[name]
        present = [label for label in summary_labels if label in frame.index]
        if present:
            frame = frame.drop(index=present)
            test_dict[name] = frame
        frame['Chemical_Type'] = chemical_type
    return test_dict
# Strip the summary rows from every table and label each table with the
# folder name of the condition it belongs to.
doses = {
    dose_label: remove_mean_SEM(tables, dose_label)
    for dose_label, tables in doses.items()
}


In [11]:
def rename_row_names(experiment_df):
    """Return a copy of the table whose rows are labelled 'Replicate1', 'Replicate2', ...

    The previous row labels are discarded, whether they live in the index or
    in an 'Unnamed: 0' column. Because the old index is dropped rather than
    moved into a column, calling the function again on its own output (e.g.
    when the cell is re-run) returns the same table instead of accumulating
    an extra 'index' column of old labels.

    Args:
        experiment_df: table with one row per replicate.

    Returns:
        New DataFrame with the same data columns (minus 'Unnamed: 0') and a
        'Replicate<n>' index numbered from 1 in row order.
    """
    renamed = experiment_df.reset_index(drop=True)
    renamed = renamed.drop(columns="Unnamed: 0", errors="ignore")
    renamed.index = [f"Replicate{number}" for number in range(1, len(renamed) + 1)]
    return renamed
# Relabel the rows of every table of every condition as Replicate1..N.
# The per-condition dicts are updated in place, so `doses` itself needs no
# reassignment.
for dose, metric_tables in doses.items():
    for metric_name in list(metric_tables):
        metric_tables[metric_name] = rename_row_names(metric_tables[metric_name])

In [13]:
# Inspect which condition folders are present in `doses`.
doses.keys()

dict_keys(['Div19_5uMCNO', 'Div19_10uMCNO', 'Div19_basal', 'Div19_20uMCNO', 'Div19_15uMCNO'])

In [14]:
# Show the slot names, their dose labels and the folder-name keys of `doses` together.
balances.keys(), balances.values(), doses.keys()

(dict_keys(['basal', 'dose1', 'dose2', 'dose3', 'dose4']),
 dict_values(['basal', '5uMCNO', '10uMCNO', '15uMCNO', '20uMCNO']),
 dict_keys(['Div19_5uMCNO', 'Div19_10uMCNO', 'Div19_basal', 'Div19_20uMCNO', 'Div19_15uMCNO']))

In [15]:
# Re-key the per-condition tables by generic slot ('basal', 'dose1', ...).
# A folder belongs to a slot when the part of its name after the last
# underscore equals the slot's dose label.
doses_by_label = {dkey.split("_")[-1]: dval for dkey, dval in doses.items()}
dmapped = {
    bkey: doses_by_label[bval]
    for bkey, bval in balances.items()
    if bval in doses_by_label
}

# Sanity check: each slot should contain tables of exactly one condition.
[dmapped[slot]['Mean Firing Rate (Hz)']['Chemical_Type'].unique() for slot in dmapped]

[array(['Div19_basal'], dtype=object),
 array(['Div19_5uMCNO'], dtype=object),
 array(['Div19_10uMCNO'], dtype=object),
 array(['Div19_15uMCNO'], dtype=object),
 array(['Div19_20uMCNO'], dtype=object)]

In [16]:
# List the slots in the order they will be processed.
for slot in dmapped:
    print(slot)

basal
dose1
dose2
dose3
dose4


In [17]:
def impute_nans(testparam, table):
    """Fill missing measurements of one table with scikit-learn's IterativeImputer.

    The 'Chemical_Type' column is set aside, the remaining columns are imputed
    and the label column is re-attached afterwards.

    Columns with no observed value at all are left as NaN: by default the
    imputer removes such columns from its output, so they are excluded from
    the fit and written back unchanged. This keeps the output width equal to
    the input width instead of failing when the frame is rebuilt.

    Args:
        testparam: name of the table; not used by the function.
        table: DataFrame with a 'Chemical_Type' column and otherwise values
            that the imputer can convert to floats.

    Returns:
        DataFrame with the same index and column order as `table`; measurement
        columns are float, 'Chemical_Type' is copied over row by row.
    """
    chem_types = table['Chemical_Type']
    features = table.drop(columns=['Chemical_Type'])

    # Boolean mask of the columns holding at least one observed value.
    has_data = features.notna().any(axis=0).to_numpy()

    imputed_values = np.full(features.shape, np.nan, dtype=float)
    if has_data.any():
        imputer = IterativeImputer(max_iter=100, random_state=0)
        imputed_values[:, has_data] = imputer.fit_transform(features.loc[:, has_data])

    # Convert the imputed data back to a DataFrame.
    imputed_df = pd.DataFrame(imputed_values, columns=features.columns, index=features.index)
    # Positional copy: the rows are in the same order as in `table`.
    imputed_df['Chemical_Type'] = chem_types.to_numpy()
    return imputed_df
# Impute every table of every slot. Tables whose measurement columns contain
# no observed value at all are left out, since there is nothing to impute from.
dmappedI = {}
for dose, tdict in dmapped.items():
    imputed_tables = {}
    for testname, table in tdict.items():
        measurements = table.loc[:, table.columns != 'Chemical_Type']
        if measurements.notna().any().any():
            imputed_tables[testname] = impute_nans(testname, table)
    dmappedI[dose] = imputed_tables




In [20]:
def perform_t_test(condition_dict, control_name="unt2"):
    """Compare every test column of every table against the control column.

    For each table an independent two-sample t-test (`scipy.stats.ttest_ind`)
    is run between each test column and the `control_name` column. Test
    columns are all columns except the control column and any column whose
    name contains 'unt' or 'chemical' (case-insensitive).

    Rows labelled 'B Mean', 'B SEM' or 'B Std' (case-insensitive) are excluded
    first. The rows are selected by the labels actually present in the index,
    so a differently-cased label such as 'b mean' is removed rather than
    causing a KeyError.

    Args:
        condition_dict: mapping of table name -> DataFrame with one row per
            replicate.
        control_name: column holding the control measurements.

    Returns:
        dict of table name -> {column: [t_stat, pval]}; when the test emitted
        warnings, the message of the last one is appended as a third element
        prefixed with 'Warning: '.

    Raises:
        KeyError: if a table has test columns but no `control_name` column.
    """
    summary_labels = {'b mean', 'b sem', 'b std'}
    tests = {}
    for testname, source_table in condition_dict.items():
        tests[testname] = {}

        is_summary = source_table.index.astype(str).str.lower().isin(summary_labels)
        table = source_table.loc[~is_summary]

        test_variables = [
            col for col in table.columns
            if col != control_name
            and "unt" not in str(col).lower()
            and "chemical" not in str(col).lower()
        ]
        if not test_variables:
            continue

        # The control group is the same for every comparison of this table.
        control_group = list(table[control_name].astype(float))
        for testitem in test_variables:
            exp_group = list(table[testitem].astype(float))
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")  # record every warning raised by the test
                t_stat, pval = stats.ttest_ind(exp_group, control_group)
            outcome = [t_stat, pval]
            if w:
                outcome.append('Warning: ' + str(w[-1].message))
            tests[testname][testitem] = outcome
    return tests

# Run the t-tests on the imputed tables of every slot.
dmapped_ttest = {
    slot: perform_t_test(imputed_tables)
    for slot, imputed_tables in dmappedI.items()
}
dmapped_ttest.keys()

dict_keys(['basal', 'dose1', 'dose2', 'dose3', 'dose4'])

In [36]:
# Inspect the slot -> dose-label mapping; the labels are used in the output file names below.
balances

{'basal': 'basal',
 'dose1': '5uMCNO',
 'dose2': '10uMCNO',
 'dose3': '15uMCNO',
 'dose4': '20uMCNO'}

In [21]:
def ensure_dirs_exists(path):
    """Create the parent directory of the file `path` if it does not exist yet.

    Missing intermediate directories are created as well. A path without a
    directory part (a bare file name) refers to the current directory, so
    nothing is created for it.

    Args:
        path: path of a file that is about to be written.

    Returns:
        None.
    """
    directory = os.path.dirname(path)  # directory part of the file path
    if directory:
        # exist_ok avoids failing when the directory already exists or is
        # created between a check and the call.
        os.makedirs(directory, exist_ok=True)
    return

In [22]:
# Last component of FOLDER_PATH (the run folder name); it is reused in the output paths below.
FOLDER_PATH.split("/")[-1]

'Div19_1minute'

In [38]:
def analyze_test_results(experiment_res, outfile):
    """Turn the raw t-test results of one condition into a labelled table and save it as CSV.

    Args:
        experiment_res: mapping of test name -> {gene: [t_stat, pval]} or
            {gene: [t_stat, pval, warning_text]}.
        outfile: path of the CSV file to write; its parent directory is
            created when missing.

    Returns:
        DataFrame with one row per (test, gene) and the columns 'Test Types',
        'Gene', 'T-statistic', 'Regulation', 'P-value', 'Significance' and
        'Warnings'.

    Notes:
        A NaN t-statistic is labelled "Same/NA" and a NaN p-value is labelled
        "Not Significant", because every comparison with NaN is False.
    """
    col_names = ['Test Types', 'Gene', 'T-statistic', 'Regulation', 'P-value', 'Significance', 'Warnings']

    # Collect plain records and build the frame once; appending with
    # pd.concat inside the loop copies the whole frame for every row.
    records = []
    for test, per_gene in experiment_res.items():
        for prot, result in per_gene.items():
            t_stat = result[0]
            pval = result[1]

            if t_stat < 0:
                regulation = "Down-Regulated"
            elif t_stat > 0:
                regulation = "Up-Regulated"
            else:
                regulation = "Same/NA"

            if pval <= 0.05:
                significance = "Significant diff between gene and control"
            else:
                significance = "Not Significant"

            records.append({
                'Test Types': test,
                'Gene': prot,
                'T-statistic': t_stat,
                'Regulation': regulation,
                'P-value': pval,
                'Significance': significance,
                'Warnings': result[2] if len(result) > 2 else "No Warning",
            })

    df = pd.DataFrame(records, columns=col_names)

    ensure_dirs_exists(outfile)
    df.to_csv(outfile)
    print(f"written to {outfile}")
    return df

# Write one CSV per condition to
# <parent>/t-test_results/<experiment folder>/<run folder>/<dose label>_ttest_results.csv
for condition, testdict in dmapped_ttest.items():
    experiment_folder = FOLDER_PATH.split("/")[-2]
    run_folder = FOLDER_PATH.split("/")[-1]
    csv_path = (
        f'{parent}/t-test_results/{experiment_folder}/{run_folder}/'
        f'{balances[condition]}_ttest_results.csv'
    )
    test_results = analyze_test_results(testdict, csv_path)

written to /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/basal_ttest_results.csv
written to /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/5uMCNO_ttest_results.csv
written to /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/10uMCNO_ttest_results.csv
written to /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/15uMCNO_ttest_results.csv
written to /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/20uMCNO_ttest_results.csv


In [33]:
# Inspect the slots for which t-test results are available.
dmapped_ttest.keys()

dict_keys(['basal', 'dose1', 'dose2', 'dose3', 'dose4'])

In [40]:
def save_all_to_excel(data, outfile):
    """Write the t-statistics and p-values of all conditions to one Excel workbook.

    Each condition gets its own sheet, named after its dose label taken from
    the module-level `balances` mapping. Rows are genes; columns are a
    two-level header of (test name, 't-stat' | 'p-value').

    Rows are the union of the genes of all tests of a condition, in
    first-seen order; a gene missing from a test gets NaN in that test's
    columns. Values are looked up by gene, so they stay aligned even when
    tests cover different genes. A condition without any test produces an
    empty sheet.

    Args:
        data: mapping of condition -> {test name -> {gene: [t_stat, pval, ...]}}.
        outfile: path of the .xlsx file; its parent directory is created when
            missing.

    Returns:
        A status message stating whether `outfile` exists after writing.

    Raises:
        KeyError: if a condition has no entry in `balances`.
    """
    ensure_dirs_exists(outfile)
    missing = float("nan")
    # The context manager saves and closes the workbook on exit; no explicit
    # close() is needed afterwards.
    with pd.ExcelWriter(outfile, engine='xlsxwriter') as writer:
        for condition, testdict in data.items():
            genes = list(dict.fromkeys(
                gene for values in testdict.values() for gene in values
            ))

            # Flatten the data structure
            flat_data = {}
            for test, values in testdict.items():
                flat_data[(test, 't-stat')] = [
                    values[gene][0] if gene in values else missing for gene in genes
                ]
                flat_data[(test, 'p-value')] = [
                    values[gene][1] if gene in values else missing for gene in genes
                ]

            # Create DataFrame with MultiIndex columns
            df = pd.DataFrame(flat_data, index=genes)
            if flat_data:
                df.columns = pd.MultiIndex.from_tuples(df.columns)

            df.to_excel(writer, sheet_name=balances[condition])

    if os.path.exists(outfile):
        return f"Outfile created {outfile}"
    else:
        return f"Problem in writing conditions to {outfile}"

# Combined workbook path:
# <parent>/t-test_results/<experiment folder>/<run folder>/ttest_results_<experiment suffix><run suffix>.xlsx
# where each suffix is the part of the folder name after its last underscore.
experiment_folder = FOLDER_PATH.split("/")[-2]
run_folder = FOLDER_PATH.split("/")[-1]
excel_name = f'ttest_results_{experiment_folder.split("_")[-1]}{run_folder.split("_")[-1]}.xlsx'

save_all_to_excel(
    dmapped_ttest,
    f'{parent}/t-test_results/{experiment_folder}/{run_folder}/' f'{excel_name}',
)


  warn("Calling close() on already closed file.")


'Outfile created /home/poojaparameswaran/Documents/SoderlingLab/MultipleElectrodeAnalysisANOVA/t-test_results/Div19_CNO/Div19_1minute/ttest_results_CNO1minute.xlsx'