Notebook to plot multiple trials of GEM experiments together

In [None]:
#Importing libraries that we will need to use
#Set Jupyter Kernel to imaging_env

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
from scipy import stats



In [None]:
# Read the results table exported for each trial into its own DataFrame.
# NOTE(review): all three paths below are the same placeholder string -
# confirm that each one points at the file for its own trial before running.
trial_csv_paths = (
    '/path/to/data.csv',  # trial 1 - 230331
    '/path/to/data.csv',  # trial 2 - 230407
    '/path/to/data.csv',  # trial 3 - 230407_b
)

# One read_csv call per path, unpacked in trial order.
trial_1, trial_2, trial_3 = (pd.read_csv(csv_path) for csv_path in trial_csv_paths)


In [None]:
# Stack the three trial tables into one long table. ignore_index=True gives the
# combined table a fresh 0..n-1 row index instead of repeating each trial's own
# row labels.
all_trials = [trial_1, trial_2, trial_3]
GEM_data = pd.concat(all_trials, ignore_index=True)

# Uncomment to inspect the combined table:
#print(GEM_data)

First, we plot the diffusion coefficients that were calculated from the pairwise distribution (D_pwd).

In [None]:
# Individual D values calculated from the pairwise distribution are drawn as a
# swarm of points colored by the "date" column; the per-condition mean and its
# 95% confidence interval are drawn in black (diamond marker + capped error bars).

sns.set_context("notebook", font_scale=1.2)

# Appearance of the mean marker and its error bars (no line joining the means).
mean_ci_style = dict(
    marker="D",
    markersize=3,
    color="#000000",
    errorbar="ci",
    err_kws={'linewidth': 1.5},
    capsize=0.15,
    linestyle='none',
)

# Appearance of the individual data points.
swarm_style = dict(hue="date", palette='dark', alpha=0.5, size=4)

g = sns.catplot(data=GEM_data, x="condition", y="mobile_D_eff", kind="point", **mean_ci_style)
g.map_dataframe(sns.swarmplot, x="condition", y="mobile_D_eff", **swarm_style)
g.set(ylim=(0, None))  # start the y-axis at zero, leave the upper limit automatic
g.set_xticklabels(rotation=45)

#plt.savefig('Dpwd_swarmplot_mean_95ci_allconditions.png', dpi=300, bbox_inches="tight")

In [None]:
# Mean D_pwd for each condition. sort=False keeps the conditions in the order
# they first appear in GEM_data; as_index=False keeps 'condition' as a regular
# column of the result.
per_condition = GEM_data.groupby(['condition'], as_index=False, sort=False)
mean_mobile_Deff = per_condition.agg({'mobile_D_eff': "mean"})
print(mean_mobile_Deff)

In [None]:
# Number of data points (rows) recorded for each value of the 'condition' column
condition_column = GEM_data['condition']
counts = condition_column.value_counts()

print(counts)

In [None]:
#We can also do a statistical test to see if the differences in D are significant

# Group the data by condition. The grouping key is the scalar 'condition'
# rather than the one-element list ['condition']: when grouping by a length-1
# list, pandas >= 2.2 emits a FutureWarning if get_group() is given a plain
# string (it will require a 1-tuple in a future version). A scalar key accepts
# the plain string without that warning.
condition_group = GEM_data.groupby('condition', as_index=False, sort=False)
control_group = condition_group.get_group('nonCoding')
syne1_group = condition_group.get_group('syne1')
syne2_group = condition_group.get_group('syne2')
syne1_syne2_group = condition_group.get_group('syne1_syne2')

# Two-sample Kolmogorov–Smirnov test
# Tells if two datasets likely came from the same (unknown) distribution
# Does not require data to be normally distributed like a t-test would
# The default p-value is two-sided
# NOTE(review): the three comparisons below share one control group and no
# multiple-comparison correction is applied - confirm this is intended.
ks_comparisons = [
    ("SYNE1", syne1_group),
    ("SYNE2", syne2_group),
    ("SYNE1 + SYNE2", syne1_syne2_group),
]

# Each condition is compared against the control; the printed layout (blank
# line, header, result) is the same for every comparison.
for label, treated_group in ks_comparisons:
    print(" ")
    print(label + " KS Test:")
    print(stats.ks_2samp(control_group["mobile_D_eff"], treated_group["mobile_D_eff"]))
print(" ")
print(" ")