<b>Visualizing GEM Data - plotting diffusion coefficients</b> <br><br>
Results of GEM diffusion analysis are in separate JSON files for each cell analyzed <br>
Read the values from these JSON files and put them in a pandas dataframe <br>
Use the dataframe to plot and analyze the experimental results
<br><br>
Here the experimental groups are: no_transfection, mock_transfection, PPPTA_control, and SRKASH <br>
Edit these to adapt to a different experimental setup if needed

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
from scipy import stats
import json
import glob

In [None]:
# Load the JSON results for the "no transfection" condition into a pandas DataFrame.
# NOTE: the glob pattern below must point at the folder holding this condition's JSON files.
dfs = []

# Read every matching JSON file; the keys of each file become DataFrame columns
for file_path in glob.glob('/path/to/data/*/*.json'):
    with open(file_path, 'r') as f:
        data = json.load(f)
    # Build the frame after the file is closed; only the parsed dict is needed
    dfs.append(pd.DataFrame.from_dict(data, orient='index').T)

# Fail with an explicit message instead of pandas' "No objects to concatenate"
if not dfs:
    raise ValueError("No JSON files found for the no_transfection group; check the glob pattern")

# Concatenate all per-file DataFrames into one table
df = pd.concat(dfs, ignore_index=True)

# Keep only the columns used downstream and label every row with its condition.
# .assign() returns a new DataFrame, so no value is written into a column subset of df
# (the original `.loc[:, 'transfection'] = ...` can trigger SettingWithCopyWarning).
no_transfection_df = df[['D_msd (um^2/s)', 'D_pwd (um^2/s)', 'bg_pwd', 'mCh_Labels']].assign(
    transfection='no_transfection'
)
print(no_transfection_df)

In [None]:
# Load the JSON results for the "mock transfection" condition into a pandas DataFrame.
# NOTE: the glob pattern below must point at the folder holding this condition's JSON files.
dfs = []

# Read every matching JSON file; the keys of each file become DataFrame columns
for file_path in glob.glob('/path/to/data/*/*.json'):
    with open(file_path, 'r') as f:
        data = json.load(f)
    # Build the frame after the file is closed; only the parsed dict is needed
    dfs.append(pd.DataFrame.from_dict(data, orient='index').T)

# Fail with an explicit message instead of pandas' "No objects to concatenate"
if not dfs:
    raise ValueError("No JSON files found for the mock_transfection group; check the glob pattern")

# Concatenate all per-file DataFrames into one table
df = pd.concat(dfs, ignore_index=True)

# Keep only the columns used downstream and label every row with its condition.
# .assign() returns a new DataFrame, so no value is written into a column subset of df
# (the original `.loc[:, 'transfection'] = ...` can trigger SettingWithCopyWarning).
mock_transfection_df = df[['D_msd (um^2/s)', 'D_pwd (um^2/s)', 'bg_pwd', 'mCh_Labels']].assign(
    transfection='mock_transfection'
)
print(mock_transfection_df)

In [None]:
# Load the JSON results for the "PPPTA control" condition into a pandas DataFrame.
# NOTE: the glob pattern below must point at the folder holding this condition's JSON files.
dfs = []

# Read every matching JSON file; the keys of each file become DataFrame columns
for file_path in glob.glob('/path/to/data/*/*.json'):
    with open(file_path, 'r') as f:
        data = json.load(f)
    # Build the frame after the file is closed; only the parsed dict is needed
    dfs.append(pd.DataFrame.from_dict(data, orient='index').T)

# Fail with an explicit message instead of pandas' "No objects to concatenate"
if not dfs:
    raise ValueError("No JSON files found for the PPPTA_control group; check the glob pattern")

# Concatenate all per-file DataFrames into one table
df = pd.concat(dfs, ignore_index=True)

# Keep only the columns used downstream and label every row with its condition.
# .assign() returns a new DataFrame, so no value is written into a column subset of df
# (the original `.loc[:, 'transfection'] = ...` can trigger SettingWithCopyWarning).
PPPTA_control_df = df[['D_msd (um^2/s)', 'D_pwd (um^2/s)', 'bg_pwd', 'mCh_Labels']].assign(
    transfection='PPPTA_control'
)
print(PPPTA_control_df)

In [None]:
# Load the JSON results for the "SRKASH" condition into a pandas DataFrame.
# NOTE: the glob pattern below must point at the folder holding this condition's JSON files.
dfs = []

# Read every matching JSON file; the keys of each file become DataFrame columns
for file_path in glob.glob('/path/to/data/*/*.json'):
    with open(file_path, 'r') as f:
        data = json.load(f)
    # Build the frame after the file is closed; only the parsed dict is needed
    dfs.append(pd.DataFrame.from_dict(data, orient='index').T)

# Fail with an explicit message instead of pandas' "No objects to concatenate"
if not dfs:
    raise ValueError("No JSON files found for the SRKASH group; check the glob pattern")

# Concatenate all per-file DataFrames into one table
df = pd.concat(dfs, ignore_index=True)

# Keep only the columns used downstream and label every row with its condition.
# .assign() returns a new DataFrame, so no value is written into a column subset of df
# (the original `.loc[:, 'transfection'] = ...` can trigger SettingWithCopyWarning).
SRKASH_df = df[['D_msd (um^2/s)', 'D_pwd (um^2/s)', 'bg_pwd', 'mCh_Labels']].assign(
    transfection='SRKASH'
)
print(SRKASH_df)

#pd.set_option('display.max_colwidth', None)
#print(df)


In [None]:
# Stack the four per-condition tables (no_transfection, mock_transfection, PPPTA control, SRKASH)
# into a single DataFrame called GEM_data, renumbering the rows from 0
condition_frames = [
    no_transfection_df,
    mock_transfection_df,
    PPPTA_control_df,
    SRKASH_df,
]
GEM_data = pd.concat(condition_frames, ignore_index=True)

print(GEM_data)

First, we plot the diffusion coefficients calculated from the pairwise distribution (D_pwd).

In [None]:
# D_pwd per transfection condition:
#   - point plot layer: mean D with its 95% confidence interval (black point + error bars)
#   - swarm plot layer: the individual D values calculated from the pairwise distribution
# NOTE(review): `errwidth` and `join` appear to be deprecated in seaborn >= 0.13
# (replaced by `err_kws` / `linestyle="none"`) - confirm against the installed version.

sns.set_context("notebook", font_scale=1.2)
g = sns.catplot(
    x="transfection",
    y="D_pwd (um^2/s)",
    data=GEM_data,
    kind="point",
    color="#000000",
    errorbar="ci",
    errwidth=1.5,
    capsize=0.15,
    join=False,
)
g.map_dataframe(sns.swarmplot, x="transfection", y="D_pwd (um^2/s)")
g.set(ylim=(0, None))

In [None]:
# Mean D_pwd per transfection condition; groups keep the order in which they first appear
# (sort=False) and 'transfection' stays a regular column (as_index=False)
Mean_D_pwd = (
    GEM_data
    .groupby(['transfection'], as_index=False, sort=False)
    .agg({'D_pwd (um^2/s)': "mean"})
)
print(Mean_D_pwd)

In [None]:
# We can also run a statistical test to see whether the differences in D are significant.
# TODO: decide which test is most appropriate; for now the goal is to get the code set up.

# Group the data by transfection condition.
# Group by the column name itself (not a one-element list) so that get_group() accepts a
# plain string key: with a length-1 list, pandas >= 2.2 deprecates scalar keys and will
# require a 1-tuple such as ('PPPTA_control',).
transfection_group = GEM_data.groupby('transfection', as_index=False, sort=False)
control_group = transfection_group.get_group('PPPTA_control')
# Holds the rows of the 'SRKASH' condition; the variable name is kept because later cells use it
dNKASH_group = transfection_group.get_group('SRKASH')

# Two-sample Kolmogorov–Smirnov test:
# - tells whether two datasets likely came from the same (unknown) distribution
# - does not require normally distributed data, unlike a t-test
# - the default p-value is two-sided
stats.ks_2samp(control_group["D_pwd (um^2/s)"], dNKASH_group["D_pwd (um^2/s)"])



Next, we plot the diffusion coefficients calculated from the mean squared displacement, MSD (D_msd).

In [None]:
# D_msd per transfection condition:
#   - point plot layer: mean D with its 95% confidence interval (black point + error bars)
#   - swarm plot layer: the individual D values calculated from the MSD
# NOTE(review): `errwidth` and `join` appear to be deprecated in seaborn >= 0.13
# (replaced by `err_kws` / `linestyle="none"`) - confirm against the installed version.

sns.set_context("notebook", font_scale=1.2)
g = sns.catplot(
    x="transfection",
    y="D_msd (um^2/s)",
    data=GEM_data,
    kind="point",
    color="#000000",
    errorbar="ci",
    errwidth=1.5,
    capsize=0.15,
    join=False,
)
g.map_dataframe(sns.swarmplot, x="transfection", y="D_msd (um^2/s)")
g.set(ylim=(0, None))


In [None]:
# Mean D_msd per transfection condition; groups keep the order in which they first appear
# (sort=False) and 'transfection' stays a regular column (as_index=False)
Mean_D_msd = (
    GEM_data
    .groupby(['transfection'], as_index=False, sort=False)
    .agg({'D_msd (um^2/s)': "mean"})
)
print(Mean_D_msd)

In [None]:
# Two-sample Kolmogorov–Smirnov test on the MSD-derived D values:
# PPPTA control rows (control_group) vs SRKASH rows (dNKASH_group)
msd_control = control_group["D_msd (um^2/s)"]
msd_srkash = dNKASH_group["D_msd (um^2/s)"]
stats.ks_2samp(msd_control, msd_srkash)

In [None]:
# Scatter plot of D_pwd against the mCh_Labels column, coloured by transfection condition;
# the y-axis is forced to start at 0
sns.set_context("notebook", font_scale=1.2)
g = sns.scatterplot(
    data=GEM_data,
    x="mCh_Labels",
    y="D_pwd (um^2/s)",
    hue="transfection",
)
g.set(ylim=(0, None))