In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

In [None]:
rootdir = "/Users/teboa/Documents/splitFAST_imaging/2023_12_06/"


def load_condition_frames(rootdir):
    """Build one wide dataframe per condition folder found under ``rootdir``.

    Every directory below ``rootdir`` that contains ``.csv`` files is treated
    as one condition, named after the directory. The first column of each CSV
    is renamed to ``'frame'``; the files of a condition are then joined
    column-wise on ``'frame'``, keeping only frames present in every file
    (inner join).

    CSV files sitting directly in ``rootdir`` are ignored. They do not belong
    to a condition folder, and the export cell at the end of this notebook
    writes its ``contrast*.csv`` / ``mean*.csv`` files into this same folder,
    so reading them back would break a Restart & Run All.

    Directories and files are visited in sorted order so that the order of
    conditions (plot panels) and of data columns is reproducible.

    Parameters
    ----------
    rootdir : str
        Top-level folder to walk.

    Returns
    -------
    dict
        Maps condition name to a DataFrame whose first column is ``'frame'``.
        Two folders with the same base name map to the same key; the one
        visited later wins.
    """
    root = os.path.normpath(rootdir)
    frames_by_condition = {}

    for dirpath, dirnames, filenames in os.walk(rootdir):
        # Sorting in place steers os.walk's top-down traversal order
        dirnames.sort()
        csv_names = sorted(name for name in filenames if name.endswith('.csv'))

        # Skip folders without CSVs, and the root itself (holds exported files)
        if not csv_names or os.path.normpath(dirpath) == root:
            continue

        # The condition is the name of the folder holding the CSV files
        condition = os.path.basename(dirpath)

        tables = []
        for csv_name in csv_names:
            df = pd.read_csv(os.path.join(dirpath, csv_name))
            # Whatever the first column is called in the file, treat it as the frame number
            df.columns = ['frame'] + df.columns.tolist()[1:]
            tables.append(df.set_index('frame'))

        # NOTE(review): if several files share column names the combined table
        # will contain duplicate labels -- confirm the CSV headers are unique.
        frames_by_condition[condition] = pd.concat(tables, axis=1, join='inner').reset_index()

    return frames_by_condition


# One combined dataframe per condition, keyed by condition (folder) name
dataframes = load_condition_frames(rootdir)

In [None]:
# Names of all loaded conditions (the keys of `dataframes`), in insertion order
conditions = list(dataframes)

# List each name on its own line; the trailing expression shows the total count
for name in conditions:
    print(name)
len(conditions)

Plan: for each condition, create a new dataframe named after the condition and add the position data to it; all dataframes from a single condition need to be combined column-wise (`pd.concat([...], axis=1)`). Convert frame number to time (in a new dataframe). Plot the raw data. Then make a new dataframe for each condition that is normalized between 0 and 1, and plot it.

In [None]:
# Sanity check: print the first rows of the combined table for one condition.
# The key 'RspA_MOM' is hard-coded, so this raises KeyError if that condition was not loaded.
print(dataframes['RspA_MOM'].head())

In [None]:
# Print the dtype of every column in the same condition's table
# (raises KeyError if 'RspA_MOM' was not loaded).
print(dataframes['RspA_MOM'].dtypes)

In [None]:
# Raw traces: one panel per condition, one line per data column.
# TODO: panels follow the iteration order of `dataframes`; sort by condition
# if a fixed layout is needed.

# Smallest square grid that holds one panel per condition
grid_size = int(np.ceil(np.sqrt(len(dataframes))))

# squeeze=False always returns a 2-D array of Axes. Without it a 1x1 grid
# (a single condition) returns a bare Axes object and .flatten() below fails.
fig, axs = plt.subplots(grid_size, grid_size, figsize=(15, 15), squeeze=False)

# Flatten the axes array and remove the panels that will stay empty
axs = axs.flatten()
for ax in axs[len(dataframes):]:
    fig.delaxes(ax)

# Draw every data column of a condition against the frame number
for ax, (condition, df) in zip(axs, dataframes.items()):
    for column in df.columns[1:]:  # Skip the 'frame' column
        ax.plot(df['frame'], df[column], label=column)
    ax.set_title(condition)  # Panel title is the condition name
    ax.set_xlabel('Frame')
    ax.set_ylabel('Value')

plt.tight_layout()
plt.show()  # Display the figure


In [None]:
# Normalize every trace to its own maximum
def _scale_to_max(frame):
    """Return a copy of ``frame`` with each column after the first divided by that column's maximum."""
    scaled = frame.copy()  # leave the raw table untouched
    for name in scaled.columns[1:]:  # first column ('frame') is not rescaled
        peak = scaled[name].max()
        scaled[name] = scaled[name] / peak
    return scaled


# Same keys as `dataframes`, values are the max-normalized copies
dataframes_norm = {
    condition: _scale_to_max(raw) for condition, raw in dataframes.items()
}

print(dataframes_norm['RspA_MOM'].head())

In [None]:
# Normalized traces: one panel per condition, one line per data column.

# Smallest square grid that holds one panel per condition
grid_size = int(np.ceil(np.sqrt(len(dataframes_norm))))

# squeeze=False always returns a 2-D array of Axes. Without it a 1x1 grid
# (a single condition) returns a bare Axes object and .flatten() below fails.
fig, axs = plt.subplots(grid_size, grid_size, figsize=(15, 15), squeeze=False)

# Flatten the axes array and remove the panels that will stay empty
axs = axs.flatten()
for ax in axs[len(dataframes_norm):]:
    fig.delaxes(ax)

# Draw every normalized column of a condition against the frame number
for ax, (condition, df) in zip(axs, dataframes_norm.items()):
    for column in df.columns[1:]:  # Skip the 'frame' column
        ax.plot(df['frame'], df[column], label=column)
    ax.set_title(condition)  # Panel title is the condition name
    ax.set_xlabel('Frame')
    ax.set_ylabel('Value')

plt.tight_layout()
plt.show()  # Display the figure

In [None]:
# Per-frame mean and standard deviation across the normalized traces
def _with_row_stats(frame):
    """Return a copy of ``frame`` with row-wise 'mean' and 'SD' columns appended."""
    out = frame.copy()  # keep the normalized table unchanged
    # Average over everything except the first ('frame') column
    out['mean'] = out.iloc[:, 1:].mean(axis=1)
    # 'mean' is now the last column, so 1:-1 covers the same columns as above
    out['SD'] = out.iloc[:, 1:-1].std(axis=1)
    return out


dataframes_mean = {
    condition: _with_row_stats(normed)
    for condition, normed in dataframes_norm.items()
}

print(dataframes_mean['RspA_MOM'].head())


In [None]:

# Mean +/- SD time course: one panel per condition.

# Smallest square grid that holds one panel per condition
grid_size_m = int(np.ceil(np.sqrt(len(dataframes_mean))))

# squeeze=False always returns a 2-D array of Axes. Without it a 1x1 grid
# (a single condition) returns a bare Axes object and .flatten() below fails.
fig1, axs1 = plt.subplots(grid_size_m, grid_size_m, figsize=(15, 15), squeeze=False)

# Flatten the axes array and remove the panels that will stay empty
axs1 = axs1.flatten()
for ax in axs1[len(dataframes_mean):]:
    fig1.delaxes(ax)

for ax, (condition, df) in zip(axs1, dataframes_mean.items()):
    # Draw the mean curve once per panel. The previous per-column loop redrew
    # the identical errorbar once for every column, stacking duplicate artists.
    ax.errorbar(df['frame'], df['mean'], yerr=df['SD'], label='Mean')
    ax.set_title(condition)  # Panel title is the condition name
    ax.set_xlabel('Frame')
    ax.set_ylabel('Value')
    ax.set_ybound(0, 1.1)  # same y-range on every panel

plt.tight_layout()
plt.show()  # Display the figure

In [None]:
# Calculate the contrast, i.e. pre-rapamycin fluorescence intensity vs.
# post-rapamycin fluorescence intensity, from the raw (un-normalized) traces.
# NOTE(review): assumes the first N_PRE frames are pre-rapamycin and the last
# N_POST frames are post-rapamycin -- confirm against the acquisition protocol.
N_PRE = 4    # number of leading frames averaged into 'pre'
N_POST = 10  # number of trailing frames averaged into 'post'

dataframes_contrast = {}

for condition, df in dataframes.items():
    # First N_PRE and last N_POST rows without the 'frame' column, transposed
    # so that each row is one trace and each column one selected frame.
    window = df.iloc[list(range(N_PRE)) + list(range(-N_POST, 0)), 1:].T

    # Compute both means BEFORE adding any column. Previously 'post' was taken
    # from iloc[:, -10:] after 'pre' had been appended, so it averaged nine
    # post frames plus the 'pre' value and dropped one post frame.
    pre = window.iloc[:, :N_PRE].mean(axis=1)
    post = window.iloc[:, -N_POST:].mean(axis=1)

    # Keep the selected frames and append the two summary columns
    dataframes_contrast[condition] = window.assign(pre=pre, post=post)

print(dataframes_contrast['RspA_MOM'].head())

In [None]:
# Plot pre- and post-rapamycin intensities as violin plots, one panel per
# condition, with a paired t-test p-value in each panel title.

import seaborn as sns
from scipy import stats

# Size the grid from the dictionary that is actually plotted
# (this previously used len(dataframes_mean)).
grid_size_c = int(np.ceil(np.sqrt(len(dataframes_contrast))))

# squeeze=False always returns a 2-D array of Axes. Without it a 1x1 grid
# (a single condition) returns a bare Axes object and .flatten() below fails.
fig2, axs2 = plt.subplots(grid_size_c, grid_size_c, figsize=(15, 15), squeeze=False)

# Flatten the axes array and remove the panels that will stay empty
axs2 = axs2.flatten()
for ax in axs2[len(dataframes_contrast):]:
    fig2.delaxes(ax)

for ax, (condition, df) in zip(axs2, dataframes_contrast.items()):
    # Keep only the two summary columns
    df_selected = df[['pre', 'post']]

    # Melt into long format (one row per trace and time window) for seaborn
    df_melted = df_selected.melt(var_name='condition', value_name='value')

    # Distribution of per-trace means before vs. after
    sns.violinplot(x='condition', y='value', data=df_melted, ax=ax)

    # Paired test: each trace contributes one 'pre' and one 'post' value
    t_stat, p_value = stats.ttest_rel(df['pre'], df['post'])

    # Title carries the condition name and the p-value
    ax.set_title(f"{condition} (p={p_value:.2e})")

plt.tight_layout()
plt.show()  # Display the figure

In [None]:
# Export the contrast and mean/SD tables as CSV files (for Prism).
# Each file is named <prefix><condition>.csv and written into `savedir`.
# NOTE: `savedir` is the same folder that the loading cell walks for input.
savedir = "/Users/teboa/Documents/splitFAST_imaging/2023_12_06/"

exports = (
    ('contrast', dataframes_contrast),
    ('mean', dataframes_mean),
)
for prefix, tables in exports:
    for name, table in tables.items():
        table.to_csv(savedir + prefix + str(name) + '.csv')
