In [624]:
import pandas as pd
import numpy as np
import os
import re
import ast
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from Tusc5IntegralUtils import *
from Tusc5ImageUtils import *

# Importing Data

In [625]:
# '''
# Grabs the fluorescence values for each cell from the 'ImageAnalysis' folder

# >>> Need to change directory
# '''


# # Initial Data Import

# directory = '/Users/raphaeltinio/LabAnalysis/ImageAnalysis' # Needs to get to the /ImageAnalysis folder

# stack_names = [f for f in os.listdir(directory) if f != '.DS_Store']

# Cell_Results = pd.DataFrame(columns = ['File', 'Cell', 'Stain', 'X_vals', 'Y_vals'])

# for fname in stack_names:
#     result_directory = directory + "/" + fname + "/results"
    
#     csv_files = [f for f in os.listdir(result_directory) if f.endswith('.csv')] #incase there is a none .csv file for some reason

#     # Grabs number of cells in an image
#     ncells = np.unique([re.search(r'(\d+)(?=\.csv$)', s).group(1) if re.search(r'(\d+)(?=\.csv$)', s) else None for s in csv_files])

#     # Cell Counter
#     if len(Cell_Results) == 0:
#         n_add = 0

#     else:
#         n_add = Cell_Results.Cell.max() + 1 # counts the number of cells after an addition of a z-stack dataframe, so that it keeps counting up 

#     for n in ncells:
#         unique_cell = [f for f in csv_files if "cell" + n + '.csv' in f] # Grabs list of specific cell

#         cell_number = int(n) + n_add

#         for csv in unique_cell:

#             stain_number = csv.split('_')[0]
#             #stain_number = re.findall(r'\d+$', stain_number)[0]

#             single_csv = pd.read_csv(result_directory + '/' + csv, usecols = [1,2])

#             temp_frame = pd.DataFrame({
#                 'File': fname,
#                 'Cell': cell_number,
#                 'Stain': stain_number,
#                 'X_vals': [single_csv['X'].to_list()],
#                 'Y_vals': [single_csv['Y'].to_list()]})

#             Cell_Results = pd.concat([Cell_Results, temp_frame])


# Cell_Results = Cell_Results.reset_index(drop = True)
# Cell_Results['Cell'] = Cell_Results['Cell'].astype('int')
# Cell_Results = Cell_Results.sort_values('Cell')

# # Extracting the first four integers and the following letter into separate columns ({DJID}{Eye})
# Cell_Results['DJID'] = Cell_Results['File'].str.extract(r'(\d{4})').astype(int)
# Cell_Results['Eye'] = Cell_Results['File'].str.extract(r'(\d{4})([A-Za-z])')[1]

# #Renaming Glut1 to GluT1
# Cell_Results['Stain'] = Cell_Results['Stain'].apply(lambda x: 'GluT1' if x == 'Glut1' else x)

# Cell_Results.to_csv('Cell_Results.csv', index = False)

In [626]:
'''
Loads the per-cell intensity profiles produced by the new image analysis.
'''

Cell_Results = pd.read_csv('all_data.csv')

# The profile columns are stored as text in the CSV; parse every entry back
# into a Python sequence so it can be indexed numerically further down.
for profile_col in ('X_vals', 'Y_vals'):
    Cell_Results[profile_col] = Cell_Results[profile_col].apply(ast.literal_eval)

## DJID_GEN_Eye Merge

In [627]:
'''
Attaches genotype and experimental/control assignment to every row.

'DJID_GEN_EYES.csv' lists, per DJID, the genotype and which eye belonged to the
experimental group and which to the control group. That table is merged into
Cell_Results on DJID.

'''

Recorded_Info = pd.read_csv('DJID_GEN_EYES.csv')

eye_lookup_cols = ['DJID', 'Genotype', 'Experimental Eye', 'Control Eye']
merged_df = Cell_Results.merge(Recorded_Info[eye_lookup_cols], on='DJID', how='left')

# Label each row by comparing its own eye with the recorded assignment.
# Rows matching neither eye keep a missing Group_Type.
is_experimental = merged_df['Eye'] == merged_df['Experimental Eye']
is_control = merged_df['Eye'] == merged_df['Control Eye']
merged_df.loc[is_experimental, 'Group_Type'] = 'Experimental'
merged_df.loc[is_control, 'Group_Type'] = 'Control'

# The lookup columns are only needed for the comparison above
Cell_Results = merged_df.drop(columns=['Experimental Eye', 'Control Eye'])

Cell_Results.head()

Unnamed: 0,Y_vals,mask_id,Stain,X_vals,file_name,DJID,Eye,eGFP_Value,in_rip,Genotype,Group_Type
0,"(88231, 88395, 89662, 90581, 92721, 93422, 942...",0,DAPI,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental
1,"(12355, 12520, 12597, 12537, 12876, 13175, 132...",0,eGFP,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental
2,"(36257, 36092, 38264, 41798, 46942, 55160, 596...",0,WGA,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental
3,"(3717, 3686, 3944, 4754, 5774, 7369, 7972, 100...",0,GluT1,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental
4,"(109662, 110592, 111608, 113614, 115944, 11889...",1,DAPI,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental


## Birth Data Merge


In [628]:
'''
Fetches a table connecting DJID to date of birth, sex and date of death,
derives the age in months, and merges sex and age into Cell_Results.

'''

# DJ query and fetch, then flatten the index and use this notebook's column names
init_query = sln_animal.Animal.proj('dob', 'sex') * sln_animal.AnimalEvent * sln_animal.Deceased
birth_data = (
    init_query.proj('dob', 'sex', 'date')
    .fetch(format = 'frame')
    .reset_index()
    .drop(columns = ['event_id'])
    .rename(columns = {'animal_id': 'DJID', 'dob': 'birth', 'date': 'death'})
)

# Both dates must be datetimes before they can be subtracted
for date_col in ('birth', 'death'):
    birth_data[date_col] = pd.to_datetime(birth_data[date_col])

# Age at death in days, then in months (approximately, assuming ~30.44 days in a month)
birth_data['Age_Days'] = (birth_data['death'] - birth_data['birth']).dt.days
birth_data['Age_Months'] = (birth_data['Age_Days'] / 30.44).round(1)

# Merge, keeping only sex and Age_Months from the birth table
Cell_Results = (
    Cell_Results
    .merge(birth_data, how = 'left', on = 'DJID')
    .drop(columns = ['birth', 'death', 'Age_Days'])
)
Cell_Results.head()

Unnamed: 0,Y_vals,mask_id,Stain,X_vals,file_name,DJID,Eye,eGFP_Value,in_rip,Genotype,Group_Type,sex,Age_Months
0,"(88231, 88395, 89662, 90581, 92721, 93422, 942...",0,DAPI,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental,Male,20.9
1,"(12355, 12520, 12597, 12537, 12876, 13175, 132...",0,eGFP,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental,Male,20.9
2,"(36257, 36092, 38264, 41798, 46942, 55160, 596...",0,WGA,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental,Male,20.9
3,"(3717, 3686, 3944, 4754, 5774, 7369, 7972, 100...",0,GluT1,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental,Male,20.9
4,"(109662, 110592, 111608, 113614, 115944, 11889...",1,DAPI,"(0.0, 0.15, 0.3, 0.44999999999999996, 0.6, 0.7...",2007R_GLUT1_647_WGA_594_0002,2007,R,False,False,het,Experimental,Male,20.9


## Slice Separation and ID generation

In [629]:
'''
1) Adds a column with the z-stack slice separation, taken as the gap between
   the first two X values of each row

2) Adds a cell id: rows sharing the same file_name and mask_id get the same id

'''
Cell_Results['Slice_Seperation'] = Cell_Results['X_vals'].map(lambda xs: xs[1] - xs[0])

cell_keys = ['file_name', 'mask_id']
Cell_Results['Cell'] = Cell_Results.groupby(cell_keys).ngroup()

## Filters

In [622]:
'''
Goes through each cell in Cell_Results, finds its WGA and DAPI peaks, and
removes cells whose peak pattern fails the checks below.

Prominence = 25 was chosen as it experimentally retained the most number of good cells.

'''

# Peak Filter

PEAK_PROMINENCE = 25        # find_peaks prominence used for both stains
WGA_PEAK_SPACING_UM = 3     # WGA peaks must be at least 3 microns apart
DAPI_PEAK_SPACING_UM = 10   # DAPI peaks must be at least 10 microns apart

# Count cells by id instead of assuming exactly four stain rows per cell
print(f"Original Number of Cells: {Cell_Results['Cell'].nunique()}")

remove_list = []

# One groupby pass replaces a full-frame query per cell
for cell_number, CR_Query in Cell_Results.groupby('Cell'):

    slice_seperation = CR_Query['Slice_Seperation'].iloc[0]

    # Convert the micron spacing into a sample count. find_peaks rejects
    # distance < 1, so never let the truncated value drop below one sample.
    WGA_min_distance = max(1, int(WGA_PEAK_SPACING_UM / slice_seperation))
    DAPI_min_distance = max(1, int(DAPI_PEAK_SPACING_UM / slice_seperation))

    WGA_rows = CR_Query.loc[CR_Query['Stain'] == 'WGA', 'Y_vals']
    DAPI_rows = CR_Query.loc[CR_Query['Stain'] == 'DAPI', 'Y_vals']

    # A cell without a WGA or DAPI trace cannot be assessed: drop it
    if WGA_rows.empty or DAPI_rows.empty:
        remove_list.append(cell_number)
        continue

    WGA_Y_vals = WGA_rows.iloc[0]
    DAPI_Y_vals = DAPI_rows.iloc[0]

    peaks_WGA, _ = find_peaks(WGA_Y_vals, prominence = PEAK_PROMINENCE, distance = WGA_min_distance)
    peaks_DAPI, _ = find_peaks(DAPI_Y_vals, prominence = PEAK_PROMINENCE, distance = DAPI_min_distance)

    # ---- WGA filters ----

    # 1) Remove if less than two peaks
    if len(peaks_WGA) < 2:
        remove_list.append(cell_number)
        continue

    # 2) Remove if a later WGA peak is taller than the second one
    if peaks_WGA[1] != peaks_WGA[-1] and WGA_Y_vals[peaks_WGA[-1]] > WGA_Y_vals[peaks_WGA[1]]:
        remove_list.append(cell_number)
        continue

    # ---- DAPI filters ----

    # 3) Remove if there is no DAPI peak at all
    if len(peaks_DAPI) < 1:
        remove_list.append(cell_number)
        continue

    # 4) Remove if the last DAPI peak is taller than the second one.
    # NOTE(review): the guard tests peaks_DAPI[0] but the comparison uses
    # peaks_DAPI[1], so with exactly two peaks this can never trigger.
    # Possibly peaks_DAPI[0] was intended -- confirm before changing, since it
    # alters which cells are kept.
    if peaks_DAPI[0] != peaks_DAPI[-1] and DAPI_Y_vals[peaks_DAPI[-1]] > DAPI_Y_vals[peaks_DAPI[1]]:
        remove_list.append(cell_number)

# Removing undesirable cells from main list.
# .copy() makes the result an independent frame so later column assignments
# do not raise SettingWithCopyWarning.
remove_list = np.unique(remove_list)
Cell_Results = Cell_Results[~Cell_Results['Cell'].isin(remove_list)].copy()

print(f"Remaining Cells after Filter: {Cell_Results['Cell'].nunique()}")
print(f'Cell Numbers Removed {remove_list}, count : {len(remove_list)}')

Original Number of Cells: 209.0
Remaining Cells after Filter: 208.0
Cell Numbers Removed [0], count : 1


In [9]:
# eGFP finder
#
# A cell is labelled T5 'Positive' when its eGFP trace has exactly one peak that
#   1) is at least half as tall as the DAPI peak (enforced through `height`),
#   2) lies between the first two WGA peaks, and
#   3) is taller than half of the second WGA peak.
# Every other cell is labelled 'Negative'.
eGFP_Filtered = []
remove_from_eGFP = []

for cell_number, CR_Query in Cell_Results.groupby('Cell'):

    # One intensity trace per stain for this cell
    traces = {stain: np.asarray(rows['Y_vals'].iloc[0]) for stain, rows in CR_Query.groupby('Stain')}

    # Without all three traces the criteria cannot be evaluated
    if not {'DAPI', 'eGFP', 'WGA'}.issubset(traces):
        continue

    DAPI_idx, _ = find_peaks(traces['DAPI'], prominence= 25, distance = 5)
    WGA_idx, _ = find_peaks(traces['WGA'], prominence= 25)

    # Need a DAPI reference peak and two WGA peaks to bound the cell
    if len(DAPI_idx) == 0 or len(WGA_idx) < 2:
        continue

    DAPI_peak_val = traces['DAPI'][DAPI_idx]
    WGA_peak_val = traces['WGA'][WGA_idx]

    # eGFP peak needs to be at least half of DAPI peak to be considered eGFP positive.
    # `height` must be a scalar; when several DAPI peaks are found the tallest
    # one is used -- NOTE(review): confirm this is the intended reference peak.
    min_eGFP_height = int(DAPI_peak_val.max() / 2)
    eGFP_idx, _ = find_peaks(traces['eGFP'], prominence= 100, distance = 10, height = min_eGFP_height)

    # Only cells with exactly one qualifying eGFP peak are considered
    if len(eGFP_idx) != 1:
        continue

    eGFP_peak_idx = eGFP_idx[0]
    eGFP_peak_val = traces['eGFP'][eGFP_peak_idx]

    # If eGFP peak is not within WGA peaks, cell is not considered eGFP positive
    if eGFP_peak_idx < WGA_idx[0] or eGFP_peak_idx > WGA_idx[1]:
        remove_from_eGFP.append(int(cell_number))
        continue

    # The eGFP peak must exceed half the height of the second WGA peak
    if eGFP_peak_val > WGA_peak_val[1] / 2:
        eGFP_Filtered.append(int(cell_number))


eGFP_Filtered = sorted(set(eGFP_Filtered))

print(f'Cells with eGFP: {eGFP_Filtered}, count: {len(eGFP_Filtered)}')

# Labeling Cells T5 Positive or Negative.
# assign() builds a new frame, so this is safe even when Cell_Results is a
# filtered slice of an earlier frame.
Cell_Results = Cell_Results.assign(
    T5 = np.where(Cell_Results['Cell'].isin(eGFP_Filtered), 'Positive', 'Negative')
)

Cells with eGFP: [7, 13, 19, 21, 22, 28, 32, 36, 38, 43, 47, 50, 56, 58, 61, 62, 75, 77, 108, 110, 116, 117, 118, 124, 143, 162, 166, 170, 179, 189, 197, 207, 209, 218, 225, 230, 265, 272, 446, 453, 462, 465, 466, 468, 477, 486, 493, 497, 518, 530, 551, 557, 560, 564, 639, 658, 667, 709, 719, 720, 721, 723, 797, 825, 826, 828, 841, 844, 854, 857, 858, 870, 889, 910, 912, 932, 933, 939, 969, 977, 982, 984, 985, 993, 997, 1008, 1026, 1030, 1031, 1033, 1041, 1047, 1053, 1059, 1064, 1079, 1080, 1084, 1092, 1099, 1110, 1114, 1119, 1120, 1124, 1127, 1128, 1159, 1160, 1167, 1186, 1187, 1188, 1192, 1198, 1208, 1211, 1212, 1223, 1224, 1236, 1239, 1241, 1252, 1271, 1277, 1281, 1292, 1295, 1300, 1302, 1331, 1336, 1345, 1347, 1356, 1360, 1363, 1365, 1369, 1374, 1376, 1377, 1378, 1381, 1391, 1401, 1402, 1408, 1409, 1414, 1422, 1440, 1442, 1456, 1459, 1466, 1472, 1473, 1491, 1502, 1504, 1509, 1510, 1517, 1525, 1526, 1528, 1530, 1531, 1536, 1537, 1543, 1545, 1548, 1554, 1555, 1556, 1566, 1572, 1575, 

## Summary of Cell_Results

In [10]:
'''
Sanity Check: lists the distinct values of the main categorical columns.
'''

# Each label names the column actually being printed
print('Genotype', Cell_Results['Genotype'].unique())
print('Stains', Cell_Results['Stain'].unique())
print('T5 (eGFP) Labels', Cell_Results['T5'].unique())

print('\n')
print('###'*12)
print('\n')

'''
Summary of mouse information
'''

def extract_before_underscore(input_string):
    """Return the text before the first underscore, or None if there is no underscore."""
    head, separator, _ = input_string.partition("_")
    return head if separator else None
    
# The frame loaded from 'all_data.csv' names the column 'file_name'; the older
# Cell_Results.csv export used 'File'. Use whichever is present.
file_col = 'file_name' if 'file_name' in Cell_Results.columns else 'File'
eyes = Cell_Results[file_col].apply(extract_before_underscore).unique()

def extract_first_four_integers(input_string):
    """Return the first run of consecutive digits in ``input_string`` as an int.

    Despite the name, no particular digit count is enforced: the whole first
    digit run is converted. Raises IndexError when the string has no digits.
    """
    digit_runs = re.findall(r'\d+', input_string)
    return int(digit_runs[0])

# Animal ids recovered from the eye labels (e.g. '2007R' -> 2007)
animals = pd.Series([extract_first_four_integers(label) for label in eyes]).unique() # Extracting the unique values

# Number of left / right eyes, judged by the letter in each eye label
left = sum('L' in label for label in eyes)
right = sum('R' in label for label in eyes)

# One T5 label per cell. Counting cells this way does not rely on every cell
# having exactly four stain rows, and yields 0 (not a KeyError) when a class
# is absent.
t5_by_cell = Cell_Results.groupby('Cell')['T5'].first()
total = len(t5_by_cell)
positive_count = int((t5_by_cell == 'Positive').sum())
negative_count = int((t5_by_cell == 'Negative').sum())

# Percentages are reported as NaN when the denominator is zero
pos_over_neg_pct = positive_count / negative_count * 100 if negative_count else float('nan')
pos_over_total_pct = positive_count / total * 100 if total else float('nan')


###
print(f'Number of mice: {len(animals)}')
print(f'Mice DJIDs: {animals}')
print(f'Number of eyes: {len(eyes)}, Left = {left} R = {right}')

###
print('\n')
print('###'*12)
print('\n')

###
print('Number of Cells', total)
print('eGFP Positive', positive_count)
print('eGFP Negative', negative_count)
print('Positive over Negative Percentage', pos_over_neg_pct)
print('Positive over Total Percentage', pos_over_total_pct)

Genotype ['homo' 'het']
T5 Labelling ['WGA' 'eGFP' 'DAPI' 'GluT1']
eGFP Types ['Negative' 'Positive']


####################################


Number of mice: 9
Mice DJIDs: [3318 2143 2146 3201 3111 2006 2007 3203 3101]
Number of eyes: 16, Left = 8 R = 8


####################################


Number of Cells 2007.0
eGFP Positive 391.0
eGFP Negative 1616.0
Positive over Negative Percentage 24.195544554455445
Positive over Total Percentage 19.48181365221724


In [19]:
# Display the column names currently present in Cell_Results
Cell_Results.columns

Index(['File', 'Cell', 'Stain', 'X_vals', 'Y_vals', 'DJID', 'Eye', 'Genotype',
       'Group_Type', 'sex', 'Age_Months', 'Slice_Seperation', 'T5'],
      dtype='object')

In [34]:
# Distinct Age_Months values for every (Genotype, DJID, sex) combination
Cell_Results.groupby(['Genotype','DJID','sex'])['Age_Months'].unique()

Genotype  DJID  sex   
het       2006  Male      [20.6]
          2007  Male      [20.9]
          2143  Female    [14.3]
          3111  Male       [5.4]
homo      2146  Female    [14.3]
          3101  Male       [5.2]
          3201  Male       [4.3]
          3203  Female     [4.5]
          3318  Female     [3.3]
Name: Age_Months, dtype: object

# Calculations

## Defining Functions

In [58]:
'''
Function: Calculating WGA Peaks

Creates columns: WGA_Middle_Indices, Length (um)

'''

def WGA_Peaks_Finder(dataframe, prom_val:int = 25):
    """Find the WGA peaks of every cell and the distance between the first two.

    Parameters
    ----------
    dataframe : DataFrame with columns 'Cell', 'Stain', 'Y_vals' and
        'Slice_Seperation' (one row per cell and stain).
    prom_val : prominence passed to ``scipy.signal.find_peaks``.

    Returns
    -------
    A new DataFrame: ``dataframe`` left-merged on 'Cell' with
      * 'WGA_Middle_Indices' -- array of peak indices in the cell's WGA trace
      * 'Length'             -- (second peak - first peak) * that cell's own
                                slice separation; NaN if fewer than two peaks.
    """
    cells, peak_indices, lengths = [], [], []

    for cell, cell_df in dataframe.groupby('Cell'):
        wga_traces = cell_df.loc[cell_df['Stain'] == 'WGA', 'Y_vals']

        if wga_traces.empty:
            # No WGA row for this cell: treat it as having no peaks
            peaks = np.array([], dtype=int)
        else:
            peaks = find_peaks(wga_traces.iloc[0], prominence = prom_val)[0]

        # Use this cell's own spacing; z-stacks may differ in slice separation
        slice_separation = cell_df['Slice_Seperation'].iloc[0]

        if len(peaks) > 1:
            length = (peaks[1] - peaks[0]) * slice_separation
        else:
            length = np.nan

        cells.append(cell)
        peak_indices.append(peaks)
        lengths.append(length)

    grouped_WGA_indices = pd.DataFrame({
        'Cell': cells,
        'WGA_Middle_Indices': peak_indices,
        'Length': lengths
    })

    # Merge the peak indices back into the original DataFrame
    result_df = pd.merge(dataframe, grouped_WGA_indices, on='Cell', how='left')

    return result_df


In [59]:
'''
Function: Calculates "Top" and "Bottom" INDICES to be used for integral boundaries

Creates columns: WGA_Top_Indices, WGA_Bottom_Indices
'''

def Top_Bottom_Indices(dataframe, microns_extension: float = 1.5):
    """Compute the index windows just outside the two WGA peaks of each cell.

    Parameters
    ----------
    dataframe : DataFrame with columns 'Cell', 'Slice_Seperation',
        'WGA_Middle_Indices' and 'X_vals'.
    microns_extension : how far (in microns) each window extends beyond its peak.

    Returns
    -------
    A new DataFrame: ``dataframe`` left-merged on 'Cell' with
      * 'WGA_Top_Indices'    -- (first peak - offset, first peak)
      * 'WGA_Bottom_Indices' -- (second peak, second peak + offset)
    Both windows are clamped to valid positions of the trace, i.e.
    [0, len(X_vals) - 1]. A missing peak yields NaN entries.
    """
    cells, top_windows, bottom_windows = [], [], []

    for cell, cell_df in dataframe.groupby('Cell'):
        peaks = cell_df['WGA_Middle_Indices'].iloc[0]
        slice_separation = cell_df['Slice_Seperation'].iloc[0]
        last_idx = len(cell_df['X_vals'].iloc[0]) - 1

        # Micron extension expressed as a whole number of samples; an unusable
        # separation (missing or non-positive) means no extension
        if pd.notna(slice_separation) and slice_separation > 0:
            index_offset = int(microns_extension / slice_separation)
        else:
            index_offset = 0

        # Separate the peak indices
        l_middle = peaks[0] if len(peaks) > 0 else np.nan
        r_middle = peaks[1] if len(peaks) > 1 else np.nan

        # Clamp at 0: a negative start would wrap around when used in a slice
        l_top = l_middle if pd.isna(l_middle) else max(l_middle - index_offset, 0)
        # Clamp at the last valid index of the trace
        r_bot = r_middle if pd.isna(r_middle) else min(r_middle + index_offset, last_idx)

        cells.append(cell)
        top_windows.append((l_top, l_middle))
        bottom_windows.append((r_middle, r_bot))

    # Creating DataFrame for merging
    idx_df = pd.DataFrame({
        'Cell': cells,
        'WGA_Top_Indices': top_windows,
        'WGA_Bottom_Indices': bottom_windows
    })

    # Merge adjusted index data back into the original DataFrame
    result_df = pd.merge(dataframe, idx_df, on='Cell', how='left')
    return result_df

In [60]:
'''
Function: Calculates "Top", "Middle", "Bottom" INTEGRALS

Creates columns: Middle_Integral, Top_Integral, Bottom_Integral
'''

def TopMidBot_Integrals(dataframe):
    """Add 'Middle_Integral', 'Top_Integral' and 'Bottom_Integral' columns.

    For every row, each integral is the sum of the row's 'Y_vals' over the
    inclusive index window stored in the matching 'WGA_<section>_Indices'
    column. The columns are added to ``dataframe`` itself, which is also
    returned.
    """

    def _window_sum(row, bounds_col):
        # Both ends of the window are included, hence the +1 on the stop index
        start, stop = row[bounds_col][0], row[bounds_col][1]
        return np.sum(np.array(row['Y_vals'])[start:stop + 1])

    for section in ('Middle', 'Top', 'Bottom'):
        bounds_col = f'WGA_{section}_Indices'
        dataframe[f'{section}_Integral'] = dataframe.apply(
            lambda row: _window_sum(row, bounds_col), axis=1
        )

    return dataframe

In [61]:
'''
Function: Calculates "Top", "Bottom" INTEGRAL RATIOS

# Calculating the ratio of Glut1 over WGA (normalization) .5 microns about the WGA peaks

Creates columns: [WGA or GluT1]_[Top or Bot]_Surface_Integral
'''

def Surface_Integrals(dataframe, radius: float = 0.5):
    """Integrate GluT1 and WGA in a small window around each of the two WGA peaks.

    Parameters
    ----------
    dataframe : DataFrame with columns 'Cell', 'Stain', 'Y_vals', 'X_vals',
        'WGA_Middle_Indices' and 'Slice_Seperation'. Every cell needs a 'WGA'
        and a 'GluT1' row.
    radius : half-width of the window around each peak, in the same units as
        'Slice_Seperation' (default 0.5).

    Returns
    -------
    A new DataFrame: ``dataframe`` left-merged on 'Cell' with the columns
    'GluT1_Top_Surface_Integral', 'GluT1_Bot_Surface_Integral',
    'WGA_Top_Surface_Integral', 'WGA_Bot_Surface_Integral',
    'Top_Surface_Ratio' and 'Bot_Surface_Ratio' (ratios are GluT1 / WGA).
    """
    result_cols = [
        'GluT1_Top_Surface_Integral', 'GluT1_Bot_Surface_Integral',
        'WGA_Top_Surface_Integral', 'WGA_Bot_Surface_Integral',
        'Top_Surface_Ratio', 'Bot_Surface_Ratio',
    ]

    def _window_sum(y_vals, center, idx_offset, last_idx):
        # Inclusive window [center - offset, center + offset], clamped to the trace
        lo = max(center - idx_offset, 0)
        hi = min(center + idx_offset, last_idx)
        return np.sum(y_vals[lo:hi + 1])

    # One plain record per cell; the frame is built once after the loop
    records = []

    for cell, group in dataframe.groupby('Cell'):
        # Filter for WGA and GluT1 stains
        wga_row = group.loc[group['Stain'] == 'WGA'].iloc[0]
        glut1_row = group.loc[group['Stain'] == 'GluT1'].iloc[0]

        peak_indices = wga_row['WGA_Middle_Indices']
        last_idx = len(wga_row['X_vals']) - 1

        # Radius expressed as a whole number of samples for this cell
        idx_offset = int(radius / group['Slice_Seperation'].iloc[0])

        top_GluT1 = _window_sum(glut1_row['Y_vals'], peak_indices[0], idx_offset, last_idx)
        bot_GluT1 = _window_sum(glut1_row['Y_vals'], peak_indices[1], idx_offset, last_idx)
        top_WGA = _window_sum(wga_row['Y_vals'], peak_indices[0], idx_offset, last_idx)
        bot_WGA = _window_sum(wga_row['Y_vals'], peak_indices[1], idx_offset, last_idx)

        records.append({
            'Cell': cell,
            'GluT1_Top_Surface_Integral': top_GluT1,
            'GluT1_Bot_Surface_Integral': bot_GluT1,
            'WGA_Top_Surface_Integral': top_WGA,
            'WGA_Bot_Surface_Integral': bot_WGA,
            # Ratio calculations: GluT1 normalised by WGA
            'Top_Surface_Ratio': top_GluT1 / top_WGA,
            'Bot_Surface_Ratio': bot_GluT1 / bot_WGA,
        })

    # Nothing to integrate (empty input): still return the expected columns
    if not records:
        return dataframe.assign(**{col: np.nan for col in result_cols})

    results_df = pd.DataFrame(records)

    # Merge the per-cell results back into the original DataFrame
    return dataframe.merge(results_df, on='Cell', how='left')

In [62]:
"""
Function: Reshapes columns, so that each group has a column with '{Stain}_{Integral_Type}'. 4 x 3 = 12 columns are added
"""

def Reshape_Integrals(dataframe):
    """Add one '{Stain}_{Integral_Type}' column per stain and integral type.

    The 'Middle_Integral', 'Top_Integral' and 'Bottom_Integral' values are
    pivoted so each cell has a single value per stain and integral type, and
    those wide columns are merged back onto every row of the cell.

    Returns a new DataFrame; ``dataframe`` is not modified.
    """

    # Make pivot table: one row per cell, columns keyed by (integral type, stain)
    pivot_df = dataframe.pivot_table(
        index=['Cell'],
        columns='Stain',
        values=['Middle_Integral', 'Top_Integral', 'Bottom_Integral'],
        aggfunc='first'
    )

    # Flatten the two-level column headers into '{Stain}_{Integral_Type}'
    pivot_df.columns = [f'{stain}_{integral}' for integral, stain in pivot_df.columns]

    # 'Cell' must be a regular column to serve as the merge key
    pivot_df = pivot_df.reset_index()

    # Merge on 'Cell' explicitly. Without `on`, pandas joins on every shared
    # column name, which silently drops rows if a wide column already exists
    # in `dataframe` (e.g. when the cell is run twice).
    result_df = pd.merge(dataframe, pivot_df, on='Cell')

    return result_df

In [63]:
# Work on a copy so Cell_Results itself is left untouched, then run every
# calculation step in order.
Cell_Results_Calcs = (
    Cell_Results.copy()
    # These functions find indices
    .pipe(WGA_Peaks_Finder)
    .pipe(Top_Bottom_Indices)
    # These functions calculate integrals using the index boundaries
    .pipe(TopMidBot_Integrals)
    .pipe(Reshape_Integrals)
    .pipe(Surface_Integrals)
)

## Export

In [66]:
'''
Builds the dataframe that gets exported (one row per cell)
- Keeps a fixed set of columns
- Renames some of them
- Maps category values to their export names (e.g. T5 'Positive' -> 1)
'''

# Columns to Keep
keep_cols = ['DJID', 'Genotype', 'Eye', 'sex', 'Age_Months', 'T5', 'Group_Type', 'Length',
             'GluT1_Top_Surface_Integral', 'GluT1_Bot_Surface_Integral', 'WGA_Top_Surface_Integral', 'WGA_Bot_Surface_Integral',
             'GluT1_Top_Integral', 'GluT1_Middle_Integral', 'GluT1_Bottom_Integral',
             'WGA_Top_Integral', 'WGA_Middle_Integral', 'WGA_Bottom_Integral']

# Columns to rename
renamed_cols = {'sex': 'Sex',
                'T5': 'eGFP_Value',
                'Length': 'Length_um',
                'Group_Type' :'Experimental_Condition'}

# Value remapping tables
eGFP_map = {'Positive': 1, 'Negative': 0}
Exp_Con_map = {'Experimental': 'Light Flicker', 'Control': 'Dark'}

# First value of every kept column per cell, with the cell id dropped
Export_df = (
    Cell_Results_Calcs.groupby('Cell').first()[keep_cols]
    .reset_index(drop = True)
    .rename(columns = renamed_cols)
)

# Values missing from a map become NaN and are reported by the check below
Export_df['eGFP_Value'] = Export_df['eGFP_Value'].map(eGFP_map)
Export_df['Experimental_Condition'] = Export_df['Experimental_Condition'].map(Exp_Con_map)

# Checking for NA vals
if Export_df.isna().any().any():
    print('There are NA values, check for problems')

Export_df

Unnamed: 0,DJID,Genotype,Eye,Sex,Age_Months,eGFP_Value,Experimental_Condition,Length_um,GluT1_Top_Surface_Integral,GluT1_Bot_Surface_Integral,WGA_Top_Surface_Integral,WGA_Bot_Surface_Integral,GluT1_Top_Integral,GluT1_Middle_Integral,GluT1_Bottom_Integral,WGA_Top_Integral,WGA_Middle_Integral,WGA_Bottom_Integral
0,3318,homo,L,Female,3.3,0,Dark,4.05,356.711,254.924,3543.140,2042.528,433.597,1293.476,373.803,3718.553,8787.001,2766.512
1,3318,homo,L,Female,3.3,0,Dark,3.60,413.792,295.081,4244.286,2527.544,460.604,1362.935,410.086,3868.958,10281.797,3326.948
2,3318,homo,L,Female,3.3,0,Dark,3.75,437.078,299.160,3676.296,2324.667,476.453,1557.103,410.952,3616.971,8652.692,3138.795
3,3318,homo,L,Female,3.3,0,Dark,3.75,383.277,305.551,3899.951,2743.418,430.363,1311.415,420.554,3374.527,10366.993,3532.583
4,3318,homo,L,Female,3.3,0,Dark,3.60,404.290,291.613,3453.174,2690.338,452.136,1242.936,416.688,3227.657,8629.897,3607.310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2002,3318,homo,R,Female,3.3,0,Light Flicker,4.80,817.697,557.738,3908.031,2341.141,753.929,2920.764,802.589,3397.296,11826.174,3396.824
2003,3318,homo,R,Female,3.3,1,Light Flicker,4.95,921.407,545.581,4235.757,2195.389,740.539,3383.287,744.685,2960.880,12110.774,3036.334
2004,3318,homo,R,Female,3.3,0,Light Flicker,4.50,674.534,574.570,3100.628,2299.651,627.103,2492.457,808.891,2624.451,8913.281,3364.984
2005,3318,homo,R,Female,3.3,0,Light Flicker,4.65,876.517,624.620,4059.640,2759.808,763.332,3234.224,839.456,3304.047,11800.279,3760.830


In [67]:
# Write the export table to 'Image_Analysis.csv' without the row index
Export_df.to_csv('Image_Analysis.csv', index = False)