In [1]:
import os
import pandas as pd
import tifffile as tff
import openpyxl
import xlsxwriter
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
import math
import scikit_posthocs as sp

ModuleNotFoundError: No module named 'xlsxwriter'

In [None]:
# Root folder for the analysis: the notebook's current working directory.
# metrics_df reads its input CSVs from "<sxtdata>/3D Suite Measurements/Raw/".
sxtdata = os.getcwd() 
sxtdata

## Processing Raw Data ##

In [None]:
# Input must be name of structure, capitalized ("Cell", "Vacuole", "Nucleus", "LD")
# output: 4 dataframes (processed all strains, WT, vph1-GFP, vac14); data saved to an Excel file w 5 sheets
# processing: scale msmts according to voxel size, apply size filter, reset index

def _pixel_size_for(cell_id, pix_dict):
    """Return the first pixel size in pix_dict that has a tag contained in cell_id."""
    for size, tags in pix_dict.items():
        if any(tag in cell_id for tag in tags):
            return size
    raise ValueError("No pixel size is registered for cell ID: " + str(cell_id))


def _replace_column(df, old_col, new_col, values):
    """Return a copy of df where old_col is removed and new_col (values) sits in its place."""
    position = df.columns.get_loc(old_col)
    replaced = df.drop(columns=[old_col])  # drop() returns a new frame; keep the result
    replaced.insert(position, new_col, values)
    return replaced


def metrics_df(struc):
    """Load, condense, scale and filter the 3D Suite measurements of one structure.

    Parameters
    ----------
    struc : str
        Structure name used in the file names: "Cell", "Vacuole", "Nucleus" or "LD".

    Returns
    -------
    tuple of 4 DataFrames
        (all strains, WT, vph1-GFP, vac14). Each one has had reset_index()
        applied, so the previous row labels are kept in an 'index' column.

    Raises
    ------
    ValueError
        If a cell ID matches none of the pixel-size tags listed below.
    KeyError
        If an expected measurement column is missing from the condensed table.

    Side effects
    ------------
    Reads "<sxtdata>/3D Suite Measurements/Raw/<struc> 3D Suite Results.csv",
    prints a few counts, and writes "<struc> 3D Suite.xlsx" (5 sheets) to the
    current working directory.
    """
    raw_3Dsuite = pd.read_csv(sxtdata + "/3D Suite Measurements/Raw/" + struc + " 3D Suite Results.csv")
    cols = raw_3Dsuite.columns.tolist()

    # object name = image label + object ID number
    objIDs_list = [label + "_" + str(obj)
                   for label, obj in zip(raw_3Dsuite['Label'], raw_3Dsuite['LabelObj'])]
    print("Objects list:", len(objIDs_list))

    # condense list of objects by removing repeats (first occurrence wins, order kept)
    objIDs_cond = list(dict.fromkeys(objIDs_list))
    print("Unique Objects:", len(objIDs_cond))

    # cell ID of every unique object = everything before ".tiff_"
    cellIDs_cond = [obj.split('.tiff_')[0] for obj in objIDs_cond]
    print("Cells:", len(cellIDs_cond))

    # Keep only the measurement columns whose number of nonzero values equals the
    # number of unique objects; every other column is reported as removed.
    cond_3Dsuite = {'Cell ID': cellIDs_cond, 'Object ID': objIDs_cond}
    for c in cols:
        nonzero_colvals = [v for v in raw_3Dsuite[c] if v != 0]
        if len(nonzero_colvals) == len(objIDs_cond):
            cond_3Dsuite[c] = nonzero_colvals

    # check which parameter columns were excluded (should at least include index, Label, and LabelObj)
    removed_cols = [x for x in cols if x not in cond_3Dsuite]
    print("Removed: ", removed_cols)

    struc_df = pd.DataFrame(cond_3Dsuite)

    # --- scale measurements according to known pixel sizes -------------------
    def tags(prefix, start, stop):
        return [prefix + str(x) + "_" for x in range(start, stop)]

    # keys are pixel sizes, values are the ID fragments of the cells imaged at that size
    pix_dict = {
        30: ["1359", "1026", "1208", "1360"]
            + tags("1463_", 1, 18) + tags("1467_", 2, 12)
            + tags("1475_", 1, 10) + tags("1464_", 1, 19),
        35.31: tags("1463_", 18, 27) + tags("1467_", 12, 17)
               + tags("1475_", 11, 19) + tags("1464_", 20, 29),
        36.8: tags("1463_", 30, 35),
        30.23: ['1361_'],
    }

    # Exactly one pixel size per row. Appending once per matching tag (as a nested
    # loop would) lets a row with zero or several matches shift every later value.
    ps = pd.Series([_pixel_size_for(c, pix_dict) for c in struc_df['Cell ID']],
                   index=struc_df.index, dtype=float)

    # NOTE(review): a voxel with edge `ps` has volume ps**3 and face area ps**2, but
    # the factors below are ps*1e3 and ps*1e2. They are kept as-is because the size
    # thresholds further down are applied to these values - confirm the intended scaling.
    vol_scaled = (struc_df['Volume(Pix)'] * (ps * 1e3)) / 1e9
    c_x_scaled = (struc_df['CX(pix)'] * ps) / 1e3
    c_y_scaled = (struc_df['CY(pix)'] * ps) / 1e3
    c_z_scaled = (struc_df['CZ(pix)'] * ps) / 1e3
    SA_scaled = (struc_df['Surface(Pix)'] * (ps * 1e2)) / 1e6

    # replace each unscaled "unit" column with its scaled counterpart, in place
    struc_df_scl = struc_df
    for old_col, new_col, values in [
        ('Volume(Unit)', 'Volume (um^3)', vol_scaled),
        ('CX(unit)', 'CX (um)', c_x_scaled),
        ('CY(unit)', 'CY (um)', c_y_scaled),
        ('CZ(unit)', 'CZ (um)', c_z_scaled),
        ('Surface(Unit)', 'Surface (um)', SA_scaled),
    ]:
        struc_df_scl = _replace_column(struc_df_scl, old_col, new_col, values)

    # apply size filter to organelles - see notes 4/11/23 for threshold rationale
    min_volume = {'Vacuole': 0.3, 'Nucleus': 0.4}.get(struc)
    if min_volume is None:
        struc_df_scl_filt = struc_df_scl
    else:
        struc_df_scl_filt = struc_df_scl.loc[struc_df_scl['Volume (um^3)'] > min_volume]

    # sort the table by Object IDs so all objects from the same cell are consecutive
    struc_df_scl_filt_sort = struc_df_scl_filt.sort_values('Object ID')
    cell_ids = struc_df_scl_filt_sort['Cell ID']

    # split by strain; reset_index() keeps the previous row labels in an 'index' column
    WT = pd.concat([
        struc_df_scl_filt_sort[cell_ids.str.contains('BY471A')],
        struc_df_scl_filt_sort[cell_ids.str.contains('BY4741A')],
        struc_df_scl_filt_sort[cell_ids.str.contains('BY471_wt')],
    ]).reset_index()
    vph1GFP = struc_df_scl_filt_sort[cell_ids.str.contains('vph')].reset_index()
    vac14 = struc_df_scl_filt_sort[cell_ids.str.contains('VaCL4')].reset_index()

    # save data to multi-sheet Excel file (raw data, condensed, then split by strain).
    # The context manager closes the writer, which is what actually writes the file.
    sheets = {
        'Raw All Strains': raw_3Dsuite,
        "Processed All Strains": struc_df_scl_filt_sort,
        "WT": WT,
        "VPH1-GFP": vph1GFP,
        "vac14": vac14,
    }
    with pd.ExcelWriter(struc + " 3D Suite.xlsx", engine='xlsxwriter') as Excelwriter:
        for sheet_name, df in sheets.items():
            df.to_excel(Excelwriter, sheet_name=sheet_name, index=False)

    return (struc_df_scl_filt_sort.reset_index(), WT, vph1GFP, vac14)

In [None]:
# Run metrics_df once per structure. Each call returns a 4-tuple:
# (all strains, WT, vph1-GFP, vac14).

# Cell
cell = metrics_df("Cell")
cell_all, cell_WT, cell_vph1GFP, cell_vac14 = cell

# show wide frames on a single line instead of wrapping them
pd.set_option('expand_frame_repr', False)

# Vacuole
vac = metrics_df("Vacuole")
vac_all, vac_WT, vac_vph1GFP, vac_vac14 = vac

# Nucleus
nuc = metrics_df("Nucleus")
nuc_all, nuc_WT, nuc_vph1GFP, nuc_vac14 = nuc

# LD
LD = metrics_df("LD")
LD_all, LD_WT, LD_vph1GFP, LD_vac14 = LD

In [None]:
# Work out which cells lack a nucleus or any vacuole, so they can be excluded.
#
# Row counts seen on the analysed data set:
#   nuc_all 493, cell_all 510, vac_all 507, LD_all 2614

def _base_id(cell_id):
    """Part of a cell ID before 'multi' (the IDs end with _CELL, _NUC, etc)."""
    return cell_id.split('multi')[0]

# normalised IDs, one entry per row of each structure table
nuc_cellIDs = list(map(_base_id, nuc_all['Cell ID']))
cell_cellIDs = list(map(_base_id, cell_all['Cell ID']))
vac_cellIDs = list(map(_base_id, vac_all['Cell ID']))
LD_cellIDs = list(map(_base_id, LD_all['Cell ID']))

# cells that have both a nucleus and at least one vacuole (490 on the analysed data set)
_with_nucleus = set(nuc_cellIDs)
_with_vacuole = set(vac_cellIDs)
cell_cellIDs_filt = [cid for cid in cell_cellIDs
                     if cid in _with_nucleus and cid in _with_vacuole]
_kept = set(cell_cellIDs_filt)

# included entries per structure
# (counts seen: 490 cells, 490 nuclei, 492 vacuoles, 2557 LDs; cell and nucleus lists were identical)
cells_inc = [cid for cid in cell_cellIDs if cid in _kept]
nuc_inc = [cid for cid in nuc_cellIDs if cid in _kept]
vac_inc = [cid for cid in vac_cellIDs if cid in _kept]
LD_inc = [cid for cid in LD_cellIDs if cid in _kept]

# excluded entries per structure - the ones belonging to cells missing nuclei/vacuoles
cells_exc = [cid for cid in cell_cellIDs if cid not in _kept]
nuc_exc = [cid for cid in nuc_cellIDs if cid not in _kept]
vac_exc = [cid for cid in vac_cellIDs if cid not in _kept]
LD_exc = [cid for cid in LD_cellIDs if cid not in _kept]

# every excluded ID, across all structures
all_exc = set(cells_exc) | set(nuc_exc) | set(vac_exc) | set(LD_exc)

In [None]:
# remove every row that belongs to an excluded cell, then split the result by strain

def filter_metrics(struc_all, excluded=None):
    """Drop the rows of excluded cells from a structure table and split it by strain.

    Parameters
    ----------
    struc_all : DataFrame
        Table with at least 'Cell ID' and 'Object ID' columns.
    excluded : iterable of str, optional
        ID fragments; a row is removed when any fragment is a substring of its
        'Cell ID'. Defaults to the notebook-level `all_exc` set.

    Returns
    -------
    tuple of 4 DataFrames
        (all strains, WT, vph1-GFP, vac14), sorted by 'Object ID' and still
        carrying the row labels of `struc_all`.
    """
    if excluded is None:
        excluded = all_exc
    excluded = tuple(excluded)

    # One boolean per row instead of repeated drop(i): a row matching several
    # fragments is removed once, and the row labels need not be 0..n-1.
    keep = pd.Series(
        [not any(tag in cell_id for tag in excluded) for cell_id in struc_all['Cell ID']],
        index=struc_all.index,
        dtype=bool,
    )

    # sort the table by Object IDs so all objects from the same cell are consecutive
    struc_all_filt_sort = struc_all[keep].sort_values('Object ID')
    cell_ids = struc_all_filt_sort['Cell ID']

    # WT cells are identified by any of three name fragments
    struc_WT = pd.concat([
        struc_all_filt_sort[cell_ids.str.contains('BY471A')],
        struc_all_filt_sort[cell_ids.str.contains('BY4741A')],
        struc_all_filt_sort[cell_ids.str.contains('BY471_wt')],
    ])
    # vph1-GFP
    struc_vph1GFP = struc_all_filt_sort[cell_ids.str.contains('vph')]
    # vac14
    struc_vac14 = struc_all_filt_sort[cell_ids.str.contains('VaCL4')]

    return (struc_all_filt_sort, struc_WT, struc_vph1GFP, struc_vac14)

In [None]:
# create filtered dataframes for each structure_all/strain
# filter_metrics returns (all strains, WT, vph1-GFP, vac14); reset_index() renumbers
# the rows 0..n-1 and keeps the previous row labels as an extra column.
cell_filt = filter_metrics(cell_all)
cell_filt_all, cell_filt_WT, cell_filt_vph1gfp, cell_filt_vac14 = [df.reset_index() for df in cell_filt]

vac_filt = filter_metrics(vac_all)
vac_filt_all, vac_filt_WT, vac_filt_vph1gfp, vac_filt_vac14 = [df.reset_index() for df in vac_filt]

nuc_filt = filter_metrics(nuc_all)
nuc_filt_all, nuc_filt_WT, nuc_filt_vph1gfp, nuc_filt_vac14 = [df.reset_index() for df in nuc_filt]

LD_filt = filter_metrics(LD_all)
LD_filt_all, LD_filt_WT, LD_filt_vph1gfp, LD_filt_vac14 = [df.reset_index() for df in LD_filt]

# save excel files by strain: one workbook per strain, one sheet per structure
all_strucs = [cell_filt[0], vac_filt[0], nuc_filt[0], LD_filt[0]]
WT_strucs = [cell_filt[1], vac_filt[1], nuc_filt[1], LD_filt[1]]
vph1gfp_strucs = [cell_filt[2], vac_filt[2], nuc_filt[2], LD_filt[2]]
vac14_strucs = [cell_filt[3], vac_filt[3], nuc_filt[3], LD_filt[3]]

df_lists = [all_strucs, WT_strucs, vph1gfp_strucs, vac14_strucs]
strains = ["All Strains", "WT", "VPH1-GFP", "vac14"]
structures = ['Cell', "Vacuole", "Nucleus", "LD"]

for strain, strain_dfs in zip(strains, df_lists):
    # ExcelWriter.save() no longer exists in pandas >= 2.0; leaving the `with`
    # block closes the writer and writes the workbook.
    with pd.ExcelWriter(strain + " 3D Suite_Filtered.xlsx", engine='xlsxwriter') as writer:
        for structure, df in zip(structures, strain_dfs):
            df.to_excel(writer, sheet_name=structure, index=False)


In [None]:
# check that each filtered list contains the same cells

# unique Cell IDs per structure, as sorted lists
cell_filt_set = sorted(set(cell_filt[0]['Cell ID']))
nuc_filt_set = sorted(set(nuc_filt[0]['Cell ID']))
vac_filt_set = sorted(set(vac_filt[0]['Cell ID']))
LD_filt_set = sorted(set(LD_filt[0]['Cell ID']))

# list.sort() returns None, so `a.sort() == b.sort()` is always True and checks nothing.
# The IDs also end with a structure-specific suffix (_CELL, _NUC, ...), so compare the
# part before 'multi' instead of the raw strings.
cell_base_ids = {x.split('multi')[0] for x in cell_filt_set}
same_cells_as_cell_table = {
    'Nucleus': {x.split('multi')[0] for x in nuc_filt_set} == cell_base_ids,
    'Vacuole': {x.split('multi')[0] for x in vac_filt_set} == cell_base_ids,
    'LD': {x.split('multi')[0] for x in LD_filt_set} == cell_base_ids,
}
same_cells_as_cell_table

In [None]:
# # filtering vacuoles - excluding nonspecific too-small objects

# # plot WT vacuole volume histogram
# WTvac_vol_hist = plt.hist(vac_filt[0]['Volume (um^3)'], bins = 50,label='All Vac')
# plt.title('All Strains Vacuole Volume')
# plt.xlabel('Volume (um^3)')

# # output figures directory
# # os.getcwd()

# figs_dir = 'C:\\Users\\Mary\\Documents\\Shared folder\\Data\\SXT\\ML segmentation tiffs\\Final set for analysis\\3D Suite Measurements\\Figures\\'
# plt.savefig(figs_dir + 'all vac_vol_hist_bin30.png',format = 'png')

# # vac_WT.describe()


In [None]:
# Total vacuole volume per cell.
# vac_sum_dict maps every Cell ID in vac_filt_set to a one-element list holding the
# running total, starting at 0.
vac_sum_dict = {cell_id: [0] for cell_id in vac_filt_set}
# on the analysed data set: 490 cells, 492 vacuoles

# vac_filt_all has a fresh 0..n-1 index, so walking the two columns in parallel
# visits the same (object, volume) pairs as looking each row up by its number.
vac_object_ids = vac_filt_all['Object ID']
vac_object_vols = vac_filt_all['Volume (um^3)'].to_numpy()

for v, obj_vol in zip(vac_object_ids, vac_object_vols):
    # the part of the object ID before '.tiff_' is the cell ID = dictionary key
    vac_sum_dict[v.split('.tiff_')[0]][0] += obj_vol

# spot check from the analysed data set:
# vac_sum_dict['BY471A_1026_1_2_pre_rec_20_multi_bin_VAC'] should be 2.1+0.56 = 2.66
# len(vac_sum_dict) is 490, matching the number of cells

In [None]:
# find cells with multiple vacuoles
# Count vacuole objects per cell straight from the filtered vacuole table.
# vac_sum_dict cannot answer this: each of its values is a one-element running total,
# and appending test values to it would contaminate the data used further down.
vac_counts_per_cell = vac_filt_all['Cell ID'].value_counts()
multi_vac_cells = vac_counts_per_cell[vac_counts_per_cell > 1]
multi_vac_cells

In [None]:
# Table of summed vacuole volume per cell, ordered by Cell ID.
# Each dict value is a list whose first element is the cell's total.
vac_sum_vols_list = [total[0] for total in vac_sum_dict.values()]

vac_sum_df = (
    pd.DataFrame({
        'Cell ID': list(vac_sum_dict),
        'Vacuole Volume (um^3)': vac_sum_vols_list,
    })
    .sort_values('Cell ID')
    .reset_index()  # keeps the pre-sort row numbers in an 'index' column
)
vac_sum_df

In [None]:
# Total LD volume per cell - same procedure as for the vacuoles (no LD size filter).
# LD_sum_dict maps every Cell ID in LD_filt_set to a one-element list holding the
# running total, starting at 0.
LD_sum_dict = {cell_id: [0] for cell_id in LD_filt_set}
# on the analysed data set: 490 cells, 2557 LDs

# LD_filt_all has a fresh 0..n-1 index, so walking the two columns in parallel
# visits the same (object, volume) pairs as looking each row up by its number.
LD_object_ids = LD_filt_all['Object ID']
LD_object_vols = LD_filt_all['Volume (um^3)'].to_numpy()

for ld, obj_vol in zip(LD_object_ids, LD_object_vols):
    # the part of the object ID before '.tiff_' is the cell ID = dictionary key
    LD_sum_dict[ld.split('.tiff_')[0]][0] += obj_vol

# Table of summed LD volume per cell, ordered by Cell ID.
LD_sum_vols_list = [total[0] for total in LD_sum_dict.values()]

LD_sum_df = (
    pd.DataFrame({
        'Cell ID': list(LD_sum_dict),
        'LD Volume (um^3)': LD_sum_vols_list,
    })
    .sort_values('Cell ID')
    .reset_index()  # keeps the pre-sort row numbers in an 'index' column
)
LD_sum_df

In [None]:
# merged volume table for scaling analysis for each strain
#
# The four source tables are filtered and sorted independently, so their rows are
# matched on the cell key (the part of 'Cell ID' before 'multi') rather than on row
# position. With positional matching, one cell without LDs or with two nucleus
# objects would shift every following row onto the wrong cell.

def _cell_key(cell_ids):
    """Part of each Cell ID before 'multi' (the IDs end with _CELL, _NUC, etc)."""
    return cell_ids.str.split('multi').str[0]

cell_keys = _cell_key(cell_filt_all['Cell ID'])

# per-cell organelle volumes, indexed by cell key (objects of the same cell are summed)
nuc_vol_by_cell = nuc_filt_all['Volume (um^3)'].groupby(_cell_key(nuc_filt_all['Cell ID'])).sum()
vac_vol_by_cell = vac_sum_df['Vacuole Volume (um^3)'].groupby(_cell_key(vac_sum_df['Cell ID'])).sum()
LD_vol_by_cell = LD_sum_df['LD Volume (um^3)'].groupby(_cell_key(LD_sum_df['Cell ID'])).sum()

all_vols_df = pd.DataFrame({
    'Cell ID': cell_filt_all['Cell ID'],
    'Cell Volume (um^3)': cell_filt_all['Volume (um^3)'],
})
all_vols_df['Nucleus Volume (um^3)'] = cell_keys.map(nuc_vol_by_cell)
all_vols_df['Vacuole Sum Volume (um^3)'] = cell_keys.map(vac_vol_by_cell)
# a cell with no LD rows has zero LD volume, not a missing value
all_vols_df['LD Sum Volume (um^3)'] = cell_keys.map(LD_vol_by_cell).fillna(0)

cell_vol = all_vols_df['Cell Volume (um^3)']
nuc_vol = all_vols_df['Nucleus Volume (um^3)']
vac_vol = all_vols_df['Vacuole Sum Volume (um^3)']
LD_vol = all_vols_df['LD Sum Volume (um^3)']

# combined organelle volumes
all_vols_df['N+V'] = nuc_vol + vac_vol
all_vols_df['Sum Organelles'] = nuc_vol + vac_vol + LD_vol

# various versions of cytoplasm = cell volume minus the named organelle volume(s)
all_vols_df['Cyto_N'] = cell_vol - nuc_vol
all_vols_df['Cyto_V'] = cell_vol - vac_vol
all_vols_df['Cyto_LD'] = cell_vol - LD_vol
all_vols_df['Cyto_NV'] = cell_vol - all_vols_df['N+V']
all_vols_df['Cyto_Organelles'] = cell_vol - all_vols_df['Sum Organelles']

# ratios relative to the cell volume
all_vols_df['VCratio'] = vac_vol / cell_vol
all_vols_df['NCratio'] = nuc_vol / cell_vol
all_vols_df['LDCratio'] = LD_vol / cell_vol
all_vols_df['VNCratio'] = all_vols_df['N+V'] / cell_vol
all_vols_df['OrganellesCellratio'] = all_vols_df['Sum Organelles'] / cell_vol
all_vols_df['Cyto_OrganellesCellratio'] = all_vols_df['Cyto_Organelles'] / cell_vol

# ratios relative to the matching cytoplasm volume
all_vols_df['VCytoratio'] = vac_vol / all_vols_df['Cyto_V']
all_vols_df['NCytoratio'] = nuc_vol / all_vols_df['Cyto_N']
all_vols_df['LDCytoratio'] = LD_vol / all_vols_df['Cyto_LD']
all_vols_df['VNCytoratio'] = all_vols_df['N+V'] / all_vols_df['Cyto_NV']
all_vols_df['OrganellesCytoratio'] = all_vols_df['Sum Organelles'] / all_vols_df['Cyto_Organelles']

all_vols_df
                 

In [None]:
# split all_vols_df by strain

# WT cells are identified by any of three name fragments
wt1_vols_df = all_vols_df[all_vols_df['Cell ID'].str.contains('BY471A')]
wt2_vols_df = all_vols_df[all_vols_df['Cell ID'].str.contains('BY4741A')]
wt3_vols_df = all_vols_df[all_vols_df['Cell ID'].str.contains('BY471_wt')]
WT_vols_df = pd.concat([wt1_vols_df, wt2_vols_df, wt3_vols_df])

# vph1-GFP
vph1GFP_vols_df = all_vols_df[all_vols_df['Cell ID'].str.contains('vph')]
# vac14
vac14_vols_df = all_vols_df[all_vols_df['Cell ID'].str.contains('VaCL4')]

# save vols dfs - uncomment an entry to write that table as well
vols_to_save = {
    # 'all': all_vols_df,
    # 'WT': WT_vols_df,
    # 'vph1GFP': vph1GFP_vols_df,
    'vac14': vac14_vols_df,
}

for vols_name, vols_df in vols_to_save.items():
    vols_path = sxtdata + '/3D Suite Measurements/Volumes and ratios/' + vols_name + '_volumes_ratios.xlsx'
    # ExcelWriter.save() no longer exists in pandas >= 2.0; leaving the `with`
    # block closes the writer and writes the workbook.
    with pd.ExcelWriter(vols_path, engine='xlsxwriter') as vols_writer:
        vols_df.to_excel(vols_writer, index=False)

