In [2]:
#Libraries and dependencies 
import cv2
import cc3d
import math
import mrcfile
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import imagecodecs

import time
import tifffile as tiff
from tifffile import imread
from PIL import Image

import scipy as scipy
from scipy.ndimage import center_of_mass, sum as ndimage_sum
from scipy.stats import linregress, kurtosis, mode
from scipy import ndimage
from scipy.ndimage import map_coordinates

from sklearn.preprocessing import StandardScaler


import os
import glob

# Functions #
**openmrc, Ves_Analysis, Ves_Analysis_RawDist, df_forCluster, df_forCluster_unnorm, Mito_LAC_atContacts**

In [3]:
#Function to open a mrc file (also has logic to deal with a tiff file)

def openmrc(raw_mrc):
    """Load a volume from an MRC file, or from any other file through tifffile.

    Parameters
    ----------
    raw_mrc : str
        Path of the file to read. A ``.mrc`` extension (matched
        case-insensitively) selects the MRC reader; every other extension is
        handed to ``tifffile.imread``.

    Returns
    -------
    numpy.ndarray
        The voxel data. Non-MRC data is flipped along axis 1 so that its
        orientation matches the MRC files.

    Notes
    -----
    MRC files are opened in ``'r+'`` mode because the header map ID is
    overwritten with the standard ``MAP_ID``; the file on disk must therefore
    be writable. ``permissive=True`` lets files with a non-standard header be
    opened in the first place.
    """
    # os.path.splitext splits a path into (root, extension); lower-casing the
    # extension makes ".MRC" take the same branch as ".mrc".
    file_extension = os.path.splitext(raw_mrc)[1].lower()

    if file_extension == ".mrc":
        with mrcfile.open(raw_mrc, mode='r+', permissive=True) as mrc:
            mrc.header.map = mrcfile.constants.MAP_ID
            LAC_dat = mrc.data
    else:
        # Rotate orientation so it is the same as for the mrc files
        LAC_dat = np.flip(imread(raw_mrc), axis=1)

    return LAC_dat

In [4]:
#Vesicle Analysis Pipeline for LAC and geometric parameters 

def Ves_Analysis(Ves_mask, Mem_mask, Mito_mask, LAC_mrc, LACfactor):
    """Build a per-vesicle table of LAC statistics, geometry and normalised distances.

    Parameters
    ----------
    Ves_mask, Mem_mask, Mito_mask : str
        Paths of the vesicle, membrane and mitochondria masks (read with
        ``openmrc``). Every voxel > 0 counts as foreground.
    LAC_mrc : str
        Path of the raw intensity volume; its values are multiplied by
        ``LACfactor``. It is indexed with the vesicle-mask coordinates, so it
        presumably lives on the same voxel grid as the masks -- TODO confirm.
    LACfactor : float
        Scale factor of the cell. Also defines ``Resolution = 1000 / LACfactor``,
        which the output column names treat as nm per voxel edge.

    Returns
    -------
    pandas.DataFrame
        One row per 6-connected vesicle component with the columns
        ``Centroid X/Y/Z``, ``Group``, ``LAC Value`` (mean), ``LAC Max``,
        ``LAC Min``, ``LAC Std Dev``, ``LAC Skew``, ``LAC Kurtosis``,
        ``LAC Median``, ``LAC 25th Quantile``, ``LAC 75th Quantile``,
        ``Volume (um3)``, ``Geometric Diameter (nm)``,
        ``Ellipsoid Surface Area (um2)``, ``Min Distance to PM EDT`` and
        ``Mito Dist EDT``.

    Notes
    -----
    "X", "Y" and "Z" are array axes 0, 1 and 2 of the loaded volumes.
    """
    # Resolution is (1/LAC factor) * 1000
    Resolution = (1 / LACfactor) * 1000

    # ---- Label the vesicles -------------------------------------------------
    vesTifData = np.where(openmrc(Ves_mask) > 0, 1, 0)
    Ves_ConnectedComponents = cc3d.connected_components(vesTifData, connectivity=6)

    # The labels are used as 1..N throughout, so the largest label is the
    # vesicle count. Unlike len(np.unique(...)) - 1 this does not miscount when
    # the volume happens to contain no background voxel.
    Ves_number = int(Ves_ConnectedComponents.max())
    group_ids = range(1, Ves_number + 1)

    def per_vesicle(grouped_result):
        """Return a per-group result as an array ordered like the rows of df_Ves."""
        return grouped_result.reindex(group_ids).to_numpy()

    # Centroids using scipy.ndimage.center_of_mass (unit weight per voxel)
    Ves_centers = center_of_mass(np.ones_like(Ves_ConnectedComponents),
                                 Ves_ConnectedComponents, group_ids)
    df_Ves = pd.DataFrame(Ves_centers, columns=['Centroid X', 'Centroid Y', 'Centroid Z'])
    df_Ves['Group'] = group_ids

    # ---- One row per vesicle voxel -----------------------------------------
    # Every foreground voxel carries a non-zero label, so no background rows
    # have to be filtered out afterwards.
    xs, ys, zs = np.nonzero(Ves_ConnectedComponents)
    locations_df = pd.DataFrame({
        'X': xs,
        'Y': ys,
        'Z': zs,
        'Group': Ves_ConnectedComponents[xs, ys, zs].astype(np.int64),
    })

    # ---- LAC (chemical) statistics -----------------------------------------
    openLAC_mrc = openmrc(LAC_mrc) * LACfactor
    locations_df['LAC Value'] = openLAC_mrc[xs, ys, zs]
    lac_by_vesicle = locations_df.groupby('Group')['LAC Value']

    df_Ves['LAC Value'] = per_vesicle(lac_by_vesicle.mean())
    df_Ves['LAC Max'] = per_vesicle(lac_by_vesicle.max())
    df_Ves['LAC Min'] = per_vesicle(lac_by_vesicle.min())
    df_Ves['LAC Std Dev'] = per_vesicle(lac_by_vesicle.std())
    df_Ves['LAC Skew'] = per_vesicle(lac_by_vesicle.skew())
    # scipy.stats.kurtosis with its default settings, as before
    df_Ves['LAC Kurtosis'] = per_vesicle(lac_by_vesicle.apply(kurtosis))
    df_Ves['LAC Median'] = per_vesicle(lac_by_vesicle.median())
    df_Ves['LAC 25th Quantile'] = per_vesicle(lac_by_vesicle.quantile(0.25))
    df_Ves['LAC 75th Quantile'] = per_vesicle(lac_by_vesicle.quantile(0.75))

    # ---- Geometric properties ----------------------------------------------
    # Volume: voxel count scaled by Resolution^3 and converted with (1/1000)^3
    voxel_counts = per_vesicle(locations_df.groupby('Group').size())
    df_Ves['Volume (um3)'] = voxel_counts * (Resolution ** 3) * ((1 / 1000) ** 3)

    # Bounding-box extent of each vesicle along the three axes.
    # NOTE(review): max - min is one voxel short of the occupied span
    # (max - min + 1), so a vesicle that is one voxel thick along an axis gets
    # extent 0 and therefore diameter 0. Left unchanged here -- confirm which
    # convention is intended.
    coords_by_vesicle = locations_df.groupby('Group')[['X', 'Y', 'Z']]
    extent = coords_by_vesicle.max() - coords_by_vesicle.min()
    x_dis = per_vesicle(extent['X'])
    y_dis = per_vesicle(extent['Y'])
    z_dis = per_vesicle(extent['Z'])

    # Diameter (Geometric Mean Diameter)
    Geo_mean_dia = (x_dis * y_dis * z_dis) ** (1 / 3)
    df_Ves['Geometric Diameter (nm)'] = Geo_mean_dia * Resolution

    # Ellipsoid approximate surface area. The approximation is defined on the
    # SEMI-axes, so the full extents are halved first; feeding the full
    # extents (as was done before) overestimates the area by a factor of 4.
    const_p = 1.6075
    a = x_dis / 2
    b = y_dis / 2
    c = z_dis / 2
    SA_ves = (4 * np.pi) * (((a * b) ** const_p + (a * c) ** const_p + (b * c) ** const_p) / 3) ** (1 / const_p)
    df_Ves['Ellipsoid Surface Area (um2)'] = SA_ves * (Resolution ** 2) * ((1 / 1000) ** 2)

    # ---- Distance to the plasma membrane -----------------------------------
    binaryMem = np.where(openmrc(Mem_mask) > 0, 1, 0)

    # Euclidean distance transform: for every non-zero voxel of the mask, the
    # distance (in voxels) to the nearest zero voxel. Then normalise by the maximum.
    Mem_dist = ndimage.distance_transform_edt(binaryMem)
    Mem_dist_norm = Mem_dist / Mem_dist.max()

    # Keep only the minimum over the voxels of each vesicle
    locations_df['Mem_dist'] = Mem_dist_norm[xs, ys, zs]
    df_Ves['Min Distance to PM EDT'] = per_vesicle(
        locations_df.groupby('Group')['Mem_dist'].min())

    # ---- Distance to mitochondria ------------------------------------------
    # Invert the mask so the transform measures the distance to the nearest
    # mitochondria voxel.
    binaryMito = np.where(openmrc(Mito_mask) > 0, 1, 0)
    invertMito = 1 - binaryMito
    Mito_dist = ndimage.distance_transform_edt(invertMito)

    # NOTE(review): the mitochondria distances are divided by the MEMBRANE
    # maximum (Mem_dist.max()), not by Mito_dist.max(). Kept as is because it
    # puts both distances on the same scale -- confirm that this is intended.
    Mito_dist_norm = Mito_dist / Mem_dist.max()

    locations_df['Mito_dist'] = Mito_dist_norm[xs, ys, zs]
    df_Ves['Mito Dist EDT'] = per_vesicle(
        locations_df.groupby('Group')['Mito_dist'].min())

    return df_Ves

In [5]:
#Vesicle Analysis Pipeline for LAC and geometric parameters 

def Ves_Analysis_RawDist(Ves_mask, Mem_mask, Mito_mask, LAC_mrc, LACfactor):
    """Build a per-vesicle table of LAC statistics, geometry and un-normalised distances.

    Same pipeline as ``Ves_Analysis`` except that the two distance columns are
    not normalised: the minimum distance-transform value of each vesicle is
    multiplied by ``Resolution`` instead.

    Parameters
    ----------
    Ves_mask, Mem_mask, Mito_mask : str
        Paths of the vesicle, membrane and mitochondria masks (read with
        ``openmrc``). Every voxel > 0 counts as foreground.
    LAC_mrc : str
        Path of the raw intensity volume; its values are multiplied by
        ``LACfactor``. It is indexed with the vesicle-mask coordinates, so it
        presumably lives on the same voxel grid as the masks -- TODO confirm.
    LACfactor : float
        Scale factor of the cell. Also defines ``Resolution = 1000 / LACfactor``,
        which the output column names treat as nm per voxel edge.

    Returns
    -------
    pandas.DataFrame
        One row per 6-connected vesicle component with the columns
        ``Centroid X/Y/Z``, ``Group``, ``LAC Value`` (mean), ``LAC Max``,
        ``LAC Min``, ``LAC Std Dev``, ``LAC Skew``, ``LAC Kurtosis``,
        ``LAC Median``, ``LAC 25th Quantile``, ``LAC 75th Quantile``,
        ``Volume (um3)``, ``Geometric Diameter (nm)``,
        ``Ellipsoid Surface Area (um2)``, ``Min Raw Distance to PM EDT`` and
        ``Mito Raw Dist EDT``.

    Notes
    -----
    "X", "Y" and "Z" are array axes 0, 1 and 2 of the loaded volumes.
    """
    # Resolution is (1/LAC factor) * 1000
    Resolution = (1 / LACfactor) * 1000

    # ---- Label the vesicles -------------------------------------------------
    vesTifData = np.where(openmrc(Ves_mask) > 0, 1, 0)
    Ves_ConnectedComponents = cc3d.connected_components(vesTifData, connectivity=6)

    # The labels are used as 1..N throughout, so the largest label is the
    # vesicle count. Unlike len(np.unique(...)) - 1 this does not miscount when
    # the volume happens to contain no background voxel.
    Ves_number = int(Ves_ConnectedComponents.max())
    group_ids = range(1, Ves_number + 1)

    def per_vesicle(grouped_result):
        """Return a per-group result as an array ordered like the rows of df_Ves."""
        return grouped_result.reindex(group_ids).to_numpy()

    # Centroids using scipy.ndimage.center_of_mass (unit weight per voxel)
    Ves_centers = center_of_mass(np.ones_like(Ves_ConnectedComponents),
                                 Ves_ConnectedComponents, group_ids)
    df_Ves = pd.DataFrame(Ves_centers, columns=['Centroid X', 'Centroid Y', 'Centroid Z'])
    df_Ves['Group'] = group_ids

    # ---- One row per vesicle voxel -----------------------------------------
    # Every foreground voxel carries a non-zero label, so no background rows
    # have to be filtered out afterwards.
    xs, ys, zs = np.nonzero(Ves_ConnectedComponents)
    locations_df = pd.DataFrame({
        'X': xs,
        'Y': ys,
        'Z': zs,
        'Group': Ves_ConnectedComponents[xs, ys, zs].astype(np.int64),
    })

    # ---- LAC (chemical) statistics -----------------------------------------
    openLAC_mrc = openmrc(LAC_mrc) * LACfactor
    locations_df['LAC Value'] = openLAC_mrc[xs, ys, zs]
    lac_by_vesicle = locations_df.groupby('Group')['LAC Value']

    df_Ves['LAC Value'] = per_vesicle(lac_by_vesicle.mean())
    df_Ves['LAC Max'] = per_vesicle(lac_by_vesicle.max())
    df_Ves['LAC Min'] = per_vesicle(lac_by_vesicle.min())
    df_Ves['LAC Std Dev'] = per_vesicle(lac_by_vesicle.std())
    df_Ves['LAC Skew'] = per_vesicle(lac_by_vesicle.skew())
    # scipy.stats.kurtosis with its default settings, as before
    df_Ves['LAC Kurtosis'] = per_vesicle(lac_by_vesicle.apply(kurtosis))
    df_Ves['LAC Median'] = per_vesicle(lac_by_vesicle.median())
    df_Ves['LAC 25th Quantile'] = per_vesicle(lac_by_vesicle.quantile(0.25))
    df_Ves['LAC 75th Quantile'] = per_vesicle(lac_by_vesicle.quantile(0.75))

    # ---- Geometric properties ----------------------------------------------
    # Volume: voxel count scaled by Resolution^3 and converted with (1/1000)^3
    voxel_counts = per_vesicle(locations_df.groupby('Group').size())
    df_Ves['Volume (um3)'] = voxel_counts * (Resolution ** 3) * ((1 / 1000) ** 3)

    # Bounding-box extent of each vesicle along the three axes.
    # NOTE(review): max - min is one voxel short of the occupied span
    # (max - min + 1), so a vesicle that is one voxel thick along an axis gets
    # extent 0 and therefore diameter 0. Left unchanged here -- confirm which
    # convention is intended.
    coords_by_vesicle = locations_df.groupby('Group')[['X', 'Y', 'Z']]
    extent = coords_by_vesicle.max() - coords_by_vesicle.min()
    x_dis = per_vesicle(extent['X'])
    y_dis = per_vesicle(extent['Y'])
    z_dis = per_vesicle(extent['Z'])

    # Diameter (Geometric Mean Diameter)
    Geo_mean_dia = (x_dis * y_dis * z_dis) ** (1 / 3)
    df_Ves['Geometric Diameter (nm)'] = Geo_mean_dia * Resolution

    # Ellipsoid approximate surface area. The approximation is defined on the
    # SEMI-axes, so the full extents are halved first; feeding the full
    # extents (as was done before) overestimates the area by a factor of 4.
    const_p = 1.6075
    a = x_dis / 2
    b = y_dis / 2
    c = z_dis / 2
    SA_ves = (4 * np.pi) * (((a * b) ** const_p + (a * c) ** const_p + (b * c) ** const_p) / 3) ** (1 / const_p)
    df_Ves['Ellipsoid Surface Area (um2)'] = SA_ves * (Resolution ** 2) * ((1 / 1000) ** 2)

    # ---- Distance to the plasma membrane -----------------------------------
    binaryMem = np.where(openmrc(Mem_mask) > 0, 1, 0)

    # Euclidean distance transform: for every non-zero voxel of the mask, the
    # distance (in voxels) to the nearest zero voxel. Distances are NOT normalised.
    Mem_dist = ndimage.distance_transform_edt(binaryMem)

    # Keep only the minimum over the voxels of each vesicle, scaled by Resolution
    locations_df['Mem_dist'] = Mem_dist[xs, ys, zs]
    min_mem_dist = per_vesicle(locations_df.groupby('Group')['Mem_dist'].min())
    df_Ves['Min Raw Distance to PM EDT'] = min_mem_dist * Resolution

    # ---- Distance to mitochondria ------------------------------------------
    # Invert the mask so the transform measures the distance to the nearest
    # mitochondria voxel. Distances are NOT normalised here either.
    binaryMito = np.where(openmrc(Mito_mask) > 0, 1, 0)
    invertMito = 1 - binaryMito
    Mito_dist = ndimage.distance_transform_edt(invertMito)

    locations_df['Mito_dist'] = Mito_dist[xs, ys, zs]
    min_mito_dist = per_vesicle(locations_df.groupby('Group')['Mito_dist'].min())
    df_Ves['Mito Raw Dist EDT'] = min_mito_dist * Resolution

    return df_Ves

In [32]:
def Ves_Analysis_RawDist(Ves_mask, Mem_mask, Mito_mask, LAC_mrc, LACfactor):
    """Return the binarised mitochondria mask (1 where the mask is > 0, else 0).

    NOTE(review): this definition reuses the name of the full
    ``Ves_Analysis_RawDist`` pipeline defined in an earlier cell. When the
    notebook runs top to bottom this version replaces it, so every caller
    receives a mask array instead of a DataFrame. It looks like a debugging
    stub -- rename or delete it once confirmed.

    ``Mem_mask`` and ``LAC_mrc`` are accepted but not used.
    """
    # Resolution is (1/LAC factor) * 1000 (computed but not used below)
    Resolution = (1 / LACfactor) * 1000

    # The vesicle mask is still read and binarised, although the result is unused
    vesTifData = np.where(openmrc(Ves_mask) > 0, 1, 0)

    # Read in the mitochondria mask and binarise it
    return np.where(openmrc(Mito_mask) > 0, 1, 0)

In [7]:
#Clean up dataframe for clustering usage. 

def df_forCluster(dat_frame, log_offset=0.1):
    """Prepare the per-vesicle table for clustering.

    Steps: drop the columns that are correlated or not relevant for
    clustering, log10-transform ``'Mito Dist EDT'``, then standardise every
    remaining column with ``StandardScaler``.

    Parameters
    ----------
    dat_frame : pandas.DataFrame
        Table containing at least the dropped columns and ``'Mito Dist EDT'``
        (the output of ``Ves_Analysis``). It is not modified.
    log_offset : float, default 0.1
        Constant added before taking the logarithm so that zero distances stay
        finite. The previous comment quoted ``log(x + 0.01)``, but the value
        actually used has always been 0.1; the default keeps that behaviour.

    Returns
    -------
    pandas.DataFrame
        Standardised values with the remaining column names and a fresh
        0..n-1 index (the index of ``dat_frame`` is not carried over).
    """
    # Parameters that are correlated or not relevant for clustering
    dropped_columns = ['Centroid X', 'Centroid Y', 'Centroid Z', 'Group',
                       'LAC Max', 'LAC Min', 'LAC Median',
                       'LAC 25th Quantile', 'LAC 75th Quantile',
                       'Volume (um3)', 'Ellipsoid Surface Area (um2)']
    dat_frame_clust = dat_frame.drop(columns=dropped_columns)

    # Log transform of the mitochondria EDT: log10(x + log_offset)
    dat_frame_clust['Mito Dist EDT'] = np.log10(dat_frame_clust['Mito Dist EDT'] + log_offset)

    # Standardise, then rebuild a DataFrame with the original column names
    standardized = StandardScaler().fit_transform(dat_frame_clust)
    return pd.DataFrame(standardized, columns=dat_frame_clust.columns)

In [8]:
#Clean up dataframe for clustering usage. 

def df_forCluster_unnorm(dat_frame):
    """Return ``dat_frame`` unchanged (the very same object, not a copy).

    Counterpart of ``df_forCluster`` without any processing: no columns are
    dropped, no log transform is applied and nothing is standardised.
    """
    return dat_frame

In [38]:
#Function to Analyze Contact

def Mito_LAC_atContacts(Ves_mask, Mito_mask, LAC_mrc, LACfactor):
    """Tabulate, for every mitochondria voxel, its LAC value and distance to the nearest vesicle.

    Parameters
    ----------
    Ves_mask, Mito_mask : str
        Paths of the vesicle and mitochondria masks (read with ``openmrc``);
        voxels > 0 are foreground.
    LAC_mrc : str
        Path of the raw intensity volume; values are multiplied by ``LACfactor``.
    LACfactor : float
        Scale factor; also defines ``Resolution = (1/LACfactor) * 1000``.

    Returns
    -------
    pandas.DataFrame
        One row per mitochondria voxel with columns ``X``, ``Y``, ``Z`` (array
        axes 0, 1, 2), ``LAC Value Mito`` and ``MitoDist`` (distance-transform
        value times ``Resolution``).
    """
    # Resolution is (1/LAC factor) * 1000
    Resolution = (1 / LACfactor) * 1000

    # Foreground of the mitochondria and vesicle masks
    mito_foreground = openmrc(Mito_mask) > 0
    ves_foreground = openmrc(Ves_mask) > 0

    # EDT of the inverted vesicle mask: distance of every non-vesicle voxel
    # to the nearest vesicle/ISG voxel
    ISG_EDT = ndimage.distance_transform_edt(np.logical_not(ves_foreground))

    # Coordinates of every mitochondria voxel
    xs, ys, zs = np.nonzero(mito_foreground)
    mito_loc_df = pd.DataFrame({'X': xs, 'Y': ys, 'Z': zs})

    # LAC value at every mitochondria voxel
    openedLAC_mrc = openmrc(LAC_mrc) * LACfactor
    mito_loc_df['LAC Value Mito'] = openedLAC_mrc[xs, ys, zs]

    # Distance of every mitochondria voxel from the ISGs, scaled by Resolution
    mito_loc_df['MitoDist'] = ISG_EDT[xs, ys, zs] * Resolution

    return mito_loc_df

# Classes #

In [37]:
# Considering each cell as a class
#Class should have nucleus, mitochondria, vesicles, membrane masks, and raw LAC file 
class INS1E:
    """One INS-1E cell: the paths of its label masks and raw LAC file, plus its LAC factor.

    Each path attribute is the ", "-joined list of all entries of ``filelist``
    that contain the corresponding keyword, so it is a usable path only when
    exactly one entry matches (and an empty string when none does).
    """

    @staticmethod
    def _entries_with(filelist, keyword):
        """Join every entry of ``filelist`` that contains ``keyword`` with ", "."""
        return ", ".join([entry for entry in filelist if keyword in entry])

    def __init__(self, filelist, LACfactor):
        # Keyword -> attribute assignment; the keyword is searched in the whole path string
        select = self._entries_with
        self.membrane = select(filelist, "membrane")
        self.mitochondria = select(filelist, "mito")
        self.nucleus = select(filelist, "nucleus")
        self.vesicles = select(filelist, "vesic")
        self.LACFile = select(filelist, "Raw MRC File")
        self.LACfactor = LACfactor

    def _pipeline_args(self):
        """Arguments shared by the vesicle pipelines, in their positional order."""
        return (self.vesicles, self.membrane, self.mitochondria, self.LACFile, self.LACfactor)

    def Ves_Analysis_func(self):
        """Run ``Ves_Analysis`` on this cell and return its table."""
        return Ves_Analysis(*self._pipeline_args())

    def Ves_Analysis_norm(self):
        """Run ``Ves_Analysis`` and pass the result through ``df_forCluster``."""
        return df_forCluster(Ves_Analysis(*self._pipeline_args()))

    def Ves_Analysis_unnorm(self):
        """Run ``Ves_Analysis`` and pass the result through ``df_forCluster_unnorm``."""
        return df_forCluster_unnorm(Ves_Analysis(*self._pipeline_args()))

    def Ves_Analysis_Raw_Unorm(self):
        """Run ``Ves_Analysis_RawDist`` and pass the result through ``df_forCluster_unnorm``."""
        return df_forCluster_unnorm(Ves_Analysis_RawDist(*self._pipeline_args()))

    ##Mitochondria LAC Analysis
    def MitoContactLAC(self):
        """Run ``Mito_LAC_atContacts`` on this cell's vesicle mask, mitochondria mask and LAC file."""
        return Mito_LAC_atContacts(self.vesicles, self.mitochondria, self.LACFile, self.LACfactor)
        
        

In [10]:
class Condition:
    """Container for the cells that belong to one experimental condition."""

    def __init__(self, cells):
        # Stored as given; no copy is made
        self.cells = cells

# Dictionaries # 

In [11]:
#Creating a LAC factor dictionary (cell name -> LAC factor)

#Reading in Excel File: one dataframe per listed sheet, keyed by sheet name
sheet_names = ['NS', 'Glucose', 'Glucose + GIP', 'Glucose +TAK', 'Glucose + GKA', "Glucose + GLM"]
LACfactors = pd.read_excel("LAC Factors(4 conditions).xlsx", sheet_name=sheet_names)

#One dataframe per condition, unpacked in the order of sheet_names
NSfact, Glufac, GIPfac, TAKfac, GKAfac, GLMfac = (LACfactors[sheet] for sheet in sheet_names)

#Ext dataframe (kept in a separate text file)
LAC_fact_Ext = pd.read_csv("D:/Downloads/Drug Stimulated INS1E Cells/Exendin_Glucose/Ext_4_LAC_factors.txt")

#Combining dataframes; if a cell name occurs more than once, the last row wins in the dictionary
CombinedLACfactors = pd.concat([NSfact, Glufac, GIPfac, TAKfac, GKAfac, GLMfac, LAC_fact_Ext])
cell_name_index = CombinedLACfactors.set_index('Cell Name')
LACfactordict = cell_name_index['LAC Factor'].to_dict()
LACfactordict

{'6_5': 33.33,
 '6_10': 33.33,
 '6_17-19': 30.74,
 '1537_19': 33.33,
 '7_22': 33.33,
 '1537_16-18': 33.33,
 '7_8=9': 33.33,
 '9905_12-13': 32.89,
 '9908_4-5': 30.21,
 '9905_3': 32.89,
 '9905_8': 32.89,
 '9905_11-12': 32.89,
 '9905_7': 32.89,
 '9905_6': 32.89,
 '1152_15-17': 32.89,
 '1535_5-7': 33.33,
 '2216_3-5': 33.33,
 '2216_6-7': 33.33,
 '2220_9': 33.33,
 '2220_12-13': 33.33,
 '2220_5': 33.33,
 '2220_4': 33.33,
 '2211_9-11': 33.33,
 '2213_11-12': 33.33,
 '1529_1-3': 33.33,
 '2213_4': 32.89,
 '9917_1': 32.89,
 '9917_2': 32.89,
 '9919_12-13': 32.89,
 '9919_8-9': 32.89,
 '9892_5-6': 32.89,
 '9889_9': 29.07,
 '9889_6': 30.21,
 '9892_3': 32.89,
 '9897_1': 32.89,
 '9895_1': 32.89,
 '9929_10-11': 29.07,
 '9928_6': 32.89,
 '9930_8-10': 29.07,
 '9929_3': 32.89,
 '9929_12': 29.07,
 '9928_2-3': 32.89,
 '1518_14': 30.74,
 '9910_4': 30.21,
 '9913_3-4': 32.89,
 '9913_5-6': 32.89,
 '9913_1-2': 32.89,
 '9911_2': 32.89}

In [12]:
# Spot check: look up the LAC factor stored for cell "9911_2"
LACfactordict["9911_2"]

32.89

# File Management 

In [13]:
#File paths on personal computer 

#No Stimulation
directoryNS = "D:/Downloads/Drug Stimulated INS1E Cells/NS"


def _ns_glob(name_pattern):
    """Glob ``directoryNS + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryNS + '/**/' + name_pattern)


#No stimulation files listed here (6 final cells used in total)
NS_7_22_files = _ns_glob("/White_PBC_INS1e_0min_7_22_pre_rec*.mrc")
NS_6_5_files = _ns_glob("/White_PBC_INS1e_0min_6_5_pre_rec*.mrc")
NS_1537_16_18_files = _ns_glob("/White_PBC_INS1e_0min_1537_16-18_pre_rec*.mrc")
#NOTE(review): the LAC factor dictionary shown in this notebook has the key '7_8=9', not '7_8-9' -- check the source sheet
NS_7_8_9_files = _ns_glob("/*White_PBC_INS1e_0min_7_8-9_pre_rec*.mrc")
NS_6_17_19_files = _ns_glob("/White_PBC_INS1e_0min_6_17-19_pre_rec*.mrc")
NS_1537_19_files = _ns_glob("/White_PBC_INS1e_0min_1537_19_pre_rec*.mrc")

#Excluded: NS_6_10_files, pattern "/White_PBC_INS1e_0min_6_10_pre_rec*.mrc"
#6_10 removed from analysis, only 190 vesicles compared to an average of 700 vesicles

In [14]:
#Glucose 
directoryGlu = "D:/Downloads/Drug Stimulated INS1E Cells/Glucose(25mM)"


def _glu_glob(name_pattern):
    """Glob ``directoryGlu + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryGlu + '/**/' + name_pattern)


#Glucose stim files here (6 final cells used in total)
Glu_9905_7_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9905_7_pre_rec*.mrc")
Glu_9905_8_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9905_8_pre_rec*.mrc")
Glu_9905_11_12_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9905_11-12_pre_rec*.mrc")
#NOTE(review): the LAC factor dictionary shown in this notebook has no '9905_8-9' key -- confirm the cell name
Glu_9905_8_9_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9905_8-9_pre_rec*.mrc")
Glu_9908_4_5_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9908_4-5_pre_rec*.mrc")
Glu_9905_12_13_files = _glu_glob("/White_PBC_Ins1e_25glucose_30_9905_12-13_pre_rec*.mrc")

#Excluded: Glu_9905_3_files, pattern "/White_PBC_Ins1e_25glucose_30_9905_3_pre_rec*.mrc"
#9905_3 has a high cell LAC (total of 0.31) compared to the average of 0.22 + 0.01

In [15]:
#GIP + Glucose
directoryGIP = "D:/Downloads/Drug Stimulated INS1E Cells/GIP_Glucose"


def _gip_glob(name_pattern):
    """Glob ``directoryGIP + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryGIP + '/**/' + name_pattern)


#Glucose + GIP stim files here (7 final cells used in total)
GIP_2220_4_files = _gip_glob("/White_PBC_INS1e_GIP_30_2220_4_pre_rec*.mrc")
GIP_2220_5_files = _gip_glob("/White_PBC_INS1e_GIP_30_2220_5_pre_rec*.mrc")
GIP_2220_12_13_files = _gip_glob("/White_PBC_INS1e_GIP_30_2220_12-13_pre_rec*.mrc")
GIP_2220_9_files = _gip_glob("/White_PBC_INS1e_GIP_30_2220_9_pre_rec*.mrc")
GIP_2216_6_7_files = _gip_glob("/White_PBC_Ins1e_GIP_30_2216_6-7_pre_rec*.mrc")
GIP_2216_3_5_files = _gip_glob("/White_PBC_Ins1e_GIP_30_2216_3-5_pre_rec*.mrc")
#NOTE(review): the LAC factor dictionary shown in this notebook has '1152_15-17', not '1532_15-17' -- confirm the cell name
GIP_1532_15_17_files = _gip_glob("/White_PBC_INS1e_glu-GIP_30_1532_15-17_pre_rec*.mrc")

#Excluded: GIP_1535_5_7_files, pattern "/White_PBC_INS1e_glu-GIP_30_1535_5-7_pre_rec*.mrc"
#1535_5-7 has a low mito volume of 3.2 um3 compared to the average of 21 +- 5 um3


In [16]:
#TAK + Glucose
directoryTAK = "D:/Downloads/Drug Stimulated INS1E Cells/TAK-875_Glucose"


def _tak_glob(name_pattern):
    """Glob ``directoryTAK + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryTAK + '/**/' + name_pattern)


#TAK + Glucose stim files here (7 final cells used in total)
#NOTE(review): the LAC factor dictionary shown in this notebook has '9919_8-9', not '9920_8-9' -- confirm the cell name
TAK_9920_8_9_files = _tak_glob("/White_PBC_INS1e_TAK_30_9920_8-9_pre_rec*.mrc")
TAK_9919_12_13_files = _tak_glob("/White_PBC_INS1e_TAK_30_9919_12-13_pre_rec*.mrc")
TAK_9917_2_files = _tak_glob("/White_PBC_INS1e_TAK_30_9917_2_pre_rec*.mrc")
TAK_9917_1_files = _tak_glob("/White_PBC_INS1e_TAK_30_9917_1_pre_rec*.mrc")
TAK_2213_4_files = _tak_glob("/White_PBC_Ins1e_TAK_30_2213_4_pre_rec*.mrc")
TAK_2213_11_12_files = _tak_glob("/White_PBC_Ins1e_TAK_30_2213_11-12_pre_rec*.mrc")
TAK_2211_9_11_files = _tak_glob("/White_PBC_Ins1e_TAK_30_2211_9-11_pre_rec*.mrc")

#Excluded: TAK_1529_1_3_files, pattern "/White_PBC_INS1e_glu-TAK_30_1529_1-3_pre_rec*.mrc"
#Reconstruction errors. Also, an unusually low number of vesicles

In [17]:
#GKA + Glucose
directoryGKA = "D:/Downloads/Drug Stimulated INS1E Cells/GKA_Glucose"


def _gka_glob(name_pattern):
    """Glob ``directoryGKA + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryGKA + '/**/' + name_pattern)


#GKA + Glucose stim files here (6 final cells used in total)
GKA_9892_5_6_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9892_5-6_pre_rec*.mrc")
GKA_9889_9_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9889_9_pre_rec*.mrc")
GKA_9889_6_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9889_6_pre_rec*.mrc")
GKA_9892_3_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9892_3_pre_rec*.mrc")
GKA_9897_1_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9897_1_pre_rec*.mrc")
GKA_9895_1_files = _gka_glob("/White_PBC_Ins1e_GKA_30_9895_1_pre_rec*.mrc")

In [18]:
#GLM + Glucose
directoryGLM = "D:/Downloads/Drug Stimulated INS1E Cells/GLM_Glucose"


def _glm_glob(name_pattern):
    """Glob ``directoryGLM + '/**/' + name_pattern`` (``recursive`` is left at its default)."""
    return glob.glob(directoryGLM + '/**/' + name_pattern)


#GLM + Glucose stim files here (7 final cells used in total)
GLM_9929_10_11_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9929_10-11_pre_rec*.mrc")
GLM_9928_6_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9928_6_pre_rec*.mrc")
GLM_9930_8_10_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9930_8-10_pre_rec*.mrc")
GLM_9929_3_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9929_3_pre_rec*.mrc")
GLM_9929_12_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9929_12_pre_rec*.mrc")
GLM_9928_2_3_files = _glm_glob("/White_PBC_Ins1e_GLM_30_9928_2-3_pre_rec*.mrc")
GLM_1518_14_files = _glm_glob("/White_PBC_INS1e_glu-GLM_30_1518_14_pre_rec*.mrc")



In [19]:
#Ext + Glucose 
directoryExt = "D:/Downloads/Drug Stimulated INS1E Cells/Exendin_Glucose"


def _ext_glob(cell_id):
    """Glob every entry whose name contains ``cell_id``: ``directoryExt + '/**/*' + cell_id + '*'``."""
    return glob.glob(directoryExt + '/**/*' + cell_id + '*')


#Ext + Glucose stim files here (5 final cells used in total)
Ext_9913_1_2_files = _ext_glob('9913_1-2')
Ext_9913_5_6_files = _ext_glob('9913_5-6')
Ext_9913_3_4_files = _ext_glob('9913_3-4')
Ext_9911_2_files = _ext_glob('9911_2')
Ext_9910_4_files = _ext_glob('9910_4')

#All label masks are tiffs, while half of the raw intensity files are also tiffs

In [20]:
#44 cells total (39 non ext, with 5 ext cells)

## Primary Cell Functions ##

In [21]:
#Primary NS cells
directoryPrimNS = "D:/Downloads/Primary Beta Cells Files/NoStim"


def _prim_ns_glob(cell_id):
    """Glob every entry whose name contains ``cell_id``: ``directoryPrimNS + '/**/*' + cell_id + '*'``."""
    return glob.glob(directoryPrimNS + '/**/*' + cell_id + '*')


#individual files
#10501_5 has very few + small mitochondria
PrimNS_10501_5_files = _prim_ns_glob('10501_5')
#10501_7 is a little low on # of mitochondria
PrimNS_10501_7_files = _prim_ns_glob('10501_7')
PrimNS_10510_32_33_files = _prim_ns_glob('10510_32-33')
PrimNS_11386_12_13_files = _prim_ns_glob('11386_12-13')
PrimNS_10_10_11_12_R_files = _prim_ns_glob('10_10-11-12_R')
PrimNS_10_10_11_12_L_files = _prim_ns_glob('10_10-11-12_L')
PrimNS_10498_9_10_files = _prim_ns_glob('10498_9-10')

In [22]:
# Considering each cell as a class
#Class should have nucleus, mitochondria, vesicles, membrane masks, and raw LAC file 
class Primary_β:
    """One primary beta cell: the paths of its label masks and raw LAC file, plus its LAC factor.

    Each path attribute is the ", "-joined list of all entries of ``filelist``
    that contain the corresponding keyword, so it is a usable path only when
    exactly one entry matches (and an empty string when none does).
    """

    @staticmethod
    def _entries_with(filelist, keyword):
        """Join every entry of ``filelist`` that contains ``keyword`` with ", "."""
        return ", ".join([entry for entry in filelist if keyword in entry])

    def __init__(self, filelist, LACfactor):
        # Keyword -> attribute assignment; the keyword is searched in the whole path string
        select = self._entries_with
        self.membrane = select(filelist, "membrane")
        self.mitochondria = select(filelist, "mito")
        self.nucleus = select(filelist, "nucleus")
        self.vesicles = select(filelist, "vesic")
        self.LACFile = select(filelist, "Raw MRC File")
        self.LACfactor = LACfactor

    def Ves_Analysis_unnorm(self):
        """Run ``Ves_Analysis`` on this cell and pass the result through ``df_forCluster_unnorm``."""
        vesicle_table = Ves_Analysis(self.vesicles, self.membrane, self.mitochondria,
                                     self.LACFile, self.LACfactor)
        return df_forCluster_unnorm(vesicle_table)

In [23]:
# LAC factor of each primary beta cell, keyed by cell name
Primβ_dict = dict([
    ("10498_9-10", 30.21),
    ("10501_5", 25.25),
    ("10501_7", 24.04),
    ("10_10-11-12_L", 23.04),
    ("10_10-11-12_R", 23.04),
    ("11386_12-13", 21.98),
    ("10510_32-33", 24.04),
    ("11769_6_7", 21.98),
    ("11769_7_8", 21.98),
])
Primβ_dict

{'10498_9-10': 30.21,
 '10501_5': 25.25,
 '10501_7': 24.04,
 '10_10-11-12_L': 23.04,
 '10_10-11-12_R': 23.04,
 '11386_12-13': 21.98,
 '10510_32-33': 24.04,
 '11769_6_7': 21.98,
 '11769_7_8': 21.98}

In [24]:
#Primary Ex4 cells
directoryPrimEx4 = "D:/Downloads/Primary Beta Cells Files/Ex4"


def _prim_ex4_glob(cell_id):
    """Glob every entry whose name contains ``cell_id``: ``directoryPrimEx4 + '/**/*' + cell_id + '*'``."""
    return glob.glob(directoryPrimEx4 + '/**/*' + cell_id + '*')


#individual files
PrimEx4_11769_7_8_files = _prim_ex4_glob('11769_7_8')
PrimEx4_11769_6_7_files = _prim_ex4_glob('11769_6_7')