In [5]:
import anndata, pickle, os, fnmatch, math 
import scanpy as sc
import numpy as np
import pandas as pd 
import cProfile
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
'''IMPORTING FUNCTIONS FROM GENERAL FUNCTION FILE'''

# Executes GeneralFunctions.ipynb inside this kernel so that its definitions become
# available here. The functions defined further down call sortClust and
# check_file_exists, neither of which is defined in this notebook - presumably both
# come from that file (TODO confirm).
%run GeneralFunctions.ipynb

In [7]:
def addingQCandDEscore(sampleComparLst, folderName, saveFolderName):
    """Write one AnnData file per Leiden cluster, annotated with QC metrics and DE scores.

    Parameters
    ----------
    sampleComparLst : list of lists
        The comparisons to make. Two entry shapes are understood:

        * ``[condition, sample]`` - one sample whose nuclei are labelled
          ``'<sample> Half 1'`` / ``'<sample> Half 2'`` in ``obs['Half']``.
          The input file is ``<sample>.h5ad``.
        * ``[condition, sample1, sample2]`` - two samples labelled in ``obs['sample']``.
          The input file is ``<condition>_<sample1>_<sample2>.h5ad``.

        Example: ``[['Control', '7_VC24L', '9_VC23L'], ['Deafened', '5_VC22L', '10_VC25L']]``
    folderName : str
        Folder under ``F:/SampleData/`` that holds the input AnnData files.
    saveFolderName : str
        Folder under ``F:/SampleData/`` that receives the per-cluster files, named
        ``<input file name>_<cluster>.h5ad``.

    Notes
    -----
    Clusters come from ``obs['leiden_0.3']``. A cluster is only processed when both
    groups contribute more than one nucleus; other clusters are skipped and no file is
    written for them. For processed clusters the Wilcoxon scores of both groups are
    stored in ``var['<group>_DE_Score']``, ordered like the genes of the input file.
    """
    for sampleLst in sampleComparLst:
        if len(sampleLst) == 2:
            sampleName = sampleLst[1]
            sample1_name = f'{sampleName} Half 1'
            sample2_name = f'{sampleName} Half 2'

            fileName = sampleName
            divideby = 'Half'

        else:
            condition = sampleLst[0]
            sample1_name = sampleLst[1]
            sample2_name = sampleLst[2]

            fileName = f'{condition}_{sample1_name}_{sample2_name}'
            divideby = 'sample'

        obj = sc.read(f'F:/SampleData/{folderName}/{fileName}.h5ad')
        sortedClustLst = sortClust(list(np.unique(obj.obs['leiden_0.3'])))

        # Gene order of the full object; identical for every cluster, so computed once.
        refGeneLst = list(obj.var_names)

        for clust in sortedClustLst:
            # Cheap view, used only to count the nuclei of each group in this cluster.
            clustView = obj[obj.obs['leiden_0.3'].isin([clust]), :]
            groupLabels = list(clustView.obs[divideby])

            sample1_nucAmount = groupLabels.count(sample1_name)
            sample2_nucAmount = groupLabels.count(sample2_name)
            print(clust, sample1_nucAmount, sample2_nucAmount)

            # rank_genes_groups cannot compute statistics for a group with one observation.
            if sample1_nucAmount <= 1 or sample2_nucAmount <= 1:
                continue

            # Work on a real copy: writing to .uns/.obs/.var of a view would otherwise
            # convert it implicitly and emit a modification warning for every cluster.
            clustObj = clustView.copy()

            # Adds missing field to 'data.uns' that is necessary for running rank_genes_groups
            clustObj.uns['log1p'] = {'base': None}

            print(clust, clustObj.n_obs, clustObj.n_vars)

            sc.pp.calculate_qc_metrics(clustObj, percent_top=None, log1p=False, inplace=True)
            sc.tl.rank_genes_groups(clustObj, divideby, method='wilcoxon')

            rankResult = clustObj.uns['rank_genes_groups']
            for groupName in (sample1_name, sample2_name):
                # Scores are stored in rank order; key them by gene name and bring them
                # back into the gene order of the input file before attaching to .var.
                scoreByGene = pd.Series(rankResult['scores'][groupName],
                                        index=rankResult['names'][groupName])
                clustObj.var[f'{groupName}_DE_Score'] = scoreByGene.reindex(refGeneLst)

            clustObj.write(f'F:/SampleData/{saveFolderName}/{fileName}_{clust}.h5ad')

def thresholdDEGenes(sampleComparLst, folderName, savePath):
    """Count, per cluster, the genes that are DE by both DE score and expression fold change.

    For every comparison in ``sampleComparLst`` (two-element entries are treated as a
    single sample split into 'Half 1' / 'Half 2', longer entries as
    ``[condition, sample1, sample2]``) the clusters of the combined object are visited in
    ``sortClust`` order and one number per cluster is appended to the result list:

    * ``-5`` when the per-cluster file is missing from ``F:/SampleData/{folderName}_Clusters/``;
    * ``0``  when no gene has a first-group DE score below -10 or above 10;
    * otherwise the number of such genes whose mean expression is zero in either group
      or differs between the groups by more than a factor of two.

    The resulting dict (key: sample name, or ``'<sample1>_<sample2>'``) is printed and
    pickled to ``savePath``.
    """
    countsByComparison = {}

    for entry in sampleComparLst:
        if len(entry) == 2:
            baseName = entry[1]
            groupA = f'{baseName} Half 1'
            groupB = f'{baseName} Half 2'
            fileStem = baseName
            resultKey = baseName
            groupColumn = 'Half'
        else:
            cond, groupA, groupB = entry[0], entry[1], entry[2]
            fileStem = f'{cond}_{groupA}_{groupB}'
            resultKey = f'{groupA}_{groupB}'
            groupColumn = 'sample'

        # The combined object is only needed for its list of cluster labels.
        combined = sc.read(f'F:/SampleData/{folderName}/{fileStem}.h5ad')
        orderedClusters = sortClust(list(np.unique(combined.obs['leiden_0.3'])))

        for cluster in orderedClusters:
            clusterDir = f'F:/SampleData/{folderName}_Clusters/'
            clusterFile = f'{fileStem}_{cluster}.h5ad'

            # No per-cluster file: record the sentinel and move on.
            if not check_file_exists(clusterDir, clusterFile):
                countsByComparison.setdefault(resultKey, []).append(-5)
                continue

            clusterObj = sc.read(f'F:/SampleData/{folderName}_Clusters/{fileStem}_{cluster}.h5ad')

            scoreA = clusterObj.var[f'{groupA}_DE_Score']
            belowCut = scoreA < -10  # DE SCORE 20 original
            aboveCut = scoreA > 10   # DE SCORE -20 original

            # Nothing passes the score cut-off: the cluster contributes zero genes.
            if not (belowCut.any() or aboveCut.any()):
                countsByComparison.setdefault(resultKey, []).append(0)
                continue

            cellsA = clusterObj[clusterObj.obs[groupColumn].isin([groupA]), :]
            cellsB = clusterObj[clusterObj.obs[groupColumn].isin([groupB]), :]

            meanA = list(np.mean(cellsA.X.toarray(), axis=0))
            meanB = list(np.mean(cellsB.X.toarray(), axis=0))

            # Positions (in var order) of every gene that passed either score cut-off.
            candidateIdx = np.flatnonzero((belowCut | aboveCut).to_numpy())

            geneCount = 0
            for pos in candidateIdx:
                expA = meanA[pos]
                expB = meanB[pos]

                if expA == 0 or expB == 0:
                    geneCount += 1
                else:
                    ratio = expA / expB
                    if ratio > 2 or ratio < 0.5:
                        geneCount += 1

            countsByComparison.setdefault(resultKey, []).append(geneCount)

    print(countsByComparison)
    with open(savePath, 'wb') as handle:
        pickle.dump(countsByComparison, handle)

def calculatingFoldChange(sampleComparLst, folderName, savePath):
    """Compute per-gene mean-expression fold changes for every cluster of every comparison.

    Parameters
    ----------
    sampleComparLst : list of lists
        ``[condition, sample]`` entries compare ``'<sample> Half 1'`` with
        ``'<sample> Half 2'`` (``obs['Half']``, file ``<sample>.h5ad``);
        ``[condition, sample1, sample2]`` entries compare the two samples
        (``obs['sample']``, file ``<condition>_<sample1>_<sample2>.h5ad``).
    folderName : str
        Folder under ``F:/SampleData/`` that holds the AnnData files.
    savePath : str
        File that receives the pickled result dict.

    Result
    ------
    A dict mapping the file name stem to a flat list of fold changes
    (group 1 mean / group 2 mean), one value per gene, cluster after cluster in
    ``sortClust`` order. When both means are zero the value is 1; when only one mean is
    zero that mean is replaced by 0.1.

    Note: the fold-change computation previously sat outside the cluster loop, so only
    the last cluster of each comparison was evaluated. It now runs for every cluster.
    """
    # Stand-in used for a zero mean when the other group does express the gene.
    pseudoExpression = 0.1
    foldChangeDict = {}

    for sampleLst in sampleComparLst:
        if len(sampleLst) == 2:
            sampleName = sampleLst[1]
            sample1_name = f'{sampleName} Half 1'
            sample2_name = f'{sampleName} Half 2'

            dictName = sampleName
            divideby = 'Half'

        else:
            condition = sampleLst[0]
            sample1_name = sampleLst[1]
            sample2_name = sampleLst[2]

            dictName = f'{condition}_{sample1_name}_{sample2_name}'
            divideby = 'sample'

        obj = sc.read(f'F:/SampleData/{folderName}/{dictName}.h5ad')
        sortedClustLst = sortClust(list(np.unique(obj.obs['leiden_0.3'])))

        for clust in sortedClustLst:
            clustObj = obj[obj.obs['leiden_0.3'].isin([clust]), :]

            sample1_obj = clustObj[clustObj.obs[divideby].isin([sample1_name]), :]
            sample2_obj = clustObj[clustObj.obs[divideby].isin([sample2_name]), :]

            # Mean expression of every gene over the nuclei of each group in this cluster.
            sample1_meanGeneExpression = np.mean(sample1_obj.X.toarray(), axis=0)
            sample2_meanGeneExpression = np.mean(sample2_obj.X.toarray(), axis=0)

            for sample1_exp, sample2_exp in zip(sample1_meanGeneExpression,
                                                sample2_meanGeneExpression):
                if sample1_exp == 0 and sample2_exp == 0:
                    div = 1
                elif sample1_exp == 0 and sample2_exp != 0:
                    div = pseudoExpression / sample2_exp
                elif sample1_exp != 0 and sample2_exp == 0:
                    div = sample1_exp / pseudoExpression
                else:
                    div = sample1_exp / sample2_exp

                foldChangeDict.setdefault(dictName, []).append(div)

    with open(savePath, 'wb') as file:
        pickle.dump(foldChangeDict, file)
        
def _sexForSample(sexLst, sampleName):
    """Return the sex listed in ``sexLst`` for the mouse of ``sampleName`` ('' if none matches)."""
    # Sample names look like '20_AC19R': a running sample index, an underscore, then the
    # tag that contains the mouse number. Only the tag is searched, so the index ('20')
    # cannot be mistaken for a mouse number. Names without an underscore are searched whole.
    sampleTag = sampleName.split('_', 1)[-1]

    sampleSex = ''
    for lst in sexLst:
        mouseNum = lst[0]
        mouseSex = lst[1]
        # No break: as before, the last matching entry of sexLst wins.
        if mouseNum in sampleTag:
            sampleSex = mouseSex
    return sampleSex


def addingSexInfo(sexLst, sampleComparLst, folderName):
    """Add an ``obs['sex']`` column to every AnnData file named by ``sampleComparLst``.

    Parameters
    ----------
    sexLst : list of ``[mouse number, sex]`` pairs, e.g. ``[['19', 'male'], ['23', 'female']]``.
    sampleComparLst : list of lists
        ``[condition, sample]`` entries refer to ``<sample>.h5ad`` and get one sex for
        all nuclei; ``[condition, sample1, sample2]`` entries refer to
        ``<condition>_<sample1>_<sample2>.h5ad`` and get the sex per nucleus according
        to ``obs['sample']`` (anything that is not ``sample1`` receives the sex of
        ``sample2``).
    folderName : str
        Folder under ``F:/SampleData/`` that holds the files.

    Each file is overwritten in place. Samples whose mouse is not in ``sexLst`` receive
    an empty string. The sex of each sample is looked up independently, so two samples
    taken from the same mouse (e.g. left and right hemisphere) both get a value.
    """
    for sampleLst in sampleComparLst:
        if len(sampleLst) == 2:
            sampleName = sampleLst[1]

            filePath = f'F:/SampleData/{folderName}/{sampleName}.h5ad'
            obj = sc.read(filePath)
            obj.obs['sex'] = _sexForSample(sexLst, sampleName)

        else:
            condition = sampleLst[0]
            sample1_name = sampleLst[1]
            sample2_name = sampleLst[2]
            sample1_sex = _sexForSample(sexLst, sample1_name)
            sample2_sex = _sexForSample(sexLst, sample2_name)

            filePath = f'F:/SampleData/{folderName}/{condition}_{sample1_name}_{sample2_name}.h5ad'
            obj = sc.read(filePath)
            obj.obs['sex'] = [sample1_sex if name == sample1_name else sample2_sex
                              for name in obj.obs['sample']]

        obj.write(filePath)

In [8]:
'''SAMPLE COMPARISON LISTS'''

# [mouse number, sex] pairs; addingSexInfo looks the mouse number up inside the sample names.
mouseSexLst = [['19', 'male'], ['20', 'male'], ['22', 'male'], ['23', 'female'], ['24', 'male'], ['25', 'female']]

# [condition, sample name] - two-element entries: the functions above treat these as a
# single sample whose nuclei are split into '<sample> Half 1' and '<sample> Half 2'.
sampleNameLst = [['MD Contra. Hem. (L)', '1_VC20L'], ['MD Contra. Hem. (L)', '2_AC20L'], ['MD Ipsi. Hem. (R)', '3_VC20R'], 
                 ['MD Ipsi. Hem. (R)', '4_AC20R'], ['Deafened', '5_VC22L'], ['Deafened', '6_AC22L'], ['Control', '7_VC24L'],
                 ['Control', '8_AC24L'], ['Control', '9_VC23L'], ['Deafened', '10_VC25L'], ['Deafened', '13_AC25L'], 
                 ['Control', '14_AC23L'], ['MD Contra. Hem. (L)', '16_VC19L'], ['MD Ipsi. Hem. (R)', '17_VC19R'], 
                 ['MD Contra. Hem. (L)', '19_AC19L'], ['MD Ipsi. Hem. (R)', '20_AC19R']]

# [condition, sample1, sample2] - three-element entries: the two samples are compared with
# each other and the data file is named '<condition>_<sample1>_<sample2>.h5ad'.
# Each pair here shares its condition.
bioRepComparisonLst = [['Control', '7_VC24L', '9_VC23L'], ['Control', '8_AC24L', '14_AC23L'], 
                       ['Deafened', '5_VC22L', '10_VC25L'], ['Deafened', '6_AC22L', '13_AC25L'], 
                       ['MD Contra. Hem. (L)', '16_VC19L', '1_VC20L'], ['MD Contra. Hem. (L)', '19_AC19L', '2_AC20L'], 
                       ['MD Ipsi. Hem. (R)', '17_VC19R', '3_VC20R'], ['MD Ipsi. Hem. (R)', '20_AC19R', '4_AC20R']]

# Same three-element format; the first element is the comparison label that also prefixes the file name.
plastComparLst = [['Deaf VC vs. Control VC', '5_VC22L', '7_VC24L'], ['Deaf VC vs. Control VC', '5_VC22L', '9_VC23L'], 
                  ['Deaf VC vs. Control VC', '10_VC25L', '7_VC24L'], ['Deaf VC vs. Control VC', '10_VC25L', '9_VC23L'],
                  ['Deaf AC vs. Control AC', '6_AC22L', '8_AC24L'], ['Deaf AC vs. Control AC', '6_AC22L', '14_AC23L'],
                  ['Deaf AC vs. Control AC', '13_AC25L', '8_AC24L'], ['Deaf AC vs. Control AC', '13_AC25L', '14_AC23L'],
                  ['MD VC vs. Control VC', '16_VC19L', '7_VC24L'], ['MD VC vs. Control VC', '16_VC19L', '9_VC23L'],
                  ['MD VC vs. Control VC', '1_VC20L', '7_VC24L'], ['MD VC vs. Control VC', '1_VC20L', '9_VC23L'],
                  ['MD AC vs. Control AC', '19_AC19L', '8_AC24L'], ['MD AC vs. Control AC', '19_AC19L', '14_AC23L'],
                  ['MD AC vs. Control AC', '2_AC20L', '8_AC24L'], ['MD AC vs. Control AC', '2_AC20L', '14_AC23L']]

# Same three-element format; both samples of a pair carry the same mouse number (19 or 20)
# and differ in the trailing L / R.
hemComparLst = [['VC Contra (L) vs. Ipsi Hem (R)', '16_VC19L', '17_VC19R'], 
                ['AC Contra (L) vs. Ipsi Hem (R)', '19_AC19L', '20_AC19R'], 
                ['VC Contra (L) vs. Ipsi Hem (R)', '1_VC20L', '3_VC20R'], 
                ['AC Contra (L) vs. Ipsi Hem (R)', '2_AC20L','4_AC20R']]

# Adding Sample Sex, QC, and DE Info 

In [15]:
# Per-cluster QC metrics and 'Half 1' vs 'Half 2' DE scores for every individual sample.
# thresholdDEGenes later looks for the per-cluster files in f'{folderName}_Clusters',
# so saveFolderName has to keep that suffix.
folderName = 'IndividualSamples'
saveFolderName = 'IndividualSamples_Clusters'

addingQCandDEscore(sampleNameLst, folderName, saveFolderName)
# Disabled: addingSexInfo overwrites the .h5ad files in folderName in place.
# addingSexInfo(mouseSexLst, sampleNameLst, folderName)

  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


In [16]:
# Per-cluster QC metrics and sample-vs-sample DE scores for the biological replica pairs.
# NOTE(review): the output saved with this cell ends in a ValueError for group 20_AC19R
# ("only contain one sample"); addingQCandDEscore as defined above skips clusters with
# fewer than two nuclei per group, so the output presumably predates that guard - re-run to confirm.
folderName = 'BiologicalReplicaSamples'
saveFolderName = 'BiologicalReplicaSamples_Clusters'

addingQCandDEscore(bioRepComparisonLst, folderName, saveFolderName)
# Disabled: addingSexInfo overwrites the .h5ad files in folderName in place.
# addingSexInfo(mouseSexLst, bioRepComparisonLst, folderName)

  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


  self.data[key] = value


Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


Only considering the two last: ['. (L)_19_AC19L_2_AC20L', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L', '.h5ad'].


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


Only considering the two last: ['. (R)_17_VC19R_3_VC20R', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R', '.h5ad'].


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


Only considering the two last: ['. (R)_20_AC19R_4_AC20R', '.h5ad'].
Only considering the two last: ['. (R)_20_AC19R_4_AC20R', '.h5ad'].


  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value
  self.data[key] = value


ValueError: Could not calculate statistics for groups 20_AC19R since they only contain one sample.

In [16]:
# Per-cluster QC metrics and sample-vs-sample DE scores for the plasticity comparisons.
# NOTE(review): the output saved with this cell is a KeyboardInterrupt, so the per-cluster
# files for this folder may be incomplete - re-run to confirm.
folderName = 'PlasticityComparisons'
saveFolderName = 'PlasticityComparisons_Clusters'

addingQCandDEscore(plastComparLst, folderName, saveFolderName)
# Disabled: addingSexInfo overwrites the .h5ad files in folderName in place.
# addingSexInfo(mouseSexLst, plastComparLst, folderName)

KeyboardInterrupt: 

In [29]:
# Per-cluster QC metrics and sample-vs-sample DE scores for the hemispheric comparisons.
# Each printed line pair is "cluster, nuclei in sample 1, nuclei in sample 2" followed by
# "cluster, n_obs, n_vars"; the second line only appears for clusters that are processed.
folderName = 'HemisphericComparisons'
saveFolderName = 'HemisphericComparisons_Clusters'

addingQCandDEscore(hemComparLst, folderName, saveFolderName)
# Disabled: addingSexInfo overwrites the .h5ad files in folderName in place.
# addingSexInfo(mouseSexLst, hemComparLst, folderName)

0 1037 1187
0 2224 32165


  self.data[key] = value


1 868 856
1 1724 32165


  self.data[key] = value


2 824 841
2 1665 32165


  self.data[key] = value


3 1195 381
3 1576 32165


  self.data[key] = value


4 723 828
4 1551 32165


  self.data[key] = value


5 777 624
5 1401 32165


  self.data[key] = value


6 506 651
6 1157 32165


  self.data[key] = value


7 426 480
7 906 32165


  self.data[key] = value


8 405 381
8 786 32165


  self.data[key] = value


9 315 301
9 616 32165


  self.data[key] = value


10 254 274
10 528 32165


  self.data[key] = value


11 234 231
11 465 32165


  self.data[key] = value


12 202 236
12 438 32165


  self.data[key] = value


13 197 210
13 407 32165


  self.data[key] = value


14 153 168
14 321 32165


  self.data[key] = value


15 131 153
15 284 32165


  self.data[key] = value


16 95 78
16 173 32165


  self.data[key] = value


17 84 58
17 142 32165


  self.data[key] = value


0 1314 1296
0 2610 32165


  self.data[key] = value


1 1134 1145
1 2279 32165


  self.data[key] = value


2 1223 946
2 2169 32165


  self.data[key] = value


3 568 736
3 1304 32165


  self.data[key] = value


4 570 680
4 1250 32165


  self.data[key] = value


5 585 558
5 1143 32165


  self.data[key] = value


6 1054 6
6 1060 32165


  self.data[key] = value


7 83 861
7 944 32165


  self.data[key] = value


8 549 372
8 921 32165


  self.data[key] = value


9 397 469
9 866 32165


  self.data[key] = value


10 168 628
10 796 32165


  self.data[key] = value


11 376 400
11 776 32165


  self.data[key] = value


12 102 347
12 449 32165


  self.data[key] = value


13 207 187
13 394 32165


  self.data[key] = value


14 158 113
14 271 32165


  self.data[key] = value


15 127 125
15 252 32165


  self.data[key] = value


16 95 151
16 246 32165


  self.data[key] = value


17 189 1
18 91 75
18 166 32165


  self.data[key] = value


0 1248 1366
0 2614 32165


  self.data[key] = value


1 1180 1236
1 2416 32165


  self.data[key] = value


2 775 797
2 1572 32165


  self.data[key] = value


3 720 825
3 1545 32165


  self.data[key] = value


4 337 631
4 968 32165


  self.data[key] = value


5 420 521
5 941 32165


  self.data[key] = value


6 432 491
6 923 32165


  self.data[key] = value


7 449 432
7 881 32165


  self.data[key] = value


8 376 417
8 793 32165


  self.data[key] = value


9 290 226
9 516 32165


  self.data[key] = value


10 232 254
10 486 32165


  self.data[key] = value


11 194 274
11 468 32165


  self.data[key] = value


12 219 202
12 421 32165


  self.data[key] = value


13 172 207
13 379 32165


  self.data[key] = value


14 275 79
14 354 32165


  self.data[key] = value


15 122 165
15 287 32165


  self.data[key] = value


16 66 73
16 139 32165


  self.data[key] = value


0 1359 1103
0 2462 32165


  self.data[key] = value


1 1077 1208
1 2285 32165


  self.data[key] = value


2 1067 885
2 1952 32165


  self.data[key] = value


3 766 894
3 1660 32165


  self.data[key] = value


4 826 549
4 1375 32165


  self.data[key] = value


5 697 520
5 1217 32165


  self.data[key] = value


6 567 601
6 1168 32165


  self.data[key] = value


7 14 1078
7 1092 32165


  self.data[key] = value


8 414 453
8 867 32165


  self.data[key] = value


9 395 333
9 728 32165


  self.data[key] = value


10 135 424
10 559 32165


  self.data[key] = value


11 262 182
11 444 32165


  self.data[key] = value


12 244 112
12 356 32165


  self.data[key] = value


13 204 130
13 334 32165


  self.data[key] = value


14 200 119
14 319 32165


  self.data[key] = value


15 34 125
15 159 32165


  self.data[key] = value


16 75 75
16 150 32165


  self.data[key] = value


17 22 30
17 52 32165


  self.data[key] = value


## Counting the DE Genes, Calculating Fold Change, and Saving Counts in Dictionary File

In [13]:
'''COUNTING DE GENES FOR INDIVIDUAL SAMPLE HALVES'''

# Reads the per-cluster files from 'IndividualSamples_Clusters' (folderName + '_Clusters').
folderName = 'IndividualSamples'
# Receives a pickled dict {sample name: [count per cluster, ...]}; written in binary mode.
savePath = 'F:/FigureCalculations/Figure4/diffExp_WithinSample'

thresholdDEGenes(sampleNameLst, folderName, savePath)

{'1_VC20L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '2_AC20L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '3_VC20R': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '4_AC20R': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '5_VC22L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '6_AC22L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '7_VC24L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '8_AC24L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '9_VC23L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '10_VC25L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '13_AC25L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '14_AC23L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '16_VC19L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '17_VC19R': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '19_AC19L': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '20_AC19R': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
'''CALCULATING FOLD CHANGE FOR INDIVIDUAL SAMPLE HALVES'''

folderName = 'IndividualSamples'
# Receives a pickled dict {sample name: [fold change, ...]}; written in binary mode.
savePath = 'F:/FigureCalculations/Figure4/foldChange_WithinSample'

# NOTE(review): this cell has no execution count or output in the saved notebook.
calculatingFoldChange(sampleNameLst, folderName, savePath)

In [9]:
'''COUNTING DE GENES FOR BIOLOGICAL REPLICAS'''

# Reads the per-cluster files from 'BiologicalReplicaSamples_Clusters' (folderName + '_Clusters').
folderName = 'BiologicalReplicaSamples'
# Receives a pickled dict {'<sample1>_<sample2>': [count per cluster, ...]}; a count of -5
# marks a cluster whose per-cluster file was not found.
savePath = 'F:/FigureCalculations/Figure4/diffExp_BiologicalReplicaSamples'

thresholdDEGenes(bioRepComparisonLst, folderName, savePath)

Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_0', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_0', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_1', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_1', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_2', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_2', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_3', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_3', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_4', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_4', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_5', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L_5', '.h5ad'].
Only considering the two

Only considering the two last: ['. (L)_19_AC19L_2_AC20L_3', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_3', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_4', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_4', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_5', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_5', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_6', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_6', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_7', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_7', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_8', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_8', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_9', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L_9', '.h5ad'].
Only considering the

Only considering the two last: ['. (R)_17_VC19R_3_VC20R_8', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_8', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_9', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_9', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_10', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_10', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_11', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_11', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_12', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_12', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_13', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_13', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_14', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R_14', '.h5ad'].
Only consi

In [None]:
'''CALCULATING FOLD CHANGE FOR BIOLOGICAL REPLICAS'''
# Driver cell: hands bioRepComparisonLst plus an input-folder name and an
# output path to calculatingFoldChange. Both bioRepComparisonLst and
# calculatingFoldChange are defined outside this cell.
# NOTE(review): folderName and savePath are notebook-level globals that the
# other cells in this section reassign, so their values depend on run order.
# NOTE(review): this cell has no saved execution count or output.

# Input folder name; presumably resolved under F:/SampleData/ the way
# addingQCandDEscore does it -- TODO confirm inside calculatingFoldChange.
folderName = 'BiologicalReplicaSamples'
# Hard-coded absolute output location on the F: drive; whether the helper
# treats it as a directory or a file stem is not visible here -- TODO confirm.
savePath = 'F:/FigureCalculations/Figure4/foldChange_BiologicalReplicaSamples'

calculatingFoldChange(bioRepComparisonLst, folderName, savePath)

In [10]:
'''COUNTING DE GENES FOR PLASTICITY COMPARISONS'''
# Driver cell: hands plastComparLst plus an input-folder name and an output
# path to thresholdDEGenes. Both plastComparLst and thresholdDEGenes are
# defined outside this cell.
# NOTE(review): folderName and savePath are notebook-level globals that the
# other cells in this section reassign, so their values depend on run order.
# The saved output is a dict keyed by sample-pair name (e.g. '5_VC22L_7_VC24L')
# whose values are lists of integers -- presumably one DE-gene count per
# cluster. Some entries are -5, which looks like a sentinel rather than a
# count -- TODO confirm both points inside thresholdDEGenes.

# Input folder name; presumably resolved under F:/SampleData/ the way
# addingQCandDEscore does it -- TODO confirm inside thresholdDEGenes.
folderName = 'PlasticityComparisons'
# Hard-coded absolute output location on the F: drive; whether the helper
# treats it as a directory or a file stem is not visible here -- TODO confirm.
savePath = 'F:/FigureCalculations/Figure4/diffExp_PlasticityComparisons'

thresholdDEGenes(plastComparLst, folderName, savePath)

{'5_VC22L_7_VC24L': [5, 8, 1, 5, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0], '5_VC22L_9_VC23L': [41, 13, 40, 14, 21, 5, 6, 9, 8, 3, 5, 6, 2, 2, -5, 1, 1, -5, 1, 1, 0], '10_VC25L_7_VC24L': [14, 16, 11, 53, 24, 13, 10, 5, 4, 6, 5, 4, 5, 1, 2, 2, 1, 1, 1, 0], '10_VC25L_9_VC23L': [2, 4, 2, 0, 4, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '6_AC22L_8_AC24L': [15, 14, 3, 8, 5, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], '6_AC22L_14_AC23L': [58, 66, 40, 24, 15, 5, 17, 4, 4, 5, 1, 2, 1, 1, 0, 0], '13_AC25L_8_AC24L': [10, 11, 19, 8, 9, 7, 16, 4, 4, 4, 2, 2, 2, 2, 0, 1], '13_AC25L_14_AC23L': [3, 5, 11, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0], '16_VC19L_7_VC24L': [42, 29, 13, 24, 11, 22, 4, 1, 8, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0], '16_VC19L_9_VC23L': [17, 66, 10, 49, 16, 16, 18, 7, 9, 9, 5, 4, 8, 1, 2, 3, 1, 2, -5, 1, 0], '1_VC20L_7_VC24L': [41, 29, 15, 30, 18, 7, 8, 3, 10, 1, 7, 1, 1, 0, 0, 0, 0, 1, 0], '1_VC20L_9_VC23L': [106, 42, 75, 32, 12, 18, 13, 16, 21, 11, 7, 1, 5, 0, 5, 3, 2, 5, 0, 0, 0], '19_AC

In [None]:
'''CALCULATING FOLD CHANGE FOR PLASTICITY COMPARISONS'''
# Driver cell: hands plastComparLst plus an input-folder name and an output
# path to calculatingFoldChange. Both plastComparLst and calculatingFoldChange
# are defined outside this cell.
# NOTE(review): folderName and savePath are notebook-level globals that the
# other cells in this section reassign, so their values depend on run order.
# NOTE(review): this cell has no saved execution count or output.

# Input folder name; presumably resolved under F:/SampleData/ the way
# addingQCandDEscore does it -- TODO confirm inside calculatingFoldChange.
folderName = 'PlasticityComparisons'
# Hard-coded absolute output location on the F: drive; whether the helper
# treats it as a directory or a file stem is not visible here -- TODO confirm.
savePath = 'F:/FigureCalculations/Figure4/foldChange_PlasticityComparisons'

calculatingFoldChange(plastComparLst, folderName, savePath)

In [11]:
'''COUNTING DE GENES FOR HEMISPHERIC COMPARISONS'''
# Driver cell: hands hemComparLst plus an input-folder name and an output path
# to thresholdDEGenes. Both hemComparLst and thresholdDEGenes are defined
# outside this cell.
# NOTE(review): folderName and savePath are notebook-level globals that the
# other cells in this section reassign, so their values depend on run order.
# The saved output is a dict keyed by sample-pair name (e.g.
# '16_VC19L_17_VC19R') whose values are lists of integers -- presumably one
# DE-gene count per cluster. One entry is -5, which looks like a sentinel
# rather than a count -- TODO confirm both points inside thresholdDEGenes.

# Input folder name; presumably resolved under F:/SampleData/ the way
# addingQCandDEscore does it -- TODO confirm inside thresholdDEGenes.
folderName = 'HemisphericComparisons'
# Hard-coded absolute output location on the F: drive; whether the helper
# treats it as a directory or a file stem is not visible here -- TODO confirm.
savePath = 'F:/FigureCalculations/Figure4/diffExp_HemiComparisons'

thresholdDEGenes(hemComparLst, folderName, savePath)

{'16_VC19L_17_VC19R': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '19_AC19L_20_AC19R': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -5, 0], '1_VC20L_3_VC20R': [1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], '2_AC20L_4_AC20R': [0, 2, 0, 0, 0, 0, 0, 0, 4, 0, 132, 0, 0, 0, 0, 0, 0, 0]}


In [55]:
'''CALCULATING FOLD CHANGE FOR HEMISPHERIC COMPARISONS'''
# Driver cell: hands hemComparLst plus an input-folder name and an output path
# to calculatingFoldChange. Both hemComparLst and calculatingFoldChange are
# defined outside this cell.
# NOTE(review): folderName and savePath are notebook-level globals that the
# other cells in this section reassign, so their values depend on run order.
# NOTE(review): the saved execution count of this cell (55) is out of sequence
# with the neighbouring cells (9-11); re-run from a fresh kernel to confirm the
# notebook still reproduces top to bottom.

# Input folder name; presumably resolved under F:/SampleData/ the way
# addingQCandDEscore does it -- TODO confirm inside calculatingFoldChange.
folderName = 'HemisphericComparisons'
# Hard-coded absolute output location on the F: drive; whether the helper
# treats it as a directory or a file stem is not visible here -- TODO confirm.
savePath = 'F:/FigureCalculations/Figure4/foldChange_HemiComparisons'

calculatingFoldChange(hemComparLst, folderName, savePath)