In [1]:
import anndata, pickle, os, fnmatch, math, random
import scanpy as sc
import numpy as np
import pandas as pd

# Lists Used in All Files 

In [1]:
# Sex of every mouse, stored as [mouse number, sex] pairs
mouseSexLst = [
    ['19', 'male'],
    ['20', 'male'],
    ['22', 'male'],
    ['23', 'female'],
    ['24', 'male'],
    ['25', 'female'],
]

In [2]:
# Names of all samples, in the order they are processed
sampleNameLst = [
    '1_VC20L', '2_AC20L', '3_VC20R', '4_AC20R',
    '5_VC22L', '6_AC22L', '7_VC24L', '8_AC24L',
    '9_VC23L', '10_VC25L', '13_AC25L', '14_AC23L',
    '16_VC19L', '17_VC19R', '19_AC19L', '20_AC19R',
]

In [3]:
# Experimental condition of each sample, stored as [condition, sample name] pairs
sampleNameConditionLst = [
    ['MD Contra. Hem. (L)', '1_VC20L'],
    ['MD Contra. Hem. (L)', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '4_AC20R'],
    ['Deafened', '5_VC22L'],
    ['Deafened', '6_AC22L'],
    ['Control', '7_VC24L'],
    ['Control', '8_AC24L'],
    ['Control', '9_VC23L'],
    ['Deafened', '10_VC25L'],
    ['Deafened', '13_AC25L'],
    ['Control', '14_AC23L'],
    ['MD Contra. Hem. (L)', '16_VC19L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R'],
    ['MD Contra. Hem. (L)', '19_AC19L'],
    ['MD Ipsi. Hem. (R)', '20_AC19R'],
]

In [4]:
# Pairs of biological replicates to compare, stored as
# [condition, first replicate sample, second replicate sample]
bioRepComparisonLst = [
    ['Control', '7_VC24L', '9_VC23L'],
    ['Control', '8_AC24L', '14_AC23L'],
    ['Deafened', '5_VC22L', '10_VC25L'],
    ['Deafened', '6_AC22L', '13_AC25L'],
    ['MD Contra. Hem. (L)', '16_VC19L', '1_VC20L'],
    ['MD Contra. Hem. (L)', '19_AC19L', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '20_AC19R', '4_AC20R'],
]

In [5]:
# Plasticity comparisons, stored as [comparison label, treated sample, control sample].
# Every treated sample is paired with both control samples of the same cortical
# region, so each label appears exactly four times. The labels must be spelled
# identically within a group, otherwise grouping by label splits the group.
plastComparLst = [
    ['VC Deaf vs. Control', '5_VC22L', '7_VC24L'],
    ['VC Deaf vs. Control', '5_VC22L', '9_VC23L'],
    ['VC Deaf vs. Control', '10_VC25L', '7_VC24L'],
    ['VC Deaf vs. Control', '10_VC25L', '9_VC23L'],
    ['AC Deaf vs. Control', '6_AC22L', '8_AC24L'],
    ['AC Deaf vs. Control', '6_AC22L', '14_AC23L'],
    ['AC Deaf vs. Control', '13_AC25L', '8_AC24L'],
    ['AC Deaf vs. Control', '13_AC25L', '14_AC23L'],
    ['VC MD vs. Control', '16_VC19L', '7_VC24L'],
    ['VC MD vs. Control', '16_VC19L', '9_VC23L'],
    ['VC MD vs. Control', '1_VC20L', '7_VC24L'],
    ['VC MD vs. Control', '1_VC20L', '9_VC23L'],
    ['AC MD vs. Control', '19_AC19L', '8_AC24L'],
    ['AC MD vs. Control', '19_AC19L', '14_AC23L'],
    ['AC MD vs. Control', '2_AC20L', '8_AC24L'],
    ['AC MD vs. Control', '2_AC20L', '14_AC23L'],
]

In [6]:
# Hemispheric comparisons, stored as
# [comparison label, left-hemisphere sample, right-hemisphere sample]
hemComparLst = [
    ['VC Contra (L) vs. Ipsi Hem (R)', '16_VC19L', '17_VC19R'],
    ['AC Contra (L) vs. Ipsi Hem (R)', '19_AC19L', '20_AC19R'],
    ['VC Contra (L) vs. Ipsi Hem (R)', '1_VC20L', '3_VC20R'],
    ['AC Contra (L) vs. Ipsi Hem (R)', '2_AC20L', '4_AC20R'],
]

# General Functions

In [2]:
# Loads a dictionary that was previously saved to a .pkl file
def readDict(filePath):
    """Unpickle and return the object stored in the file at ``filePath``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(filePath, 'rb') as pklHandle:
        return pickle.load(pklHandle)

In [None]:
# Orders cluster labels numerically (not alphabetically)
def sortClust(clusterStrLst):
    """Return the cluster number strings sorted in increasing numeric order.

    Each entry is converted to an integer, the integers are sorted, and the
    result is converted back to strings (so '10' sorts after '2').
    """
    return [str(clusterNum) for clusterNum in sorted(int(label) for label in clusterStrLst)]

In [None]:
# Finds the paths of all files whose name contains a keyword
def search_files(folder_path, keyword):
    """Walk ``folder_path`` recursively and collect matching file paths.

    A file matches when its name (not its directory) fits the wildcard
    pattern ``*keyword*``. Returns a list of full paths; the list is empty
    when nothing matches.
    """
    pattern = f'*{keyword}*'
    return [
        os.path.join(root, filename)
        for root, _dirnames, filenames in os.walk(folder_path)
        for filename in filenames
        if fnmatch.fnmatch(filename, pattern)
    ]

In [None]:
def mouseSex(sampleName, sexLst=None):
    """Return the sex of the mouse that a sample was taken from.

    Sample names follow the pattern '<sample index>_<region><mouse number><hemisphere>',
    e.g. '20_AC19R' is sample 20 from mouse 19. The mouse number is parsed
    from the part after the underscore, so the leading sample index cannot
    be mistaken for a mouse number. If the name does not follow that pattern
    (or the parsed number is not in the table), the function falls back to a
    plain substring search over the table.

    Parameters:
        sampleName: string containing the sample name.
        sexLst: optional list of [mouse number, sex] entries; defaults to
            the notebook-level ``mouseSexLst``.

    Returns:
        The sex string from the matching entry, or None when no entry matches.
    """
    import re  # local import so this cell does not depend on the import cell

    if sexLst is None:
        sexLst = mouseSexLst

    # Preferred lookup: the digits that directly follow '_<letters>'
    parsed = re.search(r'_[A-Za-z]+(\d+)', sampleName)
    if parsed is not None:
        for mouseInfo in sexLst:
            if mouseInfo[0] == parsed.group(1):
                return mouseInfo[1]

    # Fallback lookup: first entry whose mouse number occurs anywhere in the name
    for mouseInfo in sexLst:
        if mouseInfo[0] in sampleName:
            return mouseInfo[1]

    return None

In [1]:
def comaringSex(sample1_name, sample2_name, sampleDir='F:/SampleData/IndividualSamples'):
    """Report whether two saved samples come from mice of the same sex.

    Both samples are read with ``sc.read`` from ``sampleDir`` and the unique
    values of their ``obs['sex']`` columns are compared.

    Parameters:
        sample1_name: file name of the first sample inside ``sampleDir``.
        sample2_name: file name of the second sample inside ``sampleDir``.
        sampleDir: folder holding the individual sample files. Defaults to
            the location that was previously hard-coded, so existing calls
            behave as before.

    Returns:
        True when the first unique 'sex' value of both samples is equal,
        otherwise False.
    """
    sample1 = sc.read(f'{sampleDir}/{sample1_name}')
    sample2 = sc.read(f'{sampleDir}/{sample2_name}')

    sample1_sex = list(np.unique(sample1.obs['sex']))
    sample2_sex = list(np.unique(sample2.obs['sex']))

    # Only the first unique value of each sample is compared; this presumes
    # every sample holds cells from a single mouse -- TODO confirm.
    return bool(sample1_sex[0] == sample2_sex[0])

In [None]:
# Tells whether a file is present inside a given folder
def check_file_exists(folder_path, file_name):
    """Return True when ``file_name`` exists inside ``folder_path``, else False."""
    return os.path.exists(os.path.join(folder_path, file_name))

In [None]:
# Adds differential-expression (DE) scores for two groups to an AnnData object
def addingDEscore(sampleObj, divideby, cat1, cat2):
    """Store per-gene Wilcoxon DE scores for two groups in ``sampleObj.var``.

    Parameters:
        sampleObj: AnnData object the DE scores are added to (modified in place).
        divideby: name of the ``sampleObj.obs`` column used to group the cells.
        cat1: name of one of the two groups in the ``divideby`` column.
        cat2: name of the other group in the ``divideby`` column.

    Returns:
        The same ``sampleObj``, now with the columns '<cat1>_DE_Score' and
        '<cat2>_DE_Score' in ``sampleObj.var``, ordered like ``sampleObj.var_names``.
    """
    # rank_genes_groups looks up uns['log1p']['base']; make sure the entry exists.
    # NOTE: this replaces any 'log1p' entry already stored on the object.
    sampleObj.uns['log1p'] = {'base': None}

    # Rank the genes of every group with the Wilcoxon test
    sc.tl.rank_genes_groups(sampleObj, divideby, method='wilcoxon')

    # One column per group: the ranked gene names and their scores
    deResults = sampleObj.uns['rank_genes_groups']
    allScores = pd.DataFrame(deResults['scores'])
    allNames = pd.DataFrame(deResults['names'])

    # Label the columns of the two requested groups
    for cat in (cat1, cat2):
        allNames = allNames.rename(columns={cat: f'{cat} GeneName'})
        allScores = allScores.rename(columns={cat: f'{cat} Score'})

    # Pair names with scores per group and index each table by gene name
    perCatTables = []
    for cat in (cat1, cat2):
        pairedDf = pd.concat([allNames[f'{cat} GeneName'], allScores[f'{cat} Score']], axis=1)
        pairedDf.index = pairedDf[f'{cat} GeneName']
        perCatTables.append(pairedDf)

    # Put the rows in the gene order of the AnnData object
    geneOrder = list(sampleObj.var_names)
    alignedTables = [table.reindex(geneOrder) for table in perCatTables]

    # Write the aligned scores into the gene annotation table
    for cat, table in zip((cat1, cat2), alignedTables):
        sampleObj.var[f'{cat}_DE_Score'] = table[f'{cat} Score']

    return sampleObj