In [16]:
import anndata
import scanpy as sc
import numpy as np
import pickle
import os
import fnmatch
import pandas as pd 
import math 

In [34]:
# Finds the paths of all files whose name contains a keyword (written with ChatGPT)
def search_files(folder_path, keyword):
    """Recursively list the files under ``folder_path`` whose name matches ``*keyword*``.

    The match is done with ``fnmatch`` on the bare file name (not the full path),
    so shell-style wildcard characters inside ``keyword`` are interpreted as
    wildcards.

    Parameters
    ----------
    folder_path : str
        Directory that is walked top-down with ``os.walk``.
    keyword : str
        Text that must appear in the file name.

    Returns
    -------
    list of str
        Matching paths (directory joined with file name), in walk order.
        Empty when nothing matches.
    """
    pattern = f'*{keyword}*'
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(folder_path)
        for entry in fnmatch.filter(entries, pattern)
    ]

# Saves a list to a .txt file, one item per line (written with ChatGPT)
def save_list_to_file(lst, file_path):
    """Write every element of ``lst`` to ``file_path`` as text, one element per line.

    Each element is converted with ``str`` and followed by a newline. The file is
    opened in ``'w'`` mode, so existing content is overwritten.

    Parameters
    ----------
    lst : iterable
        Items to write.
    file_path : str
        Destination file.
    """
    with open(file_path, 'w') as out:
        out.writelines(str(entry) + '\n' for entry in lst)

def _log2MeanRatio(mean1, mean2):
    """Return ``log2(mean1 / mean2)``, using fixed stand-in values when a mean is zero."""
    # The both-zero case must be tested first; otherwise the ``mean1 == 0``
    # branch catches it and the gene is reported as 10 instead of 0.
    if mean1 == 0 and mean2 == 0:
        return 0
    # NOTE(review): log2(mean1 / mean2) tends to -inf when mean1 is 0 and to +inf
    # when mean2 is 0, yet the stand-ins are +10 and -10 respectively. The values
    # are kept exactly as before; confirm that this orientation is intended.
    if mean1 == 0:
        return 10
    if mean2 == 0:
        return -10
    return math.log2(mean1 / mean2)


def _meanPerGene(groupObj):
    """Return the column-wise mean of ``groupObj.X`` as a list (one entry per gene)."""
    matrix = groupObj.X
    # Sparse matrices are densified first; a dense ``X`` is used as it is.
    if hasattr(matrix, 'toarray'):
        matrix = matrix.toarray()
    return list(np.mean(matrix, axis=0))


def differentialExpScatter(sampleObj, sampleName, divideby, xFilePath, yFilePath):
    """Compute per-gene DE scores and log2 mean-expression ratios and save both lists.

    Runs ``sc.tl.rank_genes_groups`` (Wilcoxon) on ``sampleObj`` grouped by
    ``obs[divideby]``; the result is stored in ``sampleObj.uns``. Then, for every
    gene in ``var_names`` order, two values are collected:

    * x -- the ranking score of the gene for the group ``sampleName``;
    * y -- ``log2(mean1 / mean2)``, where ``mean1`` / ``mean2`` are the gene's mean
      values in ``X`` over the cells of the first / second group (groups ordered
      as returned by ``np.unique``). When a mean is zero a fixed value is used
      instead: 0 if both are zero, 10 if only ``mean1`` is zero, -10 if only
      ``mean2`` is zero.

    Parameters
    ----------
    sampleObj : AnnData
        Object holding the expression matrix; modified in place by the ranking.
    sampleName : str
        Name of the group (a value of ``obs[divideby]``) whose scores are saved.
    divideby : str
        Column of ``sampleObj.obs`` that defines the groups. Only the first two
        groups are compared.
    xFilePath, yFilePath : str
        Text files that receive the x and y lists, one value per line.

    Raises
    ------
    ValueError
        If the ranking for ``sampleName`` contains a gene name more than once, or
        does not contain one of the genes in ``var_names``.
    """
    sc.tl.rank_genes_groups(sampleObj, divideby, method='wilcoxon')

    rankedGenes = sampleObj.uns['rank_genes_groups']
    score_df = pd.DataFrame(rankedGenes['scores'])
    geneName_df = pd.DataFrame(rankedGenes['names'])

    # Build the gene -> score lookup once instead of scanning the whole names
    # column for every gene. ``to_numpy`` keeps the numpy scalar type of the
    # scores, so their text form in the output file does not change.
    rankedNames = geneName_df[sampleName].to_numpy()
    rankedScores = score_df[sampleName].to_numpy()
    scoreByGene = dict(zip(rankedNames, rankedScores))
    if len(scoreByGene) != len(rankedNames):
        raise ValueError(
            f"Gene names are not unique in the ranking for group '{sampleName}'")

    try:
        x = [scoreByGene[geneName] for geneName in sampleObj.var_names]
    except KeyError as err:
        raise ValueError(
            f"Gene {err} has no score in the ranking for group '{sampleName}'") from err

    groupNames = list(np.unique(sampleObj.obs[divideby]))
    sample1_name = groupNames[0]
    sample2_name = groupNames[1]

    sample1_obj = sampleObj[sampleObj.obs[divideby].isin([sample1_name]), :]
    sample2_obj = sampleObj[sampleObj.obs[divideby].isin([sample2_name]), :]

    sample1_meanGeneExpression = _meanPerGene(sample1_obj)
    sample2_meanGeneExpression = _meanPerGene(sample2_obj)

    y = [
        _log2MeanRatio(mean1, mean2)
        for mean1, mean2 in zip(sample1_meanGeneExpression, sample2_meanGeneExpression)
    ]

    save_list_to_file(x, xFilePath)
    save_list_to_file(y, yFilePath)
    
# Disabled plotting code, kept for reference only. The triple-quoted block below
# starts at column 0, so it is a bare string expression at cell level (outside
# differentialExpScatter) and is never executed. It refers to names (ax, ticks,
# labels, yTicks) that are not defined in the visible notebook cells.
#     ticks, labels = yTicks(y)
    
'''    ax.scatter(x, y)
#     ax.set_xlabel(f' {sampleName} DE Score')
    ax.set_xlabel('DE Score')
    ax.set_ylabel('Log Base Differential Gene Expression')
    ax.set_yticks(ticks, labels)
    ax.set_title(f'DE Score vs. Differential Gene Expression - {sample1_name} vs. {sample2_name}')'''

def _readFirstMatch(folderPath, keyword):
    """Load the first file under ``folderPath`` whose name contains ``keyword``."""
    sampleFilePaths = search_files(folderPath, keyword)
    if not sampleFilePaths:
        # Fail with the pattern and folder instead of a bare IndexError on [0].
        raise FileNotFoundError(
            f"No file matching '*{keyword}*' found under '{folderPath}'")
    sampleObj = sc.read(sampleFilePaths[0])
    # NOTE(review): presumably the usual workaround for rank_genes_groups failing
    # with KeyError 'base' on objects reloaded from disk -- confirm.
    sampleObj.uns['log1p'] = {'base': None}
    return sampleObj


def savingDiffScatter(sampleComparLst, folderPath, savePath):
    """Run the differential-expression scatter calculation for a list of comparisons.

    Parameters
    ----------
    sampleComparLst : list of list of str
        One entry per comparison.

        * ``[condition, sampleName]`` (length 2) -- within-sample comparison. The
          file matching ``sampleName`` is loaded and ``differentialExpScatter`` is
          run once for the group ``'<sampleName> Half 1'`` and once for
          ``'<sampleName> Half 2'``, grouping cells by ``obs['Half']``.
        * any other length is read as ``[condition, sample1, sample2]`` --
          between-sample comparison. The file matching ``'<sample1>_<sample2>'``
          is loaded and handed to ``diffScatterCalc`` with grouping column
          ``'sample'``.
    folderPath : str
        Folder searched recursively for the input files. If several files match,
        the first one found is used.
    savePath : str
        Prefix for the output files. It is concatenated directly with the file
        name, so it should end with a path separator.

    Raises
    ------
    FileNotFoundError
        If no file under ``folderPath`` matches a comparison.
    """
    for sampleLst in sampleComparLst:
        condition = sampleLst[0]

        # Within sample calculation
        if len(sampleLst) == 2:
            sampleName = sampleLst[1]
            sampleObj = _readFirstMatch(folderPath, sampleName)
            divideby = 'Half'

            for halfLabel in ('Half 1', 'Half 2'):
                sample_half = f'{sampleName} {halfLabel}'
                differentialExpScatter(sampleObj, sample_half, divideby,
                                       f'{savePath}x_{condition}_{sample_half}',
                                       f'{savePath}y_{condition}_{sample_half}')

        # Between sample calculation
        else:
            sample1_name = sampleLst[1]
            sample2_name = sampleLst[2]
            sampleObj = _readFirstMatch(folderPath, f'{sample1_name}_{sample2_name}')
            divideby = 'sample'

            # NOTE(review): diffScatterCalc is not defined in the visible notebook
            # cells; on a fresh kernel this branch raises NameError unless it is
            # defined or imported elsewhere -- confirm where it lives.
            diffScatterCalc(sampleObj, divideby, savePath, condition)

In [4]:
'''SAMPLE COMPARISON LISTS'''

# Within-sample entries: [condition, sample].
sampleNameLst = [
    ['MD Contra. Hem. (L)', '1_VC20L'],
    ['MD Contra. Hem. (L)', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '4_AC20R'],
    ['Deafened', '5_VC22L'],
    ['Deafened', '6_AC22L'],
    ['Control', '7_VC24L'],
    ['Control', '8_AC24L'],
    ['Control', '9_VC23L'],
    ['Deafened', '10_VC25L'],
    ['Deafened', '13_AC25L'],
    ['Control', '14_AC23L'],
    ['MD Contra. Hem. (L)', '16_VC19L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R'],
    ['MD Contra. Hem. (L)', '19_AC19L'],
    ['MD Ipsi. Hem. (R)', '20_AC19R'],
]

# Biological-replica entries: [condition, sample 1, sample 2].
bioRepComparisonLst = [
    ['Control', '7_VC24L', '9_VC23L'],
    ['Control', '8_AC24L', '14_AC23L'],
    ['Deafened', '5_VC22L', '10_VC25L'],
    ['Deafened', '6_AC22L', '13_AC25L'],
    ['MD Contra. Hem. (L)', '16_VC19L', '1_VC20L'],
    ['MD Contra. Hem. (L)', '19_AC19L', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '20_AC19R', '4_AC20R'],
]

# Plasticity entries: [comparison label, first sample, second sample]. Every
# first sample of a comparison is paired with every second sample of it.
plastComparLst = [
    [comparison, firstSample, secondSample]
    for comparison, firstSamples, secondSamples in (
        ('Deaf VC vs. Control VC', ('5_VC22L', '10_VC25L'), ('7_VC24L', '9_VC23L')),
        ('Deaf AC vs. Control AC', ('6_AC22L', '13_AC25L'), ('8_AC24L', '14_AC23L')),
        ('MD VC vs. Control VC', ('16_VC19L', '1_VC20L'), ('7_VC24L', '9_VC23L')),
        ('MD AC vs. Control AC', ('19_AC19L', '2_AC20L'), ('8_AC24L', '14_AC23L')),
    )
    for firstSample in firstSamples
    for secondSample in secondSamples
]

In [35]:
# Within-sample run: every entry of sampleNameLst has two elements, so each
# sample is compared between its own two halves.
sampleComparLst, folderPath, savePath = (
    sampleNameLst,
    'F:/SampleData/IndividualSamples/',
    'F:/FigureCalculations/Figure3/WithinSample/',
)

savingDiffScatter(sampleComparLst=sampleComparLst, folderPath=folderPath, savePath=savePath)

In [30]:
# Biological-replica run: every entry of bioRepComparisonLst names two samples,
# so the between-sample branch of savingDiffScatter is used.
sampleComparLst, folderPath, savePath = (
    bioRepComparisonLst,
    'F:/SampleData/BiologicalReplicaSamples/',
    'F:/FigureCalculations/Figure3/BiologicalReplicas/',
)

savingDiffScatter(sampleComparLst=sampleComparLst, folderPath=folderPath, savePath=savePath)

Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].
Only considering the two last: ['. (L)_16_VC19L_1_VC20L', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L', '.h5ad'].
Only considering the two last: ['. (L)_19_AC19L_2_AC20L', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R', '.h5ad'].
Only considering the two last: ['. (R)_17_VC19R_3_VC20R', '.h5ad'].
Only considering the two last: ['. (R)_20_AC19R_4_AC20R', '.h5ad'].
Only considering the two last: ['. (R)_20_AC19R_4_AC20R', '.h5ad'].


In [25]:
# Plasticity run: every entry of plastComparLst names two samples, so the
# between-sample branch of savingDiffScatter is used.
sampleComparLst, folderPath, savePath = (
    plastComparLst,
    'F:/SampleData/PlasticityComparisons/',
    'F:/FigureCalculations/Figure3/PlasticityComparisons/',
)

savingDiffScatter(sampleComparLst=sampleComparLst, folderPath=folderPath, savePath=savePath)