In [None]:
import anndata
import scanpy as sc
import numpy as np
import pickle
import os
import fnmatch
import math
import pandas as pd
import seaborn as sb
import matplotlib as mlp
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
from matplotlib.colors import ListedColormap
from matplotlib.ticker import MultipleLocator, LogLocator, LogFormatter, LogFormatterSciNotation, MaxNLocator
import matplotlib.patches as mpatches

In [None]:
# Finds the paths of all files whose name contains a keyword (ChatGPT)
def search_files(folder_path, keyword):
    """Recursively list the files under ``folder_path`` whose name contains ``keyword``.

    Parameters
    ----------
    folder_path : str
        Directory that is walked (including all sub-directories).
    keyword : str
        Text placed between two ``*`` wildcards to build the fnmatch pattern,
        so any wildcard characters inside it are interpreted by fnmatch too.

    Returns
    -------
    list of str
        Paths (directory joined with file name) in ``os.walk`` order;
        empty when nothing matches.
    """
    pattern = f'*{keyword}*'
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in fnmatch.filter(names, pattern)
    ]

# Loads a list from a .txt file, one entry per line (ChatGPT)
def load_list_from_file(file_path):
    """Read a text file and return its lines as a list of stripped strings.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line, with leading/trailing whitespace (including the
        newline) removed. Blank lines are kept as empty strings.
    """
    with open(file_path, 'r') as handle:
        return list(map(str.strip, handle))

# Creates labels for y ticks 
def yTicks(y):
    """Build symmetric integer y-ticks and fold-change labels for log2 values.

    The tick range spans ``-bound .. +bound`` where ``bound`` is the largest
    absolute value in ``y`` rounded up to an integer, so negative values are
    always covered even when they exceed the positive ones in magnitude.

    Parameters
    ----------
    y : iterable of numbers
        Log2 fold-change values that will be plotted on the y axis.

    Returns
    -------
    tuple (ticks, labels)
        ``ticks`` is a list of consecutive integers centred on 0 and
        ``labels`` the matching strings: ``'no change'`` for 0, ``'{2**n}x'``
        for positive ticks and ``'1/{2**n}x'`` for negative ticks.
        An empty ``y`` yields ``([0], ['no change'])``.
    """
    # Use the largest magnitude (not just the maximum) so the axis stays
    # symmetric and also covers strongly negative values.
    magnitudes = [abs(float(value)) for value in y]
    bound = math.ceil(max(magnitudes)) if magnitudes else 0

    ticks = list(range(-bound, bound + 1))

    labels = []
    for num in ticks:
        if num == 0:
            labels.append('no change')
        elif num < 0:
            labels.append(f'1/{2 ** abs(num)}x')
        else:
            labels.append(f'{2 ** num}x')

    return (ticks, labels)
    

In [None]:
# PLOTTING FUNCTION: plots the log differential gene expression vs its z-score
def _meanExpressionPerGene(subsetObj):
    """Return the per-gene (column-wise) mean of ``subsetObj.X`` as a 1-D array."""
    matrix = subsetObj.X
    # Sparse matrices expose ``toarray``; a dense ``X`` is used as-is.
    if hasattr(matrix, 'toarray'):
        matrix = matrix.toarray()
    return np.mean(np.asarray(matrix), axis=0)


def _cappedLog2Ratio(numerator, denominator, cap=10):
    """Return ``log2(numerator / denominator)`` with placeholders for zero means.

    Both zero -> 0 (no change); zero numerator -> ``-cap`` (the ratio tends to
    0); zero denominator -> ``+cap`` (the ratio is unbounded).
    """
    if numerator == 0 and denominator == 0:
        return 0
    if numerator == 0:
        return -cap
    if denominator == 0:
        return cap
    return math.log2(numerator / denominator)


def differentialExpScatter(sampleObj, sampleName, divideby, ax):
    """Scatter each gene's rank_genes_groups score against its log2 mean-expression ratio.

    Runs ``sc.tl.rank_genes_groups`` (Wilcoxon) on ``sampleObj`` grouped by
    ``divideby`` (this overwrites ``sampleObj.uns['rank_genes_groups']``) and
    draws one point per gene on ``ax``.

    Parameters
    ----------
    sampleObj : AnnData
        Object holding the expression matrix in ``.X`` and the grouping
        column ``divideby`` in ``.obs``.
    sampleName : str
        Group whose scores are plotted on the x axis; must be one of the
        groups in ``sampleObj.obs[divideby]``.
    divideby : str
        Name of the ``.obs`` column that splits the cells into groups.
    ax : matplotlib.axes.Axes
        Axes the scatter plot is drawn on.

    Raises
    ------
    ValueError
        If ``divideby`` contains fewer than two groups.
    KeyError
        If a gene in ``var_names`` has no entry in the ranking for ``sampleName``.

    Notes
    -----
    The y value is always ``log2(mean of first group / mean of second group)``
    with groups taken in sorted order, regardless of ``sampleName``. Only the
    first two groups are used.
    """
    sc.tl.rank_genes_groups(sampleObj, divideby, method='wilcoxon')

    ranking = sampleObj.uns['rank_genes_groups']
    score_df = pd.DataFrame(ranking['scores'])
    geneName_df = pd.DataFrame(ranking['names'])

    # One-pass gene -> score lookup instead of scanning the whole ranking
    # table once per gene.
    scoreByGene = dict(zip(geneName_df[sampleName], score_df[sampleName]))

    samples = list(np.unique(sampleObj.obs[divideby]))
    if len(samples) < 2:
        raise ValueError(
            f"'{divideby}' must contain at least two groups, found {len(samples)}"
        )
    sample1_name, sample2_name = samples[0], samples[1]

    sample1_obj = sampleObj[sampleObj.obs[divideby].isin([sample1_name]), :]
    sample2_obj = sampleObj[sampleObj.obs[divideby].isin([sample2_name]), :]

    sample1_meanGeneExpression = _meanExpressionPerGene(sample1_obj)
    sample2_meanGeneExpression = _meanExpressionPerGene(sample2_obj)

    x = []
    y = []
    for geneName, sample1, sample2 in zip(sampleObj.var_names,
                                          sample1_meanGeneExpression,
                                          sample2_meanGeneExpression):
        x.append(scoreByGene[geneName])
        y.append(_cappedLog2Ratio(sample1, sample2))

    ticks, labels = yTicks(y)

    ax.scatter(x, y, s=7)
    ax.set_xlabel(f' {sampleName} DE Score')
    ax.set_ylabel('Log Base Differential Gene Expression')
    ax.set_yticks(ticks, labels)
    ax.set_title(f'DE Score vs. Differential Gene Expression - {sample1_name} vs. {sample2_name}')

In [None]:
def plottingFig3(sampleComparisonLst, obj_filePath, savePath, closeFigs=True):
    """Build and save one multi-panel "Figure 3" PNG per comparison.

    Each figure contains: the UMAP coloured by ``'leiden_0.3'``, the UMAP
    coloured by the grouping column, one differential-expression scatter per
    group, and UMAPs of the top 4 ranked genes of each group.

    Parameters
    ----------
    sampleComparisonLst : list of list of str
        Entries are either ``[condition, sampleName]`` (within-sample
        comparison, grouped by the ``'Half'`` obs column with groups named
        ``'<sampleName> Half 1'`` / ``'<sampleName> Half 2'``) or
        ``[condition, sample1, sample2]`` (grouped by the ``'sample'`` obs
        column).
    obj_filePath : str
        Folder searched recursively for the saved object. The first file
        whose name contains ``sampleName`` (or ``'<sample1>_<sample2>'``) is
        loaded.
    savePath : str
        Folder the PNGs are written to as ``'<condition>_<saveName>.png'``;
        created if it does not exist.
    closeFigs : bool, default True
        Close each figure once it has been saved so the large canvases do not
        accumulate in memory and a later call cannot draw onto a figure that
        reuses the same number. Pass ``False`` to keep the figures open
        (e.g. for inline display).

    Raises
    ------
    FileNotFoundError
        If no file under ``obj_filePath`` matches a comparison.
    """
    os.makedirs(savePath, exist_ok=True)

    # Shared plot properties
    fontSize = 20
    grid = (4, 8)                   # rows x columns of the layout grid
    genePanelCols = (0, 2, 4, 6)    # first grid column of each gene panel

    for uniqueNum, sampleLst in enumerate(sampleComparisonLst, start=1):
        condition = sampleLst[0]

        if len(sampleLst) == 2:
            sampleName = sampleLst[1]
            sample1_name = f'{sampleName} Half 1'
            sample2_name = f'{sampleName} Half 2'
            searchKey = sampleName

            title = f'{sampleName} Half 1 vs. Half 2'
            saveName = f'{sampleName}'
            divideby = 'Half'
        else:
            sample1_name = sampleLst[1]
            sample2_name = sampleLst[2]
            searchKey = f'{sample1_name}_{sample2_name}'

            title = f'{sample1_name} vs. Sample {sample2_name}'
            saveName = f'{sample1_name}_{sample2_name}'
            divideby = 'sample'

        sampleObj_filePath = search_files(obj_filePath, searchKey)
        if not sampleObj_filePath:
            raise FileNotFoundError(
                f"No file containing '{searchKey}' found under '{obj_filePath}'"
            )
        sampleObj = sc.read(sampleObj_filePath[0])
        # NOTE(review): presumably resets the log1p metadata so that
        # rank_genes_groups does not fail on a reloaded object - confirm.
        sampleObj.uns['log1p'] = {'base': None}

        fig = plt.figure(figsize=(30, 30), dpi=150, num=uniqueNum)

        # Row 0: UMAPs, row 1: scatter plots, rows 2-3: top ranked genes.
        # Every panel is two grid columns wide.
        ax1 = plt.subplot2grid(grid, (0, 1), colspan=2, fig=fig)  # Clustered UMAP
        ax2 = plt.subplot2grid(grid, (0, 5), colspan=2, fig=fig)  # UMAP labelled by group
        ax3 = plt.subplot2grid(grid, (1, 1), colspan=2, fig=fig)  # Scatterplot Sample1
        ax5 = plt.subplot2grid(grid, (1, 5), colspan=2, fig=fig)  # Scatterplot Sample2
        sample1_geneAxes = [plt.subplot2grid(grid, (2, c), colspan=2, fig=fig)
                            for c in genePanelCols]
        sample2_geneAxes = [plt.subplot2grid(grid, (3, c), colspan=2, fig=fig)
                            for c in genePanelCols]

        # Clustered UMAP
        sc.pl.umap(sampleObj, color='leiden_0.3', legend_loc='on data', show=False, ax=ax1)
        ax1.set_title('Clustered UMAP', fontsize=fontSize)

        # Clustered UMAP - divided by samples
        sc.pl.umap(sampleObj, color=divideby, legend_loc='right margin', show=False, ax=ax2)
        ax2.set_title('Clustered UMAP - Labeled by Sample', fontsize=fontSize)

        # Scatter plots; each call (re)computes the gene ranking that the
        # top-4 lists below read from.
        differentialExpScatter(sampleObj, sample1_name, divideby, ax3)
        sample1_top4RankedLst = list(
            sc.get.rank_genes_groups_df(sampleObj, [sample1_name])['names'])[:4]

        differentialExpScatter(sampleObj, sample2_name, divideby, ax5)
        sample2_top4RankedLst = list(
            sc.get.rank_genes_groups_df(sampleObj, [sample2_name])['names'])[:4]

        # Top 4 ranked genes of each group, one UMAP per gene
        for geneLst, geneAxes in ((sample1_top4RankedLst, sample1_geneAxes),
                                  (sample2_top4RankedLst, sample2_geneAxes)):
            for gene, geneAx in zip(geneLst, geneAxes):
                sc.pl.umap(sampleObj, color=gene, vmin=0, vmax='p95', show=False, ax=geneAx)
                geneAx.set_title(f'{gene} Expression', fontsize=fontSize)

        # Adjust the spacing between subplots
        fig.subplots_adjust(hspace=0.4, wspace=0.4)

        fig.suptitle(f'{condition} - Sample {title} (Samples Clustered Together Using All Genes)',
                     fontsize=30, fontweight='bold', y=1, x=0.6)

        # Saving figure
        fig.savefig(os.path.join(savePath, f'{condition}_{saveName}.png'), bbox_inches='tight')

        if closeFigs:
            plt.close(fig)


In [None]:
# SAMPLE COMPARISON LISTS

# Within-sample comparisons: [condition, sample name]
sampleNameLst = [
    ['MD Contra. Hem. (L)', '1_VC20L'],
    ['MD Contra. Hem. (L)', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '4_AC20R'],
    ['Deafened', '5_VC22L'],
    ['Deafened', '6_AC22L'],
    ['Control', '7_VC24L'],
    ['Control', '8_AC24L'],
    ['Control', '9_VC23L'],
    ['Deafened', '10_VC25L'],
    ['Deafened', '13_AC25L'],
    ['Control', '14_AC23L'],
    ['MD Contra. Hem. (L)', '16_VC19L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R'],
    ['MD Contra. Hem. (L)', '19_AC19L'],
    ['MD Ipsi. Hem. (R)', '20_AC19R'],
]

# Biological-replicate comparisons: [condition, sample 1, sample 2]
bioRepComparisonLst = [
    ['Control', '7_VC24L', '9_VC23L'],
    ['Control', '8_AC24L', '14_AC23L'],
    ['Deafened', '5_VC22L', '10_VC25L'],
    ['Deafened', '6_AC22L', '13_AC25L'],
    ['MD Contra. Hem. (L)', '16_VC19L', '1_VC20L'],
    ['MD Contra. Hem. (L)', '19_AC19L', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '20_AC19R', '4_AC20R'],
]

# Plasticity comparisons: [comparison label, sample 1, sample 2]
plastComparLst = [
    ['Deaf VC vs. Control VC', '5_VC22L', '7_VC24L'],
    ['Deaf VC vs. Control VC', '5_VC22L', '9_VC23L'],
    ['Deaf VC vs. Control VC', '10_VC25L', '7_VC24L'],
    ['Deaf VC vs. Control VC', '10_VC25L', '9_VC23L'],
    ['Deaf AC vs. Control AC', '6_AC22L', '8_AC24L'],
    ['Deaf AC vs. Control AC', '6_AC22L', '14_AC23L'],
    ['Deaf AC vs. Control AC', '13_AC25L', '8_AC24L'],
    ['Deaf AC vs. Control AC', '13_AC25L', '14_AC23L'],
    ['MD VC vs. Control VC', '16_VC19L', '7_VC24L'],
    ['MD VC vs. Control VC', '16_VC19L', '9_VC23L'],
    ['MD VC vs. Control VC', '1_VC20L', '7_VC24L'],
    ['MD VC vs. Control VC', '1_VC20L', '9_VC23L'],
    ['MD AC vs. Control AC', '19_AC19L', '8_AC24L'],
    ['MD AC vs. Control AC', '19_AC19L', '14_AC23L'],
    ['MD AC vs. Control AC', '2_AC20L', '8_AC24L'],
    ['MD AC vs. Control AC', '2_AC20L', '14_AC23L'],
]

In [None]:
# Within-sample figures: each individual sample compared between its two halves.
sampleComparisonLst, obj_filePath, savePath = (
    sampleNameLst,
    'F:/SampleData/IndividualSamples/',
    'F:/Figures/Figure3/WithinSample/',
)

plottingFig3(
    sampleComparisonLst=sampleComparisonLst,
    obj_filePath=obj_filePath,
    savePath=savePath,
)

In [None]:
# Biological-replicate figures: pairs of samples from the same condition.
sampleComparisonLst, obj_filePath, savePath = (
    bioRepComparisonLst,
    'F:/SampleData/BiologicalReplicaSamples/',
    'F:/Figures/Figure3/BiologicalReplica/',
)

plottingFig3(
    sampleComparisonLst=sampleComparisonLst,
    obj_filePath=obj_filePath,
    savePath=savePath,
)

In [None]:
# Plasticity figures: treated samples compared against control samples.
sampleComparisonLst, obj_filePath, savePath = (
    plastComparLst,
    'F:/SampleData/PlasticityComparisons/',
    'F:/Figures/Figure3/PlasticityComparisons/',
)

plottingFig3(
    sampleComparisonLst=sampleComparisonLst,
    obj_filePath=obj_filePath,
    savePath=savePath,
)

In [None]:
# Ranked-genes plot for every clustered biological-replicate object.
# NOTE(review): concatBioRepSmplLst is not defined in the visible part of this
# notebook - it presumably comes from another cell or a previous kernel
# session; confirm before running on a fresh kernel.
for i, bioRepObj in enumerate(concatBioRepSmplLst):

    # The condition label is taken from the comparison list entry at the same
    # position, so both lists are expected to be in the same order.
    condition = bioRepComparisonLst[i][0]

    # First two group names found in the 'sample' column (sorted order)
    samples = np.unique(list(bioRepObj.obs['sample']))
    sample1_name, sample2_name = samples[0], samples[1]

    # Rank genes between the groups and draw the top 20 per group
    sc.tl.rank_genes_groups(bioRepObj, 'sample', method='wilcoxon')
    sc.pl.rank_genes_groups(bioRepObj, n_genes=20, show=False)

    # Save the current figure
    savePath = 'C:/Users/Hlab/Desktop/Mouse_Data_Analysis_Figures/SubPlots/Figure3/PNG'
    plt.savefig(
        os.path.join(savePath, f'{condition}_{sample1_name}_vs_{sample2_name}_rankedGenes.png'),
        bbox_inches='tight',
    )