In [None]:
import anndata
import scanpy as sc
import numpy as np
import pickle
import os
import fnmatch
import seaborn as sb
import matplotlib as mlp
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
from matplotlib.colors import ListedColormap
from matplotlib.ticker import MultipleLocator, LogLocator, LogFormatter, LogFormatterSciNotation, MaxNLocator
import matplotlib.patches as mpatches

In [None]:
'''FUNCTIONS USED IN THIS FILE'''

# Finds the paths of all files whose names contain a keyword (originally drafted with ChatGPT)
def search_files(folder_path, keyword):
    """Recursively collect files under ``folder_path`` whose name contains ``keyword``.

    The keyword is wrapped as the glob pattern ``*keyword*`` and matched against
    file names only (directory names are not matched).

    Parameters
    ----------
    folder_path : str
        Root directory that is walked recursively.
    keyword : str
        Text that must appear in the file name; fnmatch wildcards in it are honoured.

    Returns
    -------
    list of str
        Full paths of the matching files, in ``os.walk`` order (empty if none match).
    """
    pattern = f'*{keyword}*'
    return [
        os.path.join(dirPath, fileName)
        for dirPath, _subDirs, fileNames in os.walk(folder_path)
        for fileName in fnmatch.filter(fileNames, pattern)
    ]

def percentileCalc(obj, gene):
    """Pick a colour-scale upper bound for one gene based on how widely it is expressed.

    Only the requested gene's column is extracted from ``obj.X``; the matrix is
    never densified as a whole, and both sparse (anything exposing ``toarray``)
    and already-dense matrices are accepted.

    Parameters
    ----------
    obj : object
        Must expose ``var_names`` (iterable of gene names) and ``X``
        (a 2-D matrix indexable as ``X[:, j]``, cells in rows, genes in columns).
    gene : str
        Gene name to look up in ``obj.var_names``.

    Returns
    -------
    str or float
        ``'p95'`` if the 95th percentile of the gene's values is above zero,
        otherwise ``'p99'`` if the 99th percentile is above zero,
        otherwise the fixed fallback ``0.1``.

    Raises
    ------
    ValueError
        If ``gene`` is not present in ``obj.var_names``.
    """
    try:
        geneIdx = list(obj.var_names).index(gene)
    except ValueError:
        raise ValueError(f"Gene '{gene}' not found in obj.var_names") from None

    # Slice the single column first so a sparse matrix is never fully expanded.
    column = obj.X[:, geneIdx]
    if hasattr(column, 'toarray'):
        column = column.toarray()
    geneExp = np.asarray(column).ravel()

    p95, p99 = np.percentile(geneExp, [95, 99])
    if p95 > 0:
        return 'p95'
    if p99 > 0:
        return 'p99'
    # Fewer than ~1% of cells express the gene: fall back to a small fixed bound.
    return 0.1
    
# Loads a list from a .TXT file, one entry per line (originally drafted with ChatGPT)
def load_list_from_file(file_path):
    """Read a text file and return its lines as a list of stripped strings.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line, with leading/trailing whitespace removed.
        Blank lines are kept as empty strings.
    """
    entries = []
    with open(file_path, 'r') as handle:
        for rawLine in handle:
            entries.append(rawLine.strip())
    return entries


In [None]:
'''PLOTTING FUNCTIONS USED IN THIS FILE'''

def barPlot(percentCountDict, fontSize, ax):
    """Draw a grouped bar chart: one group per cluster, one bar per sample (half).

    Parameters
    ----------
    percentCountDict : dict
        Must contain ``'condition'`` (used in the title), ``'sampleHalfNames'``
        (list of series names, in plotting order) and, for every name in that
        list, a sequence of bar heights with one value per cluster.
    fontSize : int or float
        Font size of the axes title (axis labels use a fixed size of 15).
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Notes
    -----
    Every series listed in ``'sampleHalfNames'`` is drawn (two and four series
    are the cases used by this notebook). The x ticks are centred under each
    group of bars regardless of how many series there are.
    """
    barWidth = 0.135
    # Colours are assigned in series order and cycle if there are more than four series.
    barColors = ('firebrick', 'lightcoral', 'cadetblue', 'paleturquoise')

    condition = percentCountDict['condition']
    nameLst = percentCountDict['sampleHalfNames']

    # The number of clusters is taken from the first series.
    nClusters = len(percentCountDict[nameLst[0]])
    basePos = np.arange(nClusters)

    # Each series is shifted right by one bar width relative to the previous one.
    for seriesIdx, seriesName in enumerate(nameLst):
        ax.bar(basePos + seriesIdx * barWidth, percentCountDict[seriesName],
               color=barColors[seriesIdx % len(barColors)], width=barWidth,
               edgecolor='grey', label=f'{seriesName}')

    # Bars are centre-aligned, so the middle of a group of k bars lies
    # (k - 1) / 2 bar widths to the right of the first bar's centre.
    groupCentre = (len(nameLst) - 1) * barWidth / 2

    ax.set_xlabel('Sample Halves per Cluster', fontweight='bold', fontsize=15)
    ax.set_ylabel('% of Cells in each Sample Half', fontweight='bold', fontsize=15)
    ax.set_title(f'{condition} - % of Cells in each Sample Half vs. Clusters', fontsize=fontSize)
    ax.set_xticks([r + groupCentre for r in range(nClusters)])
    ax.set_xticklabels([str(x) for x in range(nClusters)])

    ax.grid(False)
    ax.legend()
    
def barPlotDifference(percentDiffDict, colorList, clustNums, sampleOrder, ax):
    """Draw one bar per cluster showing the percent difference between two samples.

    Parameters
    ----------
    percentDiffDict : dict
        Maps cluster identifier -> percent difference; keys become the bar x
        positions and values the bar heights.
    colorList : sequence of str
        One colour name per bar. Each entry is swapped before plotting:
        ``'firebrick'`` becomes ``'cadetblue'`` and anything else becomes
        ``'firebrick'``.
        NOTE(review): the swap is kept exactly as the original code had it;
        confirm it matches how the colour list was generated upstream.
    clustNums : sequence
        Tick labels, one per cluster; ticks are placed at 0..len(clustNums)-1.
    sampleOrder : sequence of str
        ``sampleOrder[0]`` and ``sampleOrder[1]`` name the samples shown in the
        legend for the firebrick and cadetblue patches respectively.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    """
    sample1_name, sample2_name = sampleOrder[0], sampleOrder[1]

    newColorLst = ['cadetblue' if color == 'firebrick' else 'firebrick' for color in colorList]

    ax.bar(list(percentDiffDict.keys()), list(percentDiffDict.values()),
           color=newColorLst, edgecolor='grey')

    ax.set_xlabel('Clusters')
    ax.set_ylabel('Percent of Cells')
    # Set ticks and labels with two explicit calls (same as barPlot): older
    # Matplotlib releases interpret a second positional argument of set_xticks
    # as ``minor`` rather than as the tick labels.
    ax.set_xticks(list(range(len(clustNums))))
    ax.set_xticklabels(clustNums)
    ax.set_ylim(bottom=0, top=100)

    ax.grid(False)
    patchA = mpatches.Patch(color='firebrick', label=f'Sample {sample1_name} has greater portion of Nuclei')
    patchB = mpatches.Patch(color='cadetblue', label=f'Sample {sample2_name} has greater portion of Nuclei')

    ax.legend(handles=[patchA, patchB])


In [None]:
# CHANGE THESE ACCORDING TO WHICH SAMPLE SETS YOU WANT TO LOOK AT 

# Marker genes drawn as UMAP expression panels, paired with the (row, col)
# grid cell of each panel. Every panel spans two grid columns.
_FIG2_MARKER_PANELS = (
    # Inhibitory
    ('Gad2', (1, 0)), ('Vip', (1, 2)), ('Pvalb', (1, 4)), ('Sst', (1, 6)),
    # Excitatory
    ('Slc17a7', (2, 0)), ('Lamp5', (2, 2)), ('Ndnf', (2, 4)), ('Cux2', (2, 6)),
    ('Rorb', (3, 0)), ('Sulf1', (3, 2)), ('Foxp2', (3, 4)), ('Sla2', (3, 6)),
    # Glia
    ('Mbp', (4, 0)), ('Aldh1l1', (4, 2)), ('Cldn5', (4, 4)), ('C1qa', (4, 6)),
)


def _firstMatch(folder_path, keyword):
    """Return the first path found by search_files, or raise if nothing matches."""
    matches = search_files(folder_path, keyword)
    if not matches:
        raise FileNotFoundError(f"No file matching '*{keyword}*' found under '{folder_path}'")
    return matches[0]


def _loadPickle(file_path):
    """Unpickle and return the object stored at file_path."""
    # NOTE: unpickling can execute arbitrary code; only use this on files
    # produced by this project's own pipeline.
    with open(file_path, 'rb') as file:
        return pickle.load(file)


def _loadFig2Inputs(dict_filePath, obj_filePath, sampleLst):
    """Resolve labels and load every precomputed input needed for one figure."""
    condition = sampleLst[0]

    if len(sampleLst) == 2:
        # [condition, sample]: compare the two halves of a single sample.
        sampleKey = sampleLst[1]
        divideby = 'Half'
        title = f'{sampleKey} Half 1 vs. Half 2'
        fileName = f'{condition}_{sampleKey}.jpg'
    else:
        # [condition, sample1, sample2]: compare two samples.
        sample1_name, sample2_name = sampleLst[1], sampleLst[2]
        sampleKey = f'{sample1_name}_{sample2_name}'
        divideby = 'sample'
        title = f'Sample {sample1_name} vs. Sample {sample2_name}'
        fileName = f'{condition}_{sample1_name}_vs_{sample2_name}.jpg'

    countDir = os.path.join(dict_filePath, 'PercentCount')
    diffDir = os.path.join(dict_filePath, 'CountDifference')

    return {
        'condition': condition,
        'divideby': divideby,
        'title': title,
        'fileName': fileName,
        'sampleObj': sc.read(_firstMatch(obj_filePath, sampleKey)),
        'countDict': _loadPickle(_firstMatch(countDir, sampleKey)),
        'sampleDiff': _loadPickle(_firstMatch(diffDir, f'sampleDiff_{sampleKey}')),
        'clustLst': load_list_from_file(_firstMatch(diffDir, f'clustNum_{sampleKey}')),
        'colorLst': load_list_from_file(_firstMatch(diffDir, f'colorList_{sampleKey}')),
        'sampleOrder': load_list_from_file(_firstMatch(diffDir, f'sampleOrder_{sampleKey}')),
    }


def plottingFig2(dict_filePath, obj_filePath, sampleComparLst, savePath, closeFigs=True):
    """Build and save one multi-panel "Figure 2" per entry of ``sampleComparLst``.

    Each figure contains: a UMAP coloured by ``'leiden_0.3'``, a UMAP coloured by
    sample (or sample half), sixteen marker-gene expression UMAPs, a grouped bar
    chart (``barPlot``) and a percent-difference bar chart (``barPlotDifference``).

    Parameters
    ----------
    dict_filePath : str
        Directory holding the precomputed inputs in its ``PercentCount`` and
        ``CountDifference`` sub-folders.
    obj_filePath : str
        Directory searched (recursively) for the data file read with ``sc.read``.
    sampleComparLst : list of list of str
        Each entry is either ``[condition, sample]`` (halves of one sample,
        UMAP coloured by ``'Half'``) or ``[condition, sample1, sample2]``
        (two samples, UMAP coloured by ``'sample'``).
    savePath : str
        Directory the ``.jpg`` files are written to; created if it does not exist.
    closeFigs : bool, optional
        When True (default) each figure is closed right after it is saved so
        that the large figures do not accumulate in memory. Pass False to keep
        the figures open (e.g. for inline display in the notebook).

    Raises
    ------
    FileNotFoundError
        If any required input file cannot be found.
    """
    fontSize = 20
    gridShape = (6, 8)

    if savePath:
        os.makedirs(savePath, exist_ok=True)

    for sampleLst in sampleComparLst:
        inputs = _loadFig2Inputs(dict_filePath, obj_filePath, sampleLst)
        sampleObj = inputs['sampleObj']

        # Always start from a fresh figure; re-using explicit figure numbers
        # could draw on top of a figure left open by an earlier call.
        fig = plt.figure(figsize=(40, 50), dpi=150)
        try:
            # Clustered UMAP
            axClusters = plt.subplot2grid(gridShape, (0, 1), colspan=2, fig=fig)
            sc.pl.umap(sampleObj, color='leiden_0.3', legend_loc='on data', show=False, ax=axClusters)
            axClusters.set_title('Clustered UMAP', fontsize=fontSize)

            # Clustered UMAP - divided by samples / sample halves
            axSamples = plt.subplot2grid(gridShape, (0, 5), colspan=2, fig=fig)
            sc.pl.umap(sampleObj, color=inputs['divideby'], legend_loc='right margin', show=False, ax=axSamples)
            axSamples.set_title('Clustered UMAP - Labeled by Sample', fontsize=fontSize)

            # Marker-gene expression UMAPs; the colour-scale cap adapts to how
            # widely each gene is expressed (see percentileCalc).
            for gene, gridLoc in _FIG2_MARKER_PANELS:
                axGene = plt.subplot2grid(gridShape, gridLoc, colspan=2, fig=fig)
                vmax = percentileCalc(sampleObj, gene)
                sc.pl.umap(sampleObj, color=gene, vmin=0, vmax=vmax, show=False, ax=axGene)
                axGene.set_title(f'{gene} Expression', fontsize=fontSize)

            # Bar plot - percent of cells per sample (half) in each cluster
            axCounts = plt.subplot2grid(gridShape, (5, 0), colspan=4, fig=fig)
            barPlot(inputs['countDict'], fontSize, axCounts)

            # Bar plot - percent difference of cells between the samples
            axDiff = plt.subplot2grid(gridShape, (5, 4), colspan=4, fig=fig)
            barPlotDifference(inputs['sampleDiff'], inputs['colorLst'], inputs['clustLst'],
                              inputs['sampleOrder'], axDiff)
            axDiff.set_title('Percent Difference of Sample Nuclei per Cluster', fontsize=fontSize)

            # Adjust the spacing between subplots
            fig.subplots_adjust(hspace=0.4, wspace=0.4)

            fig.suptitle(f'{inputs["condition"]} - {inputs["title"]} (Samples Clustered Together Using All Genes)',
                         fontsize=50, fontweight='bold', y=0.95, x=0.5)

            # Saving figure
            fig.savefig(os.path.join(savePath, inputs['fileName']), bbox_inches='tight')
        finally:
            if closeFigs:
                plt.close(fig)


In [None]:
'''SAMPLE COMPARISON LISTS'''

# Within-sample comparisons: each entry is [condition, sample].
sampleNameLst = [
    ['MD Contra. Hem. (L)', '1_VC20L'],
    ['MD Contra. Hem. (L)', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '4_AC20R'],
    ['Deafened', '5_VC22L'],
    ['Deafened', '6_AC22L'],
    ['Control', '7_VC24L'],
    ['Control', '8_AC24L'],
    ['Control', '9_VC23L'],
    ['Deafened', '10_VC25L'],
    ['Deafened', '13_AC25L'],
    ['Control', '14_AC23L'],
    ['MD Contra. Hem. (L)', '16_VC19L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R'],
    ['MD Contra. Hem. (L)', '19_AC19L'],
    ['MD Ipsi. Hem. (R)', '20_AC19R'],
]

# Biological-replicate comparisons: each entry is [condition, sample1, sample2].
bioRepComparisonLst = [
    ['Control', '7_VC24L', '9_VC23L'],
    ['Control', '8_AC24L', '14_AC23L'],
    ['Deafened', '5_VC22L', '10_VC25L'],
    ['Deafened', '6_AC22L', '13_AC25L'],
    ['MD Contra. Hem. (L)', '16_VC19L', '1_VC20L'],
    ['MD Contra. Hem. (L)', '19_AC19L', '2_AC20L'],
    ['MD Ipsi. Hem. (R)', '17_VC19R', '3_VC20R'],
    ['MD Ipsi. Hem. (R)', '20_AC19R', '4_AC20R'],
]

# Plasticity comparisons: each entry is [comparison label, sample1, sample2].
plastComparLst = [
    ['Deaf VC vs. Control VC', '5_VC22L', '7_VC24L'],
    ['Deaf VC vs. Control VC', '5_VC22L', '9_VC23L'],
    ['Deaf VC vs. Control VC', '10_VC25L', '7_VC24L'],
    ['Deaf VC vs. Control VC', '10_VC25L', '9_VC23L'],
    ['Deaf AC vs. Control AC', '6_AC22L', '8_AC24L'],
    ['Deaf AC vs. Control AC', '6_AC22L', '14_AC23L'],
    ['Deaf AC vs. Control AC', '13_AC25L', '8_AC24L'],
    ['Deaf AC vs. Control AC', '13_AC25L', '14_AC23L'],
    ['MD VC vs. Control VC', '16_VC19L', '7_VC24L'],
    ['MD VC vs. Control VC', '16_VC19L', '9_VC23L'],
    ['MD VC vs. Control VC', '1_VC20L', '7_VC24L'],
    ['MD VC vs. Control VC', '1_VC20L', '9_VC23L'],
    ['MD AC vs. Control AC', '19_AC19L', '8_AC24L'],
    ['MD AC vs. Control AC', '19_AC19L', '14_AC23L'],
    ['MD AC vs. Control AC', '2_AC20L', '8_AC24L'],
    ['MD AC vs. Control AC', '2_AC20L', '14_AC23L'],
]

In [None]:
# Within-sample run: one figure per [condition, sample] entry of sampleNameLst.
sampleComparLst = sampleNameLst
obj_filePath = 'F:/SampleData/IndividualSamples/'
dict_filePath = 'F:/FigureCalculations/Figure2/WithinSample/'
savePath = 'F:/Figures/Figure2/WithinSample/'

plottingFig2(
    dict_filePath=dict_filePath,
    obj_filePath=obj_filePath,
    sampleComparLst=sampleComparLst,
    savePath=savePath,
)

In [None]:
# Biological-replicate run: one figure per entry of bioRepComparisonLst.
sampleComparLst = bioRepComparisonLst
obj_filePath = 'F:/SampleData/BiologicalReplicaSamples/'
dict_filePath = 'F:/FigureCalculations/Figure2/BiologicalReplica/'
savePath = 'F:/Figures/Figure2/BiologicalReplica/'

plottingFig2(
    dict_filePath=dict_filePath,
    obj_filePath=obj_filePath,
    sampleComparLst=sampleComparLst,
    savePath=savePath,
)

In [None]:
# Plasticity-comparison run: one figure per entry of plastComparLst.
sampleComparLst = plastComparLst
obj_filePath = 'F:/SampleData/PlasticityComparisons/'
dict_filePath = 'F:/FigureCalculations/Figure2/PlasticityComparisons/'
savePath = 'F:/Figures/Figure2/PlasticityComparisons/'

plottingFig2(
    dict_filePath=dict_filePath,
    obj_filePath=obj_filePath,
    sampleComparLst=sampleComparLst,
    savePath=savePath,
)