In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
#import scanpy as sc
import matplotlib.pyplot as plt
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable between runs.
np.random.seed(111)

In [None]:
# Maps each UPR pathway label to the list of gene symbols assigned to it.
# Later cells look a gene up in these lists to find the pathway it belongs to.
d = {'IRE1alpha-down': ['Yif1a','Tmem165'],
     'PERK-down': ['Hspe1','Rps26','Ppia','Ndufs5','Prdx1','Ptma','Dnaja1','Tubb4b','Uqcr11','Polr2l','Cox6b1','Rps10','Cox6a1'],
     'PERK-up': ['Mthfd2','Eif4ebp1','Xbp1','Ddit3','Trib3','Socs2','Cebpg','Eif1b','Eif1','Map1b','Gars','Pck2','Sesn2','Cth','Psph','Wars','Phgdh','Hax1','Psat1','Bex2','Lmo4','Fam89a','Tsc22d3','Rgs16','Map3k8','Idh1','Ccpg1','Pim1','Slc3a2','Snhg8'],
     'IRE1alpha-up': ['Dnajb9','Tmed2','Serp1','Vimp','Derl2','Slc35b1','Erlec1','Armcx3','Sec61a1','Sec61b','Ppib','Ssr2','Tmed9','Nans','Ostc','Ssr3','Ssr1'],
     'ATF6-up': ['Selk','Cdk2ap2','Hspa5','Herpud1','Sdf2l1','Dnajb11','Manf','Hsp90b1','Creld2','Pdia6','Pdia4','Calr','Dnajc3','Hyou1','Tmem50b'],
    }

In [None]:
# Full list of UPR genes, pathway by pathway, in the order the pathways and
# their genes are declared in `d`. Deriving it from `d` (instead of keeping a
# second hand-typed copy of the same names) keeps the two from drifting
# apart when a gene is added, removed or renamed.
order = [gene for genes in d.values() for gene in genes]

In [None]:
# Cell type and tissue section being analysed; both are substituted into the
# input CSV file name and into the names of the saved figures.
cell_type = 'TAL'
section = 'medulla'

In [None]:
# The input CSV holds the differential-expression results for the UPR genes (genes x features).
# Columns used in this notebook: 'genes', 'pvals_adj', 'cluster' and 'logs', where
#   - 'cluster' records whether the gene was upregulated in UMOD-KI or in UMOD-WT,
#   - 'logs' is the average log fold change of gene expression between UMOD-KI and
#     UMOD-WT in the selected cell type and section.
# The file only contains genes with pvals_adj < 0.05 and logs > 0.0.
input_path = f'{cell_type}_{section}_UPR_pathway_DE_results.csv'
# The first CSV column is used as the row index.
DE_dat = pd.read_csv(input_path, index_col=0)

In [None]:
# Genes from the full UPR list that are absent from the DE results get a
# placeholder row: adjusted p-value 1.0, no cluster, log fold change 0.0.
# The reported genes are collected into a set once, rather than rebuilding a
# list for every membership test.
reported_genes = set(DE_dat['genes'])
missing_genes = [gene for gene in order if gene not in reported_genes]
n_missing = len(missing_genes)
nonsig_genes = pd.DataFrame({
    'genes': missing_genes,
    'pvals_adj': [1.0] * n_missing,
    'cluster': [np.nan] * n_missing,
    'logs': [0.0] * n_missing,
})

In [None]:
# Append the placeholder rows for the non-significant genes to the DE results.
# Each frame keeps its own index labels here; the index is renumbered in a later cell.
# NOTE(review): this cell overwrites DE_dat with itself plus the placeholders, so
# re-running it without reloading DE_dat appends the placeholder rows a second time.
DE_dat = pd.concat([DE_dat,nonsig_genes])

In [None]:
# Label every gene in DE_dat with the pathway it belongs to.
# Invert `d` (pathway -> genes) into gene -> pathway once. setdefault keeps the
# first pathway, in `d` order, for a gene that is listed more than once.
gene_to_pathway = {}
for pathway_name, pathway_genes in d.items():
    for pathway_gene in pathway_genes:
        gene_to_pathway.setdefault(pathway_gene, pathway_name)

# A gene that is in no pathway used to be silently labelled with the last key
# of `d`; fail loudly instead so the mislabelling cannot reach the figures.
unassigned = [gene for gene in DE_dat['genes'] if gene not in gene_to_pathway]
if unassigned:
    raise ValueError('Genes not assigned to any pathway in d: {}'.format(unassigned))

# One label per row, in the same order as DE_dat['genes'].
pathways = [gene_to_pathway[gene] for gene in DE_dat['genes']]

In [None]:
# Attach the pathway labels as a new column. The assignment is positional, so
# `pathways` must hold one label per row in the same order as DE_dat['genes'].
DE_dat['pathway'] = pathways

In [None]:
# Sort by log fold change, largest first. head(n) with n equal to the total
# number of rows keeps every row of every pathway group without reordering
# them, so the sorted order is what comes out.
n_rows = DE_dat.shape[0]
DE_dat = (
    DE_dat
    .sort_values(by=['logs'], ascending=False)
    .groupby('pathway')
    .head(n_rows)
)

In [None]:
# Arrange the genes for plotting: pathways in the fixed order below and, within
# each pathway, genes up in UMOD-KI first, then genes up in UMOD-WT, then genes
# with no cluster (the non-significant placeholders). Rows keep the order they
# already have in DE_dat within each of those groups. Rows whose cluster is
# anything else are left out.
pathway_order = ['IRE1alpha-down','PERK-down', 'ATF6-up', 'IRE1alpha-up', 'PERK-up']
ordered_parts = []
for pathway in pathway_order:
    in_pathway = DE_dat[DE_dat['pathway'] == pathway]
    ordered_parts.append(in_pathway[in_pathway['cluster'] == 'UMOD-KI'])
    ordered_parts.append(in_pathway[in_pathway['cluster'] == 'UMOD-WT'])
    ordered_parts.append(in_pathway[in_pathway['cluster'].isnull()])
# Concatenate once at the end instead of growing a DataFrame inside the loop
# starting from an empty pd.DataFrame().
DE_dat_ord = pd.concat(ordered_parts)

In [None]:
# Renumber the rows 0..n-1 and discard the old index labels. drop=True does
# this in one step and also works when the old index has a name, in which case
# reset_index() would not create a column called 'index' to drop.
DE_dat_ord = DE_dat_ord.reset_index(drop=True)

In [None]:
# Keep only the significant genes: adjusted p-value below 0.05 AND a positive
# log fold change. Both conditions are applied together; previously the second
# assignment overwrote the first, so the p-value filter was discarded.
is_significant = (DE_dat_ord['pvals_adj'] < 0.05) & (DE_dat_ord['logs'] > 0)
DE_dat = DE_dat_ord[is_significant]

In [None]:
# Map each significant gene to a starred label ('* ' + gene) used to mark it on the plots.
sig_genes = list(DE_dat['genes'])
sig_gene_d = {gene: '* ' + gene for gene in sig_genes}

In [None]:
# NOTE(review): `array_avg` is not created anywhere in the visible notebook, so
# this cell fails on a fresh kernel unless it is loaded elsewhere. It is used
# here as a table with a 'geno' column plus one column per gene - confirm its source.
# Put the genotype column first, followed by the genes in plotting order.
order = ['geno'] + list(DE_dat_ord['genes'])
array_avg = array_avg[order].rename(columns={'geno': 'genotype'})
# Stack the UMOD-WT rows above the UMOD-KI rows; rows with any other genotype are dropped.
array_avg = pd.concat(
    [array_avg[array_avg['genotype'] == genotype] for genotype in ('UMOD-WT', 'UMOD-KI')]
)

In [None]:
# Drop the first column by position, leaving only the per-gene columns. This
# relies on 'genotype' being the first column, as arranged when array_avg was reordered.
genesonly=array_avg.iloc[:,1:]

In [None]:
# Label every gene column of `genesonly` with the pathway it belongs to.
# Invert `d` (pathway -> genes) into gene -> pathway once. setdefault keeps the
# first pathway, in `d` order, for a gene that is listed more than once.
gene_to_pathway = {}
for pathway_name, pathway_genes in d.items():
    for pathway_gene in pathway_genes:
        gene_to_pathway.setdefault(pathway_gene, pathway_name)

# A gene that is in no pathway used to be silently labelled with the last key
# of `d`; fail loudly instead so the mislabelling cannot reach the figures.
unassigned = [gene for gene in genesonly.columns if gene not in gene_to_pathway]
if unassigned:
    raise ValueError('Genes not assigned to any pathway in d: {}'.format(unassigned))

# One label per column, in the same order as genesonly.columns.
pathways = [gene_to_pathway[gene] for gene in genesonly.columns]

In [None]:
# Turn the pathway labels into a one-column table ('pathway') indexed by the
# gene names taken from genesonly's columns.
pathways = (
    pd.DataFrame(pathways)
    .rename(columns={0: 'pathway'})
    .set_index(genesonly.columns)
)

In [None]:
# Swap in the starred labels for significant genes, on the expression columns
# and on the pathway table's index alike, so the two stay aligned. Genes
# without an entry in sig_gene_d keep their plain name.
genesonly = genesonly.rename(sig_gene_d, axis='columns')
pathways = pathways.rename(sig_gene_d, axis='index')

In [None]:
plt.rcParams["font.family"] = "Arial"
plt.rcParams['font.size'] = 22
# Heatmap of the gene columns with clustering switched off on both axes, so
# rows and columns stay in the order prepared above. standard_scale=1 rescales
# each column (gene) independently; every column gets a tick label, rows get none.
sns.clustermap(genesonly,row_cluster=False,col_cluster=False,cmap='viridis',xticklabels=1,yticklabels=False,cbar_pos=(0.1, 0.5, 0.05, 0.18),standard_scale=1,figsize=(28,10))
# Save the figure just drawn. The file name uses `cell_type` (the variable
# defined in the configuration cell); the previous `celltype` was undefined.
plt.savefig('{cell_type}_{section}_avg_by_mouse_clustermap_viridis_missing_added.pdf'.format(cell_type=cell_type,section=section),dpi=300)

In [None]:
# Shorten the genotype labels for the violin plots. replace() is applied to the
# whole table, so any cell equal to one of these strings is rewritten.
genotype_labels = {'UMOD-WT': 'WT', 'UMOD-KI': 'UMOD_KI'}
array_avg = array_avg.replace(genotype_labels)

In [None]:
# Switch DE_dat back to a copy of the full ordered table (significant genes and
# placeholders alike), replacing the significant-only subset it held before.
DE_dat=DE_dat_ord.copy()

In [None]:
# One violin + swarm plot per gene, comparing expression between genotypes,
# saved as a separate PDF named after the cell type, section, gene and pathway.
cell_type='TAL'
section='medulla'

# Palette and seaborn style do not change between genes, so set them once.
colors = ["dodgerblue", "firebrick"]
sns.set_style("whitegrid", {'axes.grid' : False,'font.family': ['sans-serif'],'font.sans-serif': ['Arial'],'figure.figsize':(5,5)})

# np.unique yields the distinct pathway labels in sorted order.
for pathway in np.unique(pathways):
    genes_in_pathway = list(DE_dat.loc[DE_dat['pathway']==pathway, 'genes'])
    for gene in genes_in_pathway:
        # Progress message. The previous version also printed `pheno`, a name
        # that is never defined, which raised NameError on the first gene.
        print(pathway,gene)

        # Violin for the distribution, with the individual points drawn on top.
        ax = sns.violinplot(data=array_avg,x='genotype',y=gene, inner=None,palette=colors,scale='width')
        ax = sns.swarmplot(x="genotype", y=gene, data=array_avg, color=".2")
        sns.despine(top=True,right=True)
        ax.set_xlabel(None)
        ax.set_ylabel('Expression level', fontsize=18)
        # Re-apply the existing tick labels with a larger font; y ticks are rounded to 3 decimals.
        x_labels = ax.get_xticklabels()
        ax.set_xticklabels(x_labels, fontsize=16)
        y_ticks = ax.get_yticks()
        ax.set_yticklabels(y_ticks.round(3), fontsize=16)
        ax.set_title(gene, fontsize=18)
        ax.spines['bottom'].set_color('black')
        ax.spines['left'].set_color('black')
        ax.tick_params(axis='x', colors='black')
        plt.savefig('{cell_type}_{section}_mouse_avg_{gene}_{pathway}_violin_missing_added.pdf'.format(cell_type=cell_type,section=section,pathway=pathway,gene=gene),dpi=300,bbox_inches='tight')
        plt.show()
        # Close the figure so the next gene starts on fresh axes and figures do not pile up in memory.
        plt.close('all')
