# Plot Y Chromosome Gene Brain Expression From Requantified Data with Different Y Chromosome Haplogroups
- **Author(s):** Frank Grenn
- **Quick Description:** Get NABEC requantified expression data, subset chrY genes and transcripts, and plot with haplogroup.

In [None]:

import os

import pandas as pd
import mygene

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [None]:
# Base working directory: the haplogroup table is read from, and the final plot
# is written to, locations underneath it.
# NOTE(review): "/PATH" looks like a placeholder to be replaced before running — confirm.
WRKDIR = "/PATH"
# Directory containing the requantified chrY count matrix CSV.
NABEC_DIR = "/PATH/quants_default_ref"


## NABEC Frontal Cortex Data

In [None]:
# Load the chrY count matrix and key its rows by the 'Geneid' column.
counts_csv = f"{NABEC_DIR}/quants_chrY_default_ref_matrix.csv"
nabec_counts = pd.read_csv(counts_csv).set_index('Geneid')

# Sanity check: overall dimensions, then the top-left 5x5 corner.
print(nabec_counts.shape)
print(nabec_counts.iloc[:5, :5])

## Plot for a gene

In [None]:
# Gene to plot. The value is used as a row label into nabec_counts (indexed by
# 'Geneid'), so it must match that index exactly. The active value carries a
# ".8" suffix; the unsuffixed form is kept commented out for reference.
#gene = "ENSG00000184895"
gene = "ENSG00000184895.8"

In [None]:
# Pull the selected gene's row and show it as a one-column table.
gene_counts = nabec_counts.loc[gene]
gene_counts.to_frame()

In [None]:
#merge with haplogroup data
nabec_haplo = pd.read_csv(f"{WRKDIR}/chrY/output_nabec/nabec_haplos.csv")
nabec_haplo['id_fctx'] = nabec_haplo['new_id'] + 'fctx'
print(nabec_haplo.shape)
print(nabec_haplo.head())

In [None]:

# One-column frame of the selected gene's counts, indexed by the count
# matrix's column labels.
gene_frame = nabec_counts.loc[gene].to_frame()

# Join that index against the 'id_fctx' key of the haplogroup table.
merged_nabec = gene_frame.merge(
    nabec_haplo,
    left_index=True,
    right_on='id_fctx',
)

# Report the merged dimensions, a preview, and the rows per major haplogroup.
print(merged_nabec.shape)
print(merged_nabec.head(5))
print(merged_nabec['yhaplo_haplo_major'].value_counts())

In [None]:
# Keep only rows whose major haplogroup is E, G, I, J or R, printing the
# shape before and after the filter.
print(merged_nabec.shape)
in_major_groups = merged_nabec['yhaplo_haplo_major'].isin(['E', 'G', 'I', 'J', 'R'])
temp = merged_nabec[in_major_groups]
print(temp.shape)

In [None]:
# Preview the first five rows of the filtered table.
temp.head(5)

In [None]:

# Violin plot with an overlaid strip plot of the selected gene's counts,
# grouped by major Y chromosome haplogroup, saved as a PNG under WRKDIR.
fig = plt.figure(figsize=(7, 5), dpi=80)
fig.subplots_adjust(hspace=0.5, wspace=0.2)
sns.set()

# Keep only the gene's counts and the haplogroup label, restricted to the
# haplogroups being compared. The explicit .copy() makes data_subset an
# independent frame, so the dtype conversion below does not write into a
# slice of merged_nabec (which raises pandas' SettingWithCopyWarning).
data_subset = merged_nabec[[gene, 'yhaplo_haplo_major']]
data_subset = data_subset[
    data_subset['yhaplo_haplo_major'].isin(['E', 'G', 'I', 'J', 'R'])
].copy()
data_subset[gene] = data_subset[gene].astype('float64')
print(data_subset.shape)
print(data_subset.head())

# Alphabetical x-axis order over the haplogroups actually present; computed
# once so both layers are guaranteed to share the same category positions.
haplo_order = sorted(set(data_subset['yhaplo_haplo_major'].tolist()))

sns_plot = sns.violinplot(
    x='yhaplo_haplo_major',
    y=gene,
    data=data_subset,
    order=haplo_order,
)
# Individual samples on top of the violins. Only `palette` is passed: when a
# palette is supplied it determines the point colours, so the `color`
# argument previously given alongside it had no effect.
sns_plot = sns.stripplot(
    x='yhaplo_haplo_major',
    y=gene,
    data=data_subset,
    order=haplo_order,
    palette="tab10",
    linewidth=1,
)

plt.xlabel("Y Chromosome Haplogroup")
plt.ylabel("Counts")
plt.title("NABEC Cortex SRY (uc004fqg.1) Expression ")

# Save before showing: some backends release the figure once it has been
# shown, so writing the file first is the order that works everywhere.
sns_plot.get_figure().savefig(f"{WRKDIR}/chrY/expression/nabec_sry_counts_plot.png")
plt.show()