# Plot Major Haplogroup Frequencies in Expression Samples
- **Author(s)** - Frank Grenn
- **Date Started** - September 2021
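- **Quick Description:** Count the major Y-chromosome haplogroups among the AMP-PD and NABEC samples that have expression data, save the counts to CSV, and plot them as a grouped bar chart.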

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root directory for the chrY input and output files used by the cells below.
# NOTE(review): "$PATH" is a plain string here (Python does not expand it), so it
# looks like a placeholder to replace with a real directory before running - confirm.
WRKDIR = "$PATH/chrY"

## AMP-PD

In [None]:
# Read the AMP-PD chrY_meta.csv table, then show its dimensions and first rows.
amp_meta_csv = f"{WRKDIR}/output_male_hemizygous_only_het_filter_run/chrY_meta.csv"
amp = pd.read_csv(amp_meta_csv)
print(amp.shape)
print(amp.head())

In [None]:
# Only the column names are needed: every column other than "Geneid" is kept
# as an expression sample ID.
amp_expr_header = pd.read_csv(
    f"{WRKDIR}/expression/amppd_chrY_featureCounts.csv", nrows=1
)
amp_exp_samples = amp_expr_header.columns.tolist()
amp_exp_samples.remove("Geneid")  # raises ValueError if the column is missing
print(len(amp_exp_samples))

In [None]:
# Keep the AMP-PD rows whose `fid` is one of the expression sample IDs.
amp_has_expression = amp["fid"].isin(amp_exp_samples)
amp_exp_samples_meta = amp[amp_has_expression]
print(amp_exp_samples_meta.shape)
print(amp_exp_samples_meta.head())

## NABEC

In [None]:
# Read the NABEC haplogroup table and build `fid` by appending "fctx" to `new_id`;
# `fid` is the column later compared against the expression matrix column names.
nabec_haplos_csv = f"{WRKDIR}/output_nabec/nabec_haplos.csv"
nabec = pd.read_csv(nabec_haplos_csv)
nabec["fid"] = nabec["new_id"] + "fctx"
print(nabec.shape)
print(nabec.head())

In [None]:
# Only the column names are needed: every column other than "Geneid" is kept
# as an expression sample ID.
# The path has no placeholders, so it is a plain string rather than an f-string.
# NOTE(review): this file sits under "$PATH/quants_default_ref", not under WRKDIR -
# confirm that is intentional.
nabec_exp_samples = pd.read_csv(
    "$PATH/quants_default_ref/quants_chrY_default_ref_matrix.csv", nrows=1
).columns.tolist()
nabec_exp_samples.remove("Geneid")  # raises ValueError if the column is missing
print(len(nabec_exp_samples))

In [None]:
# Keep the NABEC rows whose `fid` is one of the expression sample IDs.
nabec_has_expression = nabec["fid"].isin(nabec_exp_samples)
nabec_exp_samples_meta = nabec[nabec_has_expression]
print(nabec_exp_samples_meta.shape)
print(nabec_exp_samples_meta.head())

## Get Counts

In [None]:
# Count AMP-PD expression samples per major haplogroup and label the rows
# with the dataset name.
amp_major_tally = amp_exp_samples_meta["yhaplo_haplo_major"].value_counts()
amp_counts = amp_major_tally.to_frame()
amp_counts = amp_counts.assign(haplo_major=amp_counts.index, dataset="AMP-PD")
# Columns are renamed by position, so the name value_counts() gave the count
# column does not matter.
amp_counts.columns = ["haplo_count", "haplo_major", "dataset"]
print(amp_counts)

In [None]:
# Count NABEC expression samples per major haplogroup and label the rows
# with the dataset name.
nabec_major_tally = nabec_exp_samples_meta["yhaplo_haplo_major"].value_counts()
nabec_counts = nabec_major_tally.to_frame()
nabec_counts = nabec_counts.assign(haplo_major=nabec_counts.index, dataset="NABEC")
# Columns are renamed by position, so the name value_counts() gave the count
# column does not matter.
nabec_counts.columns = ["haplo_count", "haplo_major", "dataset"]
print(nabec_counts)

In [None]:
# Stack the two per-dataset count tables into one long table ordered by haplogroup.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; the original row labels are kept, as append did.
exp_haplo = pd.concat([amp_counts, nabec_counts]).sort_values("haplo_major")
print(exp_haplo)

In [None]:
# Write the combined counts to CSV without the row index (index=False is the
# documented boolean form of the previous index=None).
exp_haplo.to_csv(f"{WRKDIR}/expression/expression_samples_haplo_major_counts.csv", index=False)

## Plot

In [None]:
# Grouped bar chart of sample counts: haplogroup on the x-axis, one bar colour per dataset.
fig = plt.figure(figsize=(6, 4), dpi=80)
fig.subplots_adjust(hspace=0.5, wspace=0.2)
sns.set()
sns_plot = sns.barplot(x="haplo_major", y="haplo_count", hue="dataset", data=exp_haplo)

plt.xlabel("Major Haplogroup")
plt.ylabel("Sample Count")
plt.title("Expression Sample Major Haplogroups")
# Save before showing so the PNG never depends on the figure still being usable
# after plt.show() returns.
# NOTE: bbox_inches='tight' was left disabled in the original savefig call.
sns_plot.get_figure().savefig(f"{WRKDIR}/expression/expression_samples_haplo_major_counts.png")
plt.show()