In [None]:
# Import packages
# Pandas documentation can be found here: https://pandas.pydata.org/docs/getting_started/index.html
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Initialize DataFrames
#
# Each sheet of BoutinResults.xlsx lists genes overexpressed in one group of bees.
# Every frame is tagged with its hygiene status and given an "FC" (expression
# fold change) column: the FPKM of the overexpressing group divided by the FPKM
# of the other group.

def fold_change(numerator, denominator):
    """Return the element-wise ratio numerator / denominator as a float Series.

    Rows whose denominator is exactly 0 get a fold change of 0 instead of
    inf/NaN. Computed on whole columns, so it does not depend on the frame
    having a 0..n-1 index and works on an empty frame.
    """
    ratio = numerator / denominator
    return ratio.where(denominator != 0, 0.0).astype(float)


# Genes overexpressed in hygienic bees
h_df = pd.read_excel('BoutinResults.xlsx', sheet_name='hygienic_overexpressed')
h_df.insert(1, "Hygiene", "H")  # Hygiene Status = H (hygienic) or NH (non-hygienic)
# FC = Hygienic FPKM / Non-hygienic FPKM, placed at column position 7
h_df.insert(7, "FC", fold_change(h_df['Hygienic FPKM'], h_df['Non-hygienic FPKM']))

# Genes overexpressed in non-hygienic bees
nh_df = pd.read_excel('BoutinResults.xlsx', sheet_name='nonhygienic_overexpressed')
nh_df.insert(1, "Hygiene", "NH")  # Hygiene Status = H (hygienic) or NH (non-hygienic)
# FC = Non-hygienic FPKM / Hygienic FPKM, appended as the last column
nh_df['FC'] = fold_change(nh_df['Non-hygienic FPKM'], nh_df['Hygienic FPKM'])

# Bring all overexpressed genes together in one frame. ignore_index gives the
# combined frame a fresh unique index instead of repeating each sheet's row labels.
overex_df = pd.concat([h_df, nh_df], ignore_index=True)
display(overex_df)

In [None]:
#Fold changes

def plot_fold_change(df, color, label, title, outfile, figsize=None, x_labelsize=None):
    """Draw one bar per gene showing its fold change and save the figure.

    df          -- frame with 'Gene' and 'FC' columns, plotted in its current row order
    color/label -- bar color and legend label
    title       -- axes title
    outfile     -- path the image is written to
    figsize     -- optional (width, height) in inches; matplotlib default when None
    x_labelsize -- optional font size for the x tick labels
    """
    fig, ax = plt.subplots(figsize=figsize, dpi=300)
    # Pass ax explicitly: without it DataFrame.plot opens its own new figure,
    # and the dpi/figsize requested above would not apply to the saved image.
    df.plot.bar(x='Gene', y='FC', color=color, label=label, ax=ax)
    ax.set_ylabel('Expression Fold Change')
    ax.set_title(title)
    if x_labelsize is not None:
        ax.tick_params(axis='x', which='major', labelsize=x_labelsize)
    fig.tight_layout()
    fig.savefig(outfile)  # comment out to skip writing the image to disk


h_df = h_df.sort_values(by=['FC'], ascending=False)  # sort by highest FC
plot_fold_change(h_df, 'red', 'Hygienic',
                 'Genes Overexpressed in Hygienic Bees', 'OverExHyg.png')

nh_df = nh_df.sort_values(by=['FC'], ascending=False)  # sort by highest FC
plot_fold_change(nh_df, 'blue', 'Non-Hygienic',
                 'Genes Overexpressed in Non-Hygienic Bees', 'OverExNonHyg.png',
                 figsize=(15, 5), x_labelsize=7.5)


In [None]:
# Single combined chart: every overexpressed gene, bars colored by hygiene status
combined_fig = plt.figure(3)
combined_ax = combined_fig.gca()
sns.barplot(
    x='Gene',
    y='FC',
    hue='Hygiene',
    data=overex_df,
    saturation=1,
    ax=combined_ax,
)
combined_ax.set_title('Overexpressed Genes')
combined_ax.set_ylabel('Expression Fold Change')
# Gene names are long, so rotate them vertical and shrink the font
combined_ax.tick_params(axis='x', labelrotation=90, labelsize=7.5)
combined_fig.tight_layout()
combined_fig.savefig('OverExAll.png')  # comment out to skip writing the image to disk