In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
%matplotlib inline

# Global typography for every figure drawn after this cell: Arial at 7 pt.
plt.rcParams.update({'font.family': 'Arial', 'font.size': 7})

In [None]:
# Load the annotation table (tab-separated) and keep only the rows whose
# 'Gene' column is exactly 'PDR5'.
annos = pd.read_csv('../data_tables/QTL_pool_annotations.tsv', sep='\t')
pdr5_annos = annos.loc[annos['Gene'] == 'PDR5']

In [None]:
# Load the fitness table and attach the PDR5 annotations.
# The inner join on 'var_id' keeps only ids present in both tables, so the
# result is restricted to annotated PDR5 variants.
pdr5_fit = pd.read_csv(
    '../data_tables/Fitness_files/pdr5_fitness_FC.tsv', sep='\t'
).merge(pdr5_annos, how='inner', on='var_id')

In [None]:
# Figure 2g: fitness of each PDR5 variant in caffeine (CAFF) and fluconazole
# (FLUC), plotted against its genomic coordinate.

#setting line width (global: affects every later figure in this kernel)
plt.rcParams['lines.linewidth'] = .8
fig, ax = plt.subplots(figsize=(2.75, 2))

#removing SC fitness values
justcaff_and_fluc = pdr5_fit[pdr5_fit['condition'].isin(['CAFF', 'FLUC'])]
justcaff = justcaff_and_fluc[justcaff_and_fluc['condition'] == 'CAFF']
justfluc = justcaff_and_fluc[justcaff_and_fluc['condition'] == 'FLUC']

#plotting lines connecting fitness values for the same variant in
#caffeine and fluconazole
for key, grp in justcaff_and_fluc.groupby('var_id'):
    ax.plot(grp.SNP_chr_pos, grp.coef, label=key, color='black', alpha=.4)

#plotting markers for caffeine and fluconazole fitness for each variant
# (FLUC is drawn first, so it takes the first colour of the default cycle)
fluc = ax.scatter(justfluc['SNP_chr_pos'], justfluc['coef'], marker='o', s=3)
caff = ax.scatter(justcaff['SNP_chr_pos'], justcaff['coef'], marker='o', s=3)

#shading the promoter/noncoding regions (grey) and the coding region (white)
#of PDR5; both flanking noncoding spans are styled identically
ax.axvspan(619000, 619840, alpha=.2, facecolor='dimgrey')
ax.axvspan(619840, 624375, alpha=.2, facecolor='white')
ax.axvspan(624375, 624800, alpha=.2, facecolor='dimgrey')

#creating rectangles to serve as markers for patches in the legend
#(otherwise the white patch will be blank); they are never added to the axes
prom = Rectangle(xy=(0, 0), width=10, height=5, facecolor='dimgrey',
                 alpha=.2, edgecolor='black')
coding = Rectangle(xy=(0, 0), width=10, height=5, facecolor='white',
                   alpha=.2, edgecolor='black')

ax.set_xlim(619200, 624800)
#preventing overlapping x tick markers
ax.locator_params(tight=True, axis="x", nbins=3)

lgnd = ax.legend([caff, fluc, prom, coding],
                 ['CAFF', 'FLC', 'Noncoding', 'Coding'],
                 loc='center left', bbox_to_anchor=(1, 0.5))

#making legend markers larger for easier reading
# Legend.legendHandles was renamed to legend_handles in Matplotlib 3.7 and
# removed in 3.9, so prefer the new name and fall back for older releases.
legend_markers = getattr(lgnd, 'legend_handles', None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for marker_handle in legend_markers[:2]:  # the two scatter entries
    marker_handle.set_sizes([20])

ax.set_xlabel('Genomic Coordinates (chr15)')
ax.set_ylabel('Variant Fitness')
# NOTE(review): the legend is anchored outside the axes; without
# bbox_inches='tight' it may fall outside the saved canvas -- confirm the
# exported SVG looks as intended.
plt.savefig('../GxE_Figures/Figure_2/figure2g_genomic_coords_PDR5_FC.svg')


In [None]:
#Generating ecological origins data for figure 2g

In [None]:
# Display the fitness rows for the variants at positions 622657 and 622658.
pdr5_fit.loc[pdr5_fit['SNP_chr_pos'].isin([622657, 622658])]

In [None]:
# Tab-separated genome info table; its columns from index 14 onward are
# rewritten as integer counts by the loop later in this cell.
inf = pd.read_csv('../../1011_genome_info_var_ids.tsv', sep='\t')

def f(l):
    """Return the sum of the items of ``l`` after converting each to ``int``.

    Parameters
    ----------
    l : iterable
        Items accepted by ``int()`` (e.g. the strings ``'0'`` and ``'1'``).

    Returns
    -------
    int
        The sum of the converted items, or 0 when ``l`` is not iterable
        (e.g. ``NaN`` or ``None``) or any item cannot be converted
        (e.g. ``'.'``).

    Only conversion failures are turned into 0; unrelated exceptions such as
    ``KeyboardInterrupt`` propagate instead of being silently swallowed.
    """
    try:
        return sum(int(i) for i in l)
    except (TypeError, ValueError, OverflowError):
        # TypeError: l is not iterable or an item is not int-convertible;
        # ValueError: an item is a non-numeric string;
        # OverflowError: an item is an infinite float.
        return 0

# Rewrite every column from index 14 onward as an integer per cell: take the
# text before the first ':', split it on '/', and sum the pieces with f()
# (cells that cannot be parsed become 0).
# NOTE(review): presumably these are per-strain genotype strings such as
# '0/1:...' -- confirm against the input file.
for col in inf.columns[14:]:
    allele_fields = inf[col].str.split(':').str[0].str.split('/')
    inf[col] = allele_fields.apply(f)

# Remove rows that are exact duplicates after the conversion.
inf = inf.drop_duplicates()

In [None]:
# Strain metadata workbook; header=3 takes the column names from the
# spreadsheet row at index 3.
eco = pd.read_excel('../data_tables/1011_genomes_suppdata.xls', header=3)

# Every non-missing 'Standardized name', minus the first 'ALT' entry
# (list.remove raises ValueError if 'ALT' is absent).
strains = (
    eco['Standardized name']
    .dropna()
    .tolist()
)
strains.remove('ALT')
def get_strains(pos):
    """Return the standardized names of strains with a nonzero count at ``pos``.

    Reads the module-level ``inf``, ``eco`` and ``strains`` objects.

    Parameters
    ----------
    pos : value compared for equality against ``inf['POS']``.

    Returns
    -------
    list
        Entries of ``eco['Standardized name']`` (in ``eco`` row order) whose
        column in ``inf``, summed over all rows with ``POS == pos``, is
        nonzero. Empty when no row matches.

    NOTE(review): rows are matched on ``POS`` alone; if ``inf`` holds more
    than one chromosome, equal coordinates on different chromosomes would be
    pooled -- confirm against the input table.
    """
    rows_at_pos = inf.loc[inf['POS'] == pos, strains]
    per_strain_total = rows_at_pos.sum()
    carriers = per_strain_total[per_strain_total != 0].index.tolist()

    is_carrier = eco['Standardized name'].isin(carriers)
    return eco.loc[is_carrier, 'Standardized name'].tolist()

In [None]:
# Ecological origins of the strains with a nonzero count at position 622657.
eco.loc[
    eco['Standardized name'].isin(get_strains(622657)),
    'Ecological origins',
].value_counts()

In [None]:
# Ecological origins of the strains with a nonzero count at position 622658.
eco.loc[
    eco['Standardized name'].isin(get_strains(622658)),
    'Ecological origins',
].value_counts()