In [1]:
from poola import core as pool
import pandas as pd
import seaborn as sns
import gpplot as gpp
import matplotlib.pyplot as plt
# Apply gpplot's plotting aesthetics with the 'paper' context for all figures below.
gpp.set_aesthetics(context='paper')

In [2]:
# Input/output locations, given relative to the notebook's working directory:
#   figure1_outpath - directory that receives the Figure 1 panels
#   file_path       - directory holding the primary-library data files
figure1_outpath = '../../Figures/Figure 1/'
file_path = "../../Data/primaryLibrary/"

In [3]:
# Load the raw read-count table for the HCT116 screen
# (pd.read_table parses tab-delimited text by default).
file = f"{file_path}counts-STS_HCT116_RDB181.txt"
HCT116_Kox1 = pd.read_table(file)

In [4]:
# Preview the first five rows of the raw read-count table.
HCT116_Kox1.head(n=5)

Unnamed: 0,Construct Barcode,Construct IDs,HCT116_RDB_181 repA,pDNA CP1948,HCT116_RDB_181 repB,Unlabeled Sample Barcodes
0,ATAGCGACGTCCGTTGGACA,ATAGCGACGTCCGTTGGACA,665,95,698,0
1,ACCCGGACTACGCCATGTTG,ACCCGGACTACGCCATGTTG,183,91,149,0
2,AGTACCCCTATTCAACCGGA,AGTACCCCTATTCAACCGGA,807,91,632,0
3,TAATCAGAGGAACTACGTTT,TAATCAGAGGAACTACGTTT,587,84,530,0
4,TATGGCTGTCACCTAGATCA,TATGGCTGTCACCTAGATCA,393,117,346,0


In [5]:
# Load the sgRNA design file and keep only the annotation columns listed below.
design_file = file_path + 'essentials_nonessentials_CRISPRi_tiling_designs_v2.csv'
design_columns = [
    'Target Gene Symbol',
    'Target Gene ID',
    'Category',
    'TSS Position',
    'sgRNA \'Cut\' Site TSS Offset',
    'sgRNA Sequence',
    'sgRNA Context Sequence',
    'On-Target Ruleset',
    'On-Target Efficacy Score',
]
ess_noness = pd.read_csv(design_file)[design_columns]
ess_noness.head()

Unnamed: 0,Target Gene Symbol,Target Gene ID,Category,TSS Position,sgRNA 'Cut' Site TSS Offset,sgRNA Sequence,sgRNA Context Sequence,On-Target Ruleset,On-Target Efficacy Score
0,ATP5F1B,506.0,Essential,56645984.0,-255.0,ATAGCGACGTCCGTTGGACA,GATCATAGCGACGTCCGTTGGACAGGGTAA,RS3seq-Chen2013,0.9257
1,ATP5F1B,506.0,Essential,56645984.0,24.0,ACCCGGACTACGCCATGTTG,CTCCACCCGGACTACGCCATGTTGGGGTTT,RS3seq-Chen2013,0.7588
2,ATP5F1B,506.0,Essential,56645984.0,396.0,AGTACCCCTATTCAACCGGA,GTTCAGTACCCCTATTCAACCGGAAGGTCA,RS3seq-Chen2013,0.7492
3,ATP5F1B,506.0,Essential,56645984.0,-278.0,TAATCAGAGGAACTACGTTT,AGGGTAATCAGAGGAACTACGTTTAGGAAA,RS3seq-Chen2013,0.7506
4,ATP5F1B,506.0,Essential,56645984.0,-362.0,TATGGCTGTCACCTAGATCA,CCCCTATGGCTGTCACCTAGATCAAGGACC,RS3seq-Chen2013,0.9978


In [6]:
# Log-normalize the read counts, then drop guides with low representation in the pDNA.
# NOTE(review): the pDNA column name used here contains two consecutive spaces, whereas
# the rendered table header shows a single space -- presumably whitespace collapsed by
# the display; confirm against the raw counts file.
lognorms_HCT116_Kox1 = pool.lognorm_columns(
    reads_df=HCT116_Kox1,
    columns=['HCT116_RDB_181 repA', 'pDNA  CP1948', 'HCT116_RDB_181 repB'],
)
filtered_lognorms_lognorms_HCT116_Kox1 = pool.filter_pdna(
    lognorm_df=lognorms_HCT116_Kox1,
    pdna_cols=['pDNA  CP1948'],
    z_low=-3,  # lower z-score cutoff handed to filter_pdna
)
# Report how many rows the pDNA filter removed.
n_filtered = lognorms_HCT116_Kox1.shape[0] - filtered_lognorms_lognorms_HCT116_Kox1.shape[0]
print(f'Filtered {n_filtered} rows due to low pDNA abundance')

Filtered 553 rows due to low pDNA abundance


In [7]:
# Log-fold change of each replicate, using the pDNA column as the reference.
replicate_cols = ['HCT116_RDB_181 repA', 'HCT116_RDB_181 repB']
lfc_df_HCT116_Kox1 = pool.calculate_lfcs(
    lognorm_df=filtered_lognorms_lognorms_HCT116_Kox1,
    ref_col='pDNA  CP1948',
    target_cols=replicate_cols,
)
lfc_df_HCT116_Kox1.head()

Unnamed: 0,Construct Barcode,Construct IDs,HCT116_RDB_181 repA,HCT116_RDB_181 repB,Unlabeled Sample Barcodes
0,ATAGCGACGTCCGTTGGACA,ATAGCGACGTCCGTTGGACA,-0.1383,0.148099,0
1,ACCCGGACTACGCCATGTTG,ACCCGGACTACGCCATGTTG,-1.620041,-1.658089,0
2,AGTACCCCTATTCAACCGGA,AGTACCCCTATTCAACCGGA,0.173086,0.073104,0
3,TAATCAGAGGAACTACGTTT,TAATCAGAGGAACTACGTTT,-0.138867,-0.052559,0
4,TATGGCTGTCACCTAGATCA,TATGGCTGTCACCTAGATCA,-1.081762,-1.03078,0


In [None]:
# Replicate concordance: density scatter of repA vs. repB LFCs with a correlation annotation.
rep_a_col, rep_b_col = 'HCT116_RDB_181 repA', 'HCT116_RDB_181 repB'
plt.subplots(figsize=(3, 3))
gpp.point_densityplot(data=lfc_df_HCT116_Kox1, x=rep_a_col, y=rep_b_col)
gpp.add_correlation(data=lfc_df_HCT116_Kox1, x=rep_a_col, y=rep_b_col)
sns.despine()
# Title, axis labels and tick-label sizes for the small (3x3) panel.
plt.title('HCT116 Kox1', fontsize=9)
plt.xlabel('Replicate A', fontsize=9)
plt.ylabel('Replicate B', fontsize=9)
plt.xticks(fontsize=7)
plt.yticks(fontsize=7)
# Save the panel as a PDF in the Figure 1 output directory.
gpp.savefig(figure1_outpath + 'HCT116_Kox1_rep_corr.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Keep only the guide identifier and the two replicate LFC columns.
lfc_keep_cols = ['Construct Barcode', 'HCT116_RDB_181 repA', 'HCT116_RDB_181 repB']
lfc_df_HCT116_Kox1 = lfc_df_HCT116_Kox1.loc[:, lfc_keep_cols]

In [None]:
# Average the LFCs across replicates, keyed by 'Construct Barcode'.
# NOTE(review): presumably poola splits each LFC column name on sep ('_') and uses the
# piece(s) at condition_indices as the condition label -- confirm against the poola docs.
avg_replicate_lfc_df_HCT116_Kox1 = pool.average_replicate_lfcs(
    lfcs=lfc_df_HCT116_Kox1,
    guide_col='Construct Barcode',
    condition_indices=[0],
    sep='_',
)
avg_replicate_lfc_df_HCT116_Kox1.head()

In [None]:
# Rename the guide column to 'sgRNA Sequence' so it matches the design table's column name
# used as the merge key in the annotation step.
# Reassign the renamed copy instead of using inplace=True: this avoids mutating the frame
# returned by poola in place and keeps the cell safe to re-run.
avg_replicate_lfc_df_HCT116_Kox1 = avg_replicate_lfc_df_HCT116_Kox1.rename(
    columns={'Construct Barcode': 'sgRNA Sequence'}
)

In [None]:
# Merge the averaged LFCs with the design annotations on 'sgRNA Sequence' and z-score
# guide LFCs against the negative controls labelled 'ONE_INTERGENIC'
# (gene symbols are taken from 'Target Gene Symbol').
annot_guide_lfcs_HCT116_Kox1 = pool.annotate_guide_lfcs(
    avg_replicate_lfc_df_HCT116_Kox1,
    ess_noness,
    'Target Gene Symbol',
    merge_on='sgRNA Sequence',
    z_score_neg_ctls=True,
    z_score_neg_ctl_genes='ONE_INTERGENIC',
)
annot_guide_lfcs_HCT116_Kox1.head()

In [None]:
# Write the annotated, z-scored guide table next to the input data (row index omitted).
zscored_csv_path = file_path + 'HCT116_Kox1_zscored.csv'
annot_guide_lfcs_HCT116_Kox1.to_csv(zscored_csv_path, index=False)