In [1]:
import pandas as pd


Brief exploration of genes that are not targeted in the Jacquere library to confirm that guides targeting these genes in other libraries are not of adequate quality (according to Jacquere criteria for library design). 

In [3]:
# Load the CRISPick-evaluated sgRNA designs for each external library

folderpath= "../Data/CRISPick evaluated CRISPRko Cas9 Genome Wide Libraries/"

'''

For these CRISPick runs, I used Ensembl 113 as the reference genome and indicated targeting of all protein coding genes. 
I then supplied the set of guide sequences from the library (the same sets used as input to the guide annotation tool 
for the assessment of library drift). I indicated a quota of 1 and for CRISPick to show unpicked sequences.

'''


def _read_preselected_designs(filename):
    """Read one CRISPick sgRNA-designs table and keep its preselected rows.

    The file is read from `folderpath`. Only rows whose "Picking Notes"
    contain "Preselect" at least once are kept (per the run description
    above, these are the guides supplied from the library itself), and the
    index is renumbered from 0.
    """
    designs = pd.read_table(folderpath + filename, low_memory=False)
    # Rows with missing notes compare as False here and are dropped.
    is_preselected = designs["Picking Notes"].str.count("Preselect") > 0
    return designs[is_preselected].reset_index(drop=True)


gecko = _read_preselected_designs("gecko_bothsets_2_10_2025-sgrna-designs.txt")
avana = _read_preselected_designs("avana_2_10_2025-sgrna-designs.txt")
brunello = _read_preselected_designs("brunello_2_10_2025-sgrna-designs.txt")
hsangerv1 = _read_preselected_designs("hsangerv1_2_10_2025-sgrna-designs.txt")
tkov3 = _read_preselected_designs("tkov3_2_10_2025-sgrna-designs.txt")
gattinara = _read_preselected_designs("gattinara_2_10_2025-sgrna-designs.txt")
minlibcas9 = _read_preselected_designs("minlibcas9_2_10_2025-sgrna-designs.txt")

# Assembled Jacquere CRISPick table (not filtered in this cell)
jacquere = pd.read_csv("../../4. Jacquere Design/Data/jacquere_assembled_crispick.csv")


  jacquere=pd.read_csv("../../4. Jacquere Design/Data/jacquere_assembled_crispick.csv")


## Assess genes not targetable by Jacquere in other libraries

Demonstration that, in other libraries, the guides targeting genes that are untargetable in Jacquere have RS3 < 0.2, have Agg CFD > 4.8, violate the ancestry filter, or contain BsmBI sites.

In [5]:
# "Input" values of the rows labelled Source == "GENCODE" in the untargetable-genes table;
# these are matched against "Target Gene ID" in each library below.
untargetable_genes_table = pd.read_csv("../../4. Jacquere Design/Jacquere_untargetable_genes.csv")
from_gencode = untargetable_genes_table["Source"] == "GENCODE"
jacquere_not_targeted = untargetable_genes_table.loc[from_gencode, "Input"].tolist()

# GeCKO guides whose target gene is in that list
gecko_hits = gecko.loc[gecko["Target Gene ID"].isin(jacquere_not_targeted)]

# Drop rows whose Aggregate CFD Score is the literal string "MAX" so the column can be cast to float
gecko_hits = gecko_hits.loc[gecko_hits["Aggregate CFD Score"] != "MAX"]
gecko_jacquere_untargetable = gecko_hits.reset_index(drop=True)
gecko_jacquere_untargetable["Aggregate CFD Score"] = gecko_jacquere_untargetable["Aggregate CFD Score"].astype(float)

# Display the guides with Aggregate CFD Score < 4.8 and On-Target Efficacy Score > 0.2
gecko_low_cfd = gecko_jacquere_untargetable["Aggregate CFD Score"] < 4.8
gecko_high_efficacy = gecko_jacquere_untargetable["On-Target Efficacy Score"] > 0.2
gecko_jacquere_untargetable[gecko_low_cfd & gecko_high_efficacy]

Unnamed: 0,Input,Quota,Target Taxon,Target Gene ID,Target Gene Symbol,Target Transcript,Target Alias,CRISPR Mechanism,Target Domain,Reference Sequence,...,Other Target Matches,Aggregate CFD Score,Off-Target CFD100 Hits,Off-Target Tier I CFD100 Hits,On-Target Ruleset,On-Target Efficacy Score,On-Target Rank,Pick Order,Picking Round,Picking Notes
31,ENSG00000183206,1.0,9606,ENSG00000183206,POTEC,ENST00000358970.10,,CRISPRko,CDS,NC_000018.10,...,,3.6667,0,0,RS3seq-Chen2013+RS3target,0.2807,33.0,2.0,0.0,Preselected


The one GeCKO guide that targets a Jacquere-untargetable gene (POTEC) with Agg CFD < 4.8 and RS3 > 0.2 violates the ancestry filter and targets upstream of the 5% mark of the protein-coding region (shown by an ad hoc CRISPick run of all guides targeting POTEC).

In [6]:
# Avana guides whose target gene is in the Jacquere-untargetable list
avana_targets_untargetable = avana["Target Gene ID"].isin(jacquere_not_targeted)
avana_hits = avana.loc[avana_targets_untargetable]

# Drop rows whose Aggregate CFD Score is the literal string "MAX" so the column can be cast to float
avana_cfd_not_max = avana_hits["Aggregate CFD Score"].ne("MAX")
avana_jacquere_untargetable = avana_hits.loc[avana_cfd_not_max].reset_index(drop=True)
avana_jacquere_untargetable["Aggregate CFD Score"] = avana_jacquere_untargetable["Aggregate CFD Score"].astype(float)

# Display the guides with Aggregate CFD Score < 4.8 and On-Target Efficacy Score > 0.2
avana_jacquere_untargetable.loc[
    avana_jacquere_untargetable["Aggregate CFD Score"].lt(4.8)
    & avana_jacquere_untargetable["On-Target Efficacy Score"].gt(0.2)
]



Unnamed: 0,Input,Quota,Target Taxon,Target Gene ID,Target Gene Symbol,Target Transcript,Target Alias,CRISPR Mechanism,Target Domain,Reference Sequence,...,Other Target Matches,Aggregate CFD Score,Off-Target CFD100 Hits,Off-Target Tier I CFD100 Hits,On-Target Ruleset,On-Target Efficacy Score,On-Target Rank,Pick Order,Picking Round,Picking Notes
71,ENSG00000230178,1.0,9606,ENSG00000230178,OR4F3,ENST00000456475.1,,CRISPRko,CDS,NC_000005.10,...,"ENSG00000284662 (OR4F16), ENSG00000284733 (OR4...",4.1538,2,2,RS3seq-Chen2013+RS3target,0.2834,49.0,1.0,0.0,BsmBI:5'cgTCTC; Preselected


In [7]:
# Brunello guides targeting a Jacquere-untargetable gene, with the literal "MAX"
# Aggregate CFD rows removed and the remaining scores cast to float.
brunello_jacquere_untargetable = (
    brunello[brunello["Target Gene ID"].isin(jacquere_not_targeted)]
    .loc[lambda hits: hits["Aggregate CFD Score"] != "MAX"]
    .reset_index(drop=True)
    .assign(**{"Aggregate CFD Score": lambda hits: hits["Aggregate CFD Score"].astype(float)})
)

# Display the guides with Aggregate CFD Score < 4.8 and On-Target Efficacy Score > 0.2
brunello_low_cfd = brunello_jacquere_untargetable["Aggregate CFD Score"] < 4.8
brunello_high_efficacy = brunello_jacquere_untargetable["On-Target Efficacy Score"] > 0.2
brunello_jacquere_untargetable[brunello_low_cfd & brunello_high_efficacy]



Unnamed: 0,Input,Quota,Target Taxon,Target Gene ID,Target Gene Symbol,Target Transcript,Target Alias,CRISPR Mechanism,Target Domain,Reference Sequence,...,Other Target Matches,Aggregate CFD Score,Off-Target CFD100 Hits,Off-Target Tier I CFD100 Hits,On-Target Ruleset,On-Target Efficacy Score,On-Target Rank,Pick Order,Picking Round,Picking Notes
169,ENSG00000230178,1.0,9606,ENSG00000230178,OR4F3,ENST00000456475.1,,CRISPRko,CDS,NC_000005.10,...,"ENSG00000284662 (OR4F16), ENSG00000284733 (OR4...",4.1538,2,2,RS3seq-Chen2013+RS3target,0.2834,49.0,2.0,0.0,BsmBI:5'cgTCTC; Preselected


Avana and Brunello: the only guide targeting a Jacquere-untargetable gene that is not already excluded by Agg CFD > 4.8 or RS3 < 0.2 (a guide targeting OR4F3) contains a BsmBI site.

The rest of the libraries do not target the Jacquere-untargetable genes with any guide that has both RS3 > 0.2 and Agg CFD < 4.8.

In [8]:
def _guides_for_untargetable_genes(library):
    """Return the library's guides that target a gene in `jacquere_not_targeted`.

    Rows whose "Aggregate CFD Score" is the literal string "MAX" are dropped,
    the index is renumbered from 0, and the score column is cast to float.
    """
    hits = library[library["Target Gene ID"].isin(jacquere_not_targeted)]
    hits = hits[hits["Aggregate CFD Score"] != "MAX"].reset_index(drop=True)
    hits["Aggregate CFD Score"] = hits["Aggregate CFD Score"].astype(float)
    return hits


def _count_passing_guides(hits):
    """Count rows with Aggregate CFD Score < 4.8 and On-Target Efficacy Score > 0.2."""
    low_cfd = hits["Aggregate CFD Score"] < 4.8
    high_efficacy = hits["On-Target Efficacy Score"] > 0.2
    return len(hits[low_cfd & high_efficacy])


hsangerv1_jacquere_untargetable = _guides_for_untargetable_genes(hsangerv1)
print(_count_passing_guides(hsangerv1_jacquere_untargetable))

tkov3_jacquere_untargetable = _guides_for_untargetable_genes(tkov3)
print(_count_passing_guides(tkov3_jacquere_untargetable))

gattinara_jacquere_untargetable = _guides_for_untargetable_genes(gattinara)
print(_count_passing_guides(gattinara_jacquere_untargetable))

minlibcas9_jacquere_untargetable = _guides_for_untargetable_genes(minlibcas9)
print(_count_passing_guides(minlibcas9_jacquere_untargetable))


0
0
0
0
