# Generate CSN Complex (COP9 Signalosome) Libraries

In [1]:
import be_scan
import warnings
warnings.filterwarnings('ignore')
import pandas as pd

In [2]:
# All inputs and outputs live under one CSN folder, addressed relative to this notebook.
csn_root = '../../../../../../Downloads/CSN/'

gene_dir = csn_root + 'new_Genes/'            # gene FASTA inputs
protein_dir = csn_root + 'new_Proteins/'      # protein FASTA inputs
libraries_dir = csn_root + 'new_Libraries/'   # generated guide-library CSVs

# Subunit suffixes used to build the COPS<suffix> gene names.
subunits = '1 2 3 4 5 6 7A 7B 8'.split()

In [3]:
# Each entry is (gene name, gene FASTA path, protein FASTA path).
# CAND1 and COPS1 are listed by hand; COPS1's files use the GSP1 file stem.
cop9 = [
    ('CAND1', gene_dir+'CAND1_Input.fasta', protein_dir+'CAND1_Protein.fasta'),
    ('COPS1', gene_dir+'GSP1_Input.fasta', protein_dir+'GSP1_Protein.fasta'),
]
# The remaining subunits follow the COPS<suffix> naming pattern
# (subunit '1' is skipped because it is already covered above).
cop9 += [
    (f"COPS{s}", gene_dir+f"COPS{s}_Input.fasta", protein_dir+f"COPS{s}_Protein.fasta")
    for s in subunits[1:]
]

# Base-editor conversions as (edit_from, edit_to) pairs.
editors = [("A", "G"), ("C", "T")]

In [4]:
# Build one annotated guide library per (name, gene FASTA, protein FASTA) entry.
for name, gene, protein in cop9:
    # Generate and annotate guides separately for each base-editor conversion.
    for e1, e2 in editors:
        edit_tag = e1 + 'to' + e2
        guides_csv = f"{name}_{edit_tag}_guides.csv"
        be_scan.sgrna.generate_BE_guides(
            gene_filepath=gene,
            cas_type="SpG",
            edit_from=e1,
            edit_to=e2,
            gene_name=name,
            output_name=guides_csv,
            output_dir=libraries_dir,
        )
        be_scan.sgrna.annotate_guides(
            guides_file=libraries_dir + guides_csv,
            gene_filepath='',
            protein_filepath=protein,
            edit_from=e1,
            edit_to=e2,
            output_name=f"{name}_{edit_tag}_annotated_guides.csv",
            output_dir=libraries_dir,
        )

    # Merge the two per-editor tables into one file for this gene.
    # The AtoG / CtoT file names correspond to the pairs listed in `editors`.
    combined_csv = f"{name}_annotated_guides.csv"
    be_scan.sgrna.merge_guide_df(
        guide_df1_filepath=libraries_dir + f"{name}_AtoG_annotated_guides.csv",
        guide_df2_filepath=libraries_dir + f"{name}_CtoT_annotated_guides.csv",
        output_name=combined_csv,
        output_dir=libraries_dir,
    )

    # Annotate the merged table with both conversions at once;
    # the result is written back over the merged file.
    be_scan.sgrna.annotate_guides(
        guides_file=libraries_dir + combined_csv,
        gene_filepath='',
        protein_filepath=protein,
        edit_from='AC',
        edit_to='GT',
        output_dir=libraries_dir,
        output_name=combined_csv,
    )
    

Create gene object from ../../../../../../Downloads/CSN/new_Genes/CAND1_Input.fasta
Parsing exons: 15 exons found
Preprocessing sucessful
Guides generated and duplicates removed
Guides annotated
Create gene object from ../../../../../../Downloads/CSN/new_Genes/CAND1_Input.fasta
Parsing exons: 15 exons found
Preprocessing sucessful
Guides generated and duplicates removed
Guides annotated
Guides annotated
Create gene object from ../../../../../../Downloads/CSN/new_Genes/GSP1_Input.fasta
Parsing exons: 13 exons found
Preprocessing sucessful
Guides generated and duplicates removed
Guides annotated
Create gene object from ../../../../../../Downloads/CSN/new_Genes/GSP1_Input.fasta
Parsing exons: 13 exons found
Preprocessing sucessful
Guides generated and duplicates removed
Guides annotated
Guides annotated
Create gene object from ../../../../../../Downloads/CSN/new_Genes/COPS2_Input.fasta
Parsing exons: 13 exons found
Preprocessing sucessful
Guides generated and duplicates removed
Guides ann

In [5]:
# Combine the per-gene annotated guide tables into a single CSN library
# and save it as CSN_Library.csv in libraries_dir.
cops = [
    pd.read_csv(libraries_dir+f"{name}_annotated_guides.csv")
    for name, gene, protein in cop9
]

# ignore_index=True gives the combined table one continuous row index;
# without it every gene's rows restart at 0 and the saved index column repeats.
csn_lib = pd.concat(cops, ignore_index=True)

# Keep only the first 20 characters of each guide sequence
# (presumably the protospacer without the PAM -- TODO confirm).
# .str slicing leaves missing values as NaN instead of raising a TypeError.
csn_lib['sgRNA_seq'] = csn_lib['sgRNA_seq'].str[:20]
csn_lib.to_csv(libraries_dir+'CSN_Library.csv')


In [6]:
# Load the HSK CSN library to compare against the generated one.
hsk_csv = '../../../../../../Downloads/CSN/'+'HSK_CSN_lib.csv'
HSK_lib = pd.read_csv(hsk_csv)

# Lower-cased guide sequences are the join keys, so matching ignores letter case.
hsk_keys = HSK_lib["sgRNA_seq"].str.lower()
csn_keys = csn_lib["sgRNA_seq"].str.lower()

# The outer join keeps guides that appear in only one of the two libraries.
merged = HSK_lib.merge(csn_lib, how="outer", left_on=hsk_keys, right_on=csn_keys)
merged.to_csv('check.csv')