# Split MF4plus into independent Codebooks

In [2]:
import os

import numpy as np
import pandas as pd

from IPython.display import display

import MERFISH_probe_design.IO.file_io as fio
import MERFISH_probe_design.probe_design.probe_dict as p_d
import MERFISH_probe_design.probe_design.OTTable_dict as ot
import MERFISH_probe_design.probe_design.readout_sequences as rs
import MERFISH_probe_design.probe_design.probe_selection as ps
import MERFISH_probe_design.probe_design.quality_check as qc
from MERFISH_probe_design.probe_design import filters
from MERFISH_probe_design.probe_design import plot
from MERFISH_probe_design.probe_design import primer_design


In [5]:
# Root directory that holds every FISH probe library
library_master_folder = r'/lab/solexa_weissman/puzheng/MERFISH_Probes'
print(f"- FISH probe libraries stored in folder: {library_master_folder}")
# Sub-folder of the library handled in this notebook
library_folder = os.path.join(library_master_folder, '4T1_MERFISH', 'MF9_4T1Lung_mf4plus')
print(f"- library will be saved in: {library_folder}")

# Full codebook of this library; it is read here (not written) and split below
codebook_file = os.path.join(library_folder, 'codebook.csv')
cb_version, cb_name, bit_names, barcode_table = fio.load_merlin_codebook(codebook_file)

# Collect the transcript ids of all non-blank entries (blank rows carry an empty id)
transcript_ids = set(barcode_table.loc[barcode_table['id'] != '', 'id'])
barcode_table  # show the loaded barcode table

- FISH probe libraries stored in folder: /lab/solexa_weissman/puzheng/MERFISH_Probes
- library will be saved in: /lab/solexa_weissman/puzheng/MERFISH_Probes/4T1_MERFISH/MF9_4T1Lung_mf4plus


Unnamed: 0,name,id,barcode_str
0,Trdc,ENSMUST00000196323.2,010010000000010010
1,Ighd,ENSMUST00000194162.6,010000100000000110
2,Klhl14,ENSMUST00000122333.2,000101101000000000
3,Tmem252,ENSMUST00000057243.6,001000000001000110
4,Gpr141,ENSMUST00000065335.3,000000010100100010
...,...,...,...
186,Blank-12,,010010000101000000
187,Blank-13,,000001000010011000
188,Blank-14,,000010011000000100
189,Blank-15,,100000000010110000


## Load the MF9-mf4plus-base gene list

In [11]:
# CSV listing the genes of the base panel; only its 'Gene' column is used
manual_gene_file = os.path.join(library_folder, 'mf4plus_base.csv')
manual_genes = pd.read_csv(manual_gene_file)['Gene'].values
print(manual_genes)

['Chil1' 'Cxcr2' 'Ly6g' 'Asprv1' 'Cd79a' 'Ebf1' 'Ms4a1' 'Fcmr' 'Pax5'
 'Ighd' 'Chst3' 'Cd22' 'Fcer2a' 'Pou2af1' 'Cr2' 'Klhl14' 'C1qb' 'Mrc1'
 'Mmp12' 'Slc7a2' 'Ms4a7' 'Dab2' 'C1qa' 'C1qc' 'Enpp1' 'Arg1' 'Ccl24'
 'Cxcl14' 'Krt79' 'Arhgef37' 'Rassf4' 'Plcb1' 'Ptpro' 'Vcan' 'Ccr2'
 'Tppp3' 'Slc8a1' 'Cxcl3' 'Zbtb7b' 'Ccr4' 'Cd28' 'Cd4' 'Cd247' 'Cpa3'
 'Grm6' 'Gata2' 'Ms4a2' 'Fcer1a' 'Cyp11a1' 'Csrp3' 'Alox15' 'Cd200r3'
 'Klrk1' 'Klrb1c' 'Prdm1' 'Eomes' 'Ncr1' 'Fasl' 'Foxp3' 'Camk4' 'Wnt7b'
 'Fgfbp1' 'Hoxa10' 'Ibsp' 'Fxyd3' 'Fermt1' 'Inava' 'Ankrd1' 'Cldn4'
 'Tmprss11e' 'Robo4' 'Prex2' 'Cyp4b1' 'Sox17' 'Ptprb' 'Cldn5' 'Shank3'
 'Gpihbp1' 'Clec14a' 'Galnt18' 'Tmem252' 'Tspan7' 'Cd8b1' 'Ifng' 'Cd3g'
 'Themis' 'Cd8a' 'Gzmb' 'Ms4a4b' 'Cd96' 'Xcr1' 'Havcr2' 'Batf3' 'Flt3'
 'Itgax' 'Cd300e' 'Fzd2' 'Col6a6' 'Col5a2' 'Fscn1' 'Gpr39' 'Sdc1'
 'Siglech' 'Ccr9' 'Pacsin1' 'Pld4' 'Nes' 'Bcl11b' 'Trdc' 'Stxbp6' 'Htr1b'
 'Myc' 'Msh2' 'Cdca2' 'Rpa2' 'Dscc1' 'Kif2c' 'Nek2' 'Ung' 'Exosc5' 'Foxm1'
 'Ncapd2' 'R

In [22]:
# Rows to keep: every gene in the manual base list plus every blank barcode.
# Vectorized pandas masks replace the per-row Python loop; na=False makes a
# missing name count as "not a blank" instead of raising a TypeError.
_names = barcode_table['name']
_is_base_gene = _names.isin(manual_genes)
_is_blank = _names.str.contains('Blank-', regex=False, na=False)
kept_barcode_table = barcode_table.loc[_is_base_gene | _is_blank]

# Report requested genes that have no barcode so they are not dropped silently
_missing_genes = pd.Index(manual_genes).difference(pd.Index(_names)).tolist()
if _missing_genes:
    print(f"Warning: {len(_missing_genes)} gene(s) in the manual list are not in the codebook: {_missing_genes}")

# Destination file of the base codebook
base_codebook_file = os.path.join(library_folder, 'MF9-mf4plus-base_codebook.csv')
print(f"Saving codebook to file: {base_codebook_file}")

# Write the kept rows, reusing the version, name and bit names of the loaded codebook
fio.write_merlin_codebook(base_codebook_file, cb_version, cb_name, bit_names, 
                          kept_barcode_table['name'].values, 
                          kept_barcode_table['id'].values, 
                          kept_barcode_table['barcode_str'].values)

Saving codebook to file: /lab/solexa_weissman/puzheng/MERFISH_Probes/4T1_MERFISH/MF9_4T1Lung_mf4plus/MF9-mf4plus-base_codebook.csv


In [21]:
# Inspect the filtered barcode table (manual-list genes plus blank barcodes)
kept_barcode_table

Unnamed: 0,name,id,barcode_str
0,Trdc,ENSMUST00000196323.2,010010000000010010
1,Ighd,ENSMUST00000194162.6,010000100000000110
2,Klhl14,ENSMUST00000122333.2,000101101000000000
3,Tmem252,ENSMUST00000057243.6,001000000001000110
5,Dab2,ENSMUST00000080880.12,000010000001011000
...,...,...,...
186,Blank-12,,010010000101000000
187,Blank-13,,000001000010011000
188,Blank-14,,000010011000000100
189,Blank-15,,100000000010110000


array(['Trdc', 'Ighd', 'Klhl14', 'Tmem252', 'Dab2', 'Epcam', 'Msh2',
       'Camk4', 'Themis', 'Aicda', 'Cd200r3', 'Gata2', 'Btla', 'Serpinf1',
       'Myc', 'Ifng', 'Cdh1', 'Cd96', 'Cd8b1', 'Exosc5', 'Cd8a', 'Adgre4',
       'Clec14a', 'Pou2af1', 'Cd274', 'Col5a2', 'Wnt7b', 'Ibsp', 'Htr1b',
       'Prex2', 'Ankrd1', 'Itgb4', 'Cdca2', 'Fcnb', 'Robo4', 'Cd4',
       'Fgf2', 'Foxp3', 'Prdm1', 'Shank3', 'Twist1', 'Krt19', 'Ly6g',
       'Gpihbp1', 'Rpa2', 'Flt3', 'Cyp11a1', 'Cd226', 'Gpr39', 'Stxbp6',
       'Lef1', 'Nes', 'Ms4a4b', 'Ms4a2', 'Foxm1', 'Dscc1', 'Cyp4b1',
       'Bcl11b', 'Vcan', 'Ebf1', 'Zeb1', 'Arhgef37', 'Havcr2', 'Klrk1',
       'Trp63', 'Fgf1', 'Krt79', 'Ptprb', 'Tspan7', 'Sdc1', 'Pacsin1',
       'Galnt18', 'Mrc1', 'Slc8a1', 'Pld4', 'Cd3g', 'Fasl', 'Itgax',
       'Vegfa', 'Kif2c', 'Grm6', 'C1qb', 'C1qc', 'C1qa', 'Batf3', 'Cpa3',
       'Cxcl14', 'Ncr1', 'Enpp1', 'Nek2', 'Ptpro', 'Ms4a1', 'Ms4a7',
       'Tcf7', 'Slc2a1', 'Tmprss11e', 'Cldn5', 'Ung', 'Zbtb7b', 'Slc7a2'