# This code creates HCR probe pairs for the targets listed in the corresponding .csv file

If the Python environment is not available yet, create it with this command and select it as the notebook kernel:
```
conda create -n hcr_probe_generator -c bioconda biopython numpy=1.23.5 pandas=1.3.5 blast openpyxl
```

In [1]:
# import modules
from maker37cb_mod import maker
import os
import pandas as pd
from contextlib import redirect_stdout

Define project name (name of your input csv file) and make sure file paths are correct:

In [2]:
# Project name: must equal the name of the input csv file without ".csv".
project_name = "20241219_HCR_probes_Martina"

# Folder locations, each written with a trailing "/".
reference_path = "/home/mstemmer/repos/HCR_probe_generator/references/"  # reference files named in the input csv
in_path = "/home/mstemmer/repos/HCR_probe_generator/targets/"  # input csv files
out_path = "/home/mstemmer/repos/HCR_probe_generator/generated_probes/"  # one sub-folder per project

Place your input .csv file (named after the project) into the targets folder. \
The reference file (placed in the references folder) must be an unpacked .fa file, and its file name must also be given in the input .csv file. \
--> See the example_input.csv file for the required headers/columns!

Required headers (all other columns will be ignored):
```
'short','gene_name','amplifier','reference','sequence'
```
'short': abbreviated species name (e.g. 'dr' for Danio rerio)

Set up the file structure. \
Check that the input file is correct!

In [3]:
# Resolve the project's input file and output folder, then preview the input.
input_csv = f"{project_name}.csv"

# Join the path components explicitly so the result is correct whether or
# not out_path / in_path end with a path separator.
out_project = os.path.join(out_path, project_name)
os.makedirs(out_project, exist_ok=True)

# output folder to your generated HCR probes
print(f'You will find your HCR probes here: {out_project}')

# show input csv file with relevant columns
in_file = os.path.join(in_path, input_csv)
input_df = pd.read_csv(in_file)
# Keep only the required columns; a missing column raises KeyError here.
input_df = input_df[['short','gene_name','amplifier','reference','sequence']]
print()
print('All correct in the input csv file?')
# Last expression of the cell: the notebook renders the table for inspection.
input_df

You will find your HCR probes here: /home/mstemmer/repos/HCR_probe_generator/generated_probes/20241219_HCR_probes_Martina

All correct in the input csv file?


Unnamed: 0,short,gene_name,amplifier,reference,sequence
0,gg,calbindin1,B1,GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b_rn...,GGGAGGAGCCCTCGGAGTCGTCGTCGCCGCAGCTGCCGCCGTCGCC...
1,gg,vimentin,B2,GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b_rn...,GCTCTTCTTCGCCCGCCGCGCTCCGAGCCCCGCTCCGCTCCCGGAT...
2,gg,gnat3,B3,GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b_rn...,AGCCAGCACAGTTCATGTACCTCACAGTGAGCCTGAGATTTTCACA...
3,gg,trpm5,B5,GCF_016699485.2_bGalGal1.mat.broiler.GRCg7b_rn...,AGGAAAAGAGCAGCATCAAAAAACGTTGGTGACAGCAGTCACTTCA...
4,mm,calbindin1,B1,GCF_000001635.27_GRCm39_rna.fna,GGAGAACTCCGGAGGACGCCCGAACGGAGCAGCACCGCGGACAGCG...
5,mm,vimentin,B2,GCF_000001635.27_GRCm39_rna.fna,CTCTGCCACTCTTGCTCCGGGACCCCAGAGACCCCAGCGCTCCTAC...
6,mm,gnat3,B3,GCF_000001635.27_GRCm39_rna.fna,GCTGCCTGTTGTAGCGAGCACCGCTCATATGTCCTATATCTAAACT...
7,mm,trpm5,B5,GCF_000001635.27_GRCm39_rna.fna,TCTAACACTCAGCACCAGCAGCTACATGCCAGGAATCTGGAAGGAA...
8,sc,calbindin1,B1,GCF_022539315.1_serCan2020_rna.fna,GGGTTCATGGACGGGAAGGAGCTACAAAACTTCATCCAGGAGCTGC...
9,sc,vimentin,B2,GCF_022539315.1_serCan2020_rna.fna,ATGAGCATCAGCACGAAAAACTCCTCGTACCGGCGCATGTTCGGCG...


Run the HCR probe generator over all rows of the input csv file. \
The code tries to generate 33 pairs for each target. If that number can't be reached, the generator re-runs without trying to reach that maximum.

In [4]:
# Probe-design settings. They are the same for every target, so they are set
# once here and handed to maker() unchanged; see maker37cb_mod for what each
# option controls.
pause = 12
polyAT = 5
polyCG = 5
choose = "n"
BlastProbes = "y"
dropout = "y"
show = "y"
report = "y"
numbr = 0

# Generate one probe file and one log file per row of the input table.
for index, row in input_df.iterrows():
    target = f"{row['short']}_{row['gene_name']}_{row['amplifier']}"
    print(f"--> Working on {target}")

    outfile = os.path.join(out_project, f"{target}_probes.csv")
    log_file = os.path.join(out_project, f"{target}_log.txt")
    # Join instead of concatenating with "/", so a trailing separator on
    # reference_path does not produce a doubled "//" in the db path.
    db = os.path.join(reference_path, str(row['reference']))

    # Everything maker() prints goes into the per-target log file.
    with open(log_file, 'w') as f, redirect_stdout(f):
        try:
            # First attempt: ask for the maximum number of probe pairs.
            maxprobe = "y"
            maker(row['gene_name'],row['sequence'],row['amplifier'],pause,choose,polyAT,polyCG,BlastProbes,db,dropout,show,report,maxprobe,numbr,outfile)
        except IndexError:
            # Fallback described in the notebook text: re-run without
            # trying to reach the maximum. An IndexError raised by this
            # second call is not caught and stops the loop.
            maxprobe = "n"
            maker(row['gene_name'],row['sequence'],row['amplifier'],pause,choose,polyAT,polyCG,BlastProbes,db,dropout,show,report,maxprobe,numbr,outfile)


# Rename probe pools & fuse all probes into single .csv and .xlsx file for IDT order
print('Fusing probes...')
all_probes = os.path.join(out_project, f"{project_name}_all_probes")

# Per-target frames are collected here and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0 and copies the growing frame on
# every call, so it is not used.
pool_frames = []

for index, row in input_df.iterrows():
    target = f"{row['short']}_{row['gene_name']}_{row['amplifier']}"
    print(f"--> Fusing {target}")

    probes = os.path.join(out_project, f"{target}_probes.csv")

    probes_df = pd.read_csv(probes)
    print(probes_df.shape)

    # The row index is appended so that every pool name is unique.
    new_pool_name = f"{target}_{index}"
    if not probes_df.empty:
        # The pool name to replace is read from the second row, as before;
        # a single-row file falls back to its only row instead of raising.
        ref_row = 1 if len(probes_df) > 1 else 0
        old_pool_name = probes_df['Pool name'].iloc[ref_row]
        # Assign the result back to the column: a chained
        # `.replace(..., inplace=True)` does not update the frame when
        # pandas copy-on-write is active.
        probes_df['Pool name'] = probes_df['Pool name'].replace(old_pool_name, new_pool_name)

    # Overwrite the per-target file with the renamed pool.
    probes_df.to_csv(probes, index=False)

    pool_frames.append(probes_df)

lead_columns = ['Pool name', 'Sequence']
if pool_frames:
    all_probes_df = pd.concat(pool_frames, ignore_index=True)
    # Keep 'Pool name' and 'Sequence' as the first two columns, followed by
    # any additional columns found in the probe files.
    extra_columns = [col for col in all_probes_df.columns if col not in lead_columns]
    all_probes_df = all_probes_df.reindex(columns=lead_columns + extra_columns)
else:
    # No targets: still write files containing only the two headers.
    all_probes_df = pd.DataFrame({'Pool name': [], 'Sequence': []})

all_probes_df.to_csv(f'{all_probes}.csv', index=False)
all_probes_df.to_excel(f'{all_probes}.xlsx', index=False)


--> Working on gg_calbindin1_B1
--> Working on gg_vimentin_B2
--> Working on gg_gnat3_B3
--> Working on gg_trpm5_B5
--> Working on mm_calbindin1_B1
--> Working on mm_vimentin_B2
--> Working on mm_gnat3_B3
--> Working on mm_trpm5_B5
--> Working on sc_calbindin1_B1
--> Working on sc_vimentin_B2
--> Working on sc_gnat3_B3
--> Working on sc_trpm5_B5
--> Working on tg_calbindin1_B1
--> Working on tg_vimentin_B2
--> Working on tg_gnat3_B3
--> Working on tg_trpm5_B5
--> Working on mm_cd166_B2
--> Working on gg_cd166_B2
--> Working on sc_cd166_B2
--> Working on tg_cd166_B2
--> Working on mm_aqp5_B3
--> Working on gg_aqp5_B3
--> Working on sc_aqp5_B3
--> Working on tg_aqp5_B3
--> Working on mm_cdh1_B1
--> Working on sc_cdh1_B1
--> Working on tg_cdh1_B1
--> Working on mm_amy1_B5
Fusing probes...
--> Fusing gg_calbindin1_B1
(66, 2)
--> Fusing gg_vimentin_B2
(66, 2)
--> Fusing gg_gnat3_B3
(66, 2)
--> Fusing gg_trpm5_B5
(66, 2)
--> Fusing mm_calbindin1_B1
(66, 2)
--> Fusing mm_vimentin_B2
(60, 2)
-