# This code creates HCR probe pairs from the targets listed in the corresponding .csv file (in_file)

If the Python environment is not available, create it with the following command and select it as the notebook kernel:
```
conda create -n hcr_probe_generator -c bioconda biopython numpy=1.23.5 pandas=1.3.5 blast
```

In [2]:
# import modules
from maker37cb_mod import maker
import os
import pandas as pd
from contextlib import redirect_stdout

Define the path and filename of the input .csv file:

In [3]:
# Folder that holds the target tables, followed by the name of the table
# to process in this run.
in_path = "/home/mstemmer/repos/HCR_probe_generator/targets/"
input_csv = "20241121_HCR_probes_Manuel.csv"

Define path of output files:

In [4]:
# Directory that receives the generated probe tables and the per-target logs.
outpath = "/home/mstemmer/repos/HCR_probe_generator/generated_probes/"

In [5]:
# Load the target table and keep only the columns used by the probe generator.
# The directory and the file name are passed to os.path.join as separate
# components so the separator is inserted when needed; in_path therefore works
# with or without a trailing "/".
in_file = os.path.join(in_path, input_csv)
input_df = pd.read_csv(in_file)
input_df = input_df[['short', 'gene_name', 'amplifier', 'reference', 'sequence']]
# Last expression of the cell: the notebook renders the table.
input_df

Unnamed: 0,short,gene_name,amplifier,reference,sequence
0,dr,cfos,B2,Danio_rerio.GRCz11.cdna.all.fa,CCAAAACAGAGAAAAGAGCAGCAGACGAGCAAGGAAATACAAGACC...
1,dr,cfos,B3,Danio_rerio.GRCz11.cdna.all.fa,CCAAAACAGAGAAAAGAGCAGCAGACGAGCAAGGAAATACAAGACC...
2,dr,myo6b,B5,Danio_rerio.GRCz11.cdna.all.fa,GTGCAATGGATGATGGGAAGCCTGTGTGGGCGCCCCACCCAACCGA...
3,dr,eya1,B1,Danio_rerio.GRCz11.cdna.all.fa,TTTTTTTTCTTCCTCAGACACTAGCTAGTCACTGCCTGCAGCAGTA...


Run the HCR probe generator over all rows of the input .csv file:

In [5]:
# Settings handed positionally to maker(). They are the same for every target,
# so they are defined once here instead of on every loop iteration.
pause = 12
polyAT = 5
polyCG = 5
choose = "n"
BlastProbes = "y"
dropout = "y"
show = "y"
report = "y"
numbr = 0

# Generate one probe table and one log file per row of the input table.
for index, row in input_df.iterrows():
    target_id = f"{row['short']}_{row['amplifier']}_{row['gene_name']}"
    print(f"--> Working on {target_id}")

    # Join directory and file name as separate components so the separator is
    # added when outpath has no trailing "/".
    outfile = os.path.join(outpath, f"{target_id}_probes.csv")
    log_file = os.path.join(outpath, f"{target_id}_log.txt")
    db = f"/home/mstemmer/repos/HCR_probe_generator/references/{row['reference']}"

    # Everything maker() prints is captured in the per-target log file.
    with open(log_file, 'w') as f, redirect_stdout(f):
        try:
            maxprobe = "y"
            maker(row['gene_name'], row['sequence'], row['amplifier'], pause, choose,
                  polyAT, polyCG, BlastProbes, db, dropout, show, report, maxprobe,
                  numbr, outfile)
        except IndexError:
            # Fallback: repeat the run with maxprobe switched off. The note is
            # written to the log (stdout is redirected) so the output of the
            # failed first attempt can be told apart from the retry.
            # NOTE(review): presumably IndexError means the maximum probe count
            # could not be reached -- confirm against maker37cb_mod.
            print(f"Max probes could not be reached for {row['gene_name']}", flush=True)
            print("Trying again without max setting...")
            maxprobe = "n"
            maker(row['gene_name'], row['sequence'], row['amplifier'], pause, choose,
                  polyAT, polyCG, BlastProbes, db, dropout, show, report, maxprobe,
                  numbr, outfile)

--> Working on dr_B2_cfos
--> Working on dr_B3_cfos
--> Working on dr_B5_myo6b
--> Working on dr_B1_eya1


In [16]:
# Merge the per-target probe tables into a single table and write it to disk.
all_probes_csv = os.path.join(outpath, "_all_probes.csv")

# Collect the individual tables first and concatenate once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the growing table on every iteration.
probe_frames = []
for index, row in input_df.iterrows():
    print(f"--> Fusing {row['gene_name']}")

    probes = os.path.join(
        outpath, f"{row['short']}_{row['amplifier']}_{row['gene_name']}_probes.csv"
    )
    probes_df = pd.read_csv(probes)
    print(probes_df.shape)
    probe_frames.append(probes_df)

if probe_frames:
    # Row labels of the individual tables are kept, as they were with append().
    all_probes_df = pd.concat(probe_frames)
else:
    # No targets: pd.concat rejects an empty list, so fall back to an empty
    # table with the expected header.
    all_probes_df = pd.DataFrame({'Pool name': [], 'Sequence': []})

print(all_probes_df)
all_probes_df.to_csv(all_probes_csv, index=False)

--> Fusing cfos
(56, 2)
--> Fusing cfos
(56, 2)
--> Fusing myo6b
(66, 2)
--> Fusing eya1
(66, 2)
           Pool name                                       Sequence
0   B2_cfos_28_Dla12  CCTCGTAAATCCTCATCAaaTTTCAGTACAAAAATAAAAACTCCA
1   B2_cfos_28_Dla12  TTTTTGCAAACAATTCGCAAGTTCAaaATCATCCAGTAAACCGCC
2   B2_cfos_28_Dla12  CCTCGTAAATCCTCATCAaaAAAACTTCATACTTGGACGTCAGAC
3   B2_cfos_28_Dla12  ATTAAAAGAGAGACTACGTTTTCACaaATCATCCAGTAAACCGCC
4   B2_cfos_28_Dla12  CCTCGTAAATCCTCATCAaaTTGAGTAAACAACATTCACACGTTC
..               ...                                            ...
61  B1_eya1_33_Dla12  AAAAAGCAGAAACTTCTCAGAGAGTtaGAAGAGTCTTCCTTTACG
62  B1_eya1_33_Dla12  GAGGAGGGCAGCAAACGGaaAGTCATGGAGCCCCGTGCTGGATCT
63  B1_eya1_33_Dla12  TTTTTCTTTTTCATTTTAAGGGTGAtaGAAGAGTCTTCCTTTACG
64  B1_eya1_33_Dla12  GAGGAGGGCAGCAAACGGaaTTGTCCGATTACTGCTGCAGGCAGT
65  B1_eya1_33_Dla12  GAGACAGACTCCAAGTAAGAACGAGtaGAAGAGTCTTCCTTTACG

[244 rows x 2 columns]
