In [87]:
import os
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from tqdm.auto import tqdm
from time import sleep

In [88]:
def first_trim(fname, name):
    """Run the first cutadapt pass on one file from ``panda_out/``.

    Reads in which the 5' adapter ``NNNNtgttccgggacgctatgc`` is found
    (``-g``, adapter-specific ``min_overlap=3``) are written to
    ``{name}_round1.fastq.gz``; reads without a match are written to
    ``{name}_unknown_round1.fastq.gz``. cutadapt's stdout report is
    redirected to ``logs/{name}_round1.txt``.

    Args:
        fname: File name inside ``panda_out/`` to process.
        name: Sample prefix used for the output and log file names.

    Notes:
        Per the cutadapt documentation, ``-j0`` auto-detects the number of
        cores, ``-m 10`` discards reads shorter than 10 bases after
        processing, and ``--action=retain`` trims the read but keeps the
        matched adapter sequence in it.
        Outputs land in the current working directory. The ``logs/``
        directory is not created here, so it is assumed to exist already.
        ``fname`` and ``name`` are interpolated into the shell line unquoted.
    """
    !cutadapt -j0 -m 10 -g "NNNNtgttccgggacgctatgc;min_overlap=3" \
    --action=retain \
    -o {name}_round1.fastq.gz \
    --untrimmed-output {name}_unknown_round1.fastq.gz \
    panda_out/{fname} > logs/{name}_round1.txt
    
def rc_unk(name):
    """Reverse-complement the reads that the first trim left unmatched.

    Pipes ``seqfu rc`` on ``{name}_unknown_round1.fastq.gz`` through ``pigz``
    and writes the result to ``{name}_rc_round1.fastq.gz`` in the current
    working directory. ``second_trim`` searches that file for the same
    adapter used in the first pass.

    Args:
        name: Sample prefix shared by the input and output file names.
    """
    !seqfu rc {name}_unknown_round1.fastq.gz | pigz > {name}_rc_round1.fastq.gz

def second_trim(name):
    """Run the second cutadapt pass on the reverse-complemented reads.

    Uses the same adapter and options as ``first_trim`` but reads
    ``{name}_rc_round1.fastq.gz`` (produced by ``rc_unk``). Matching reads go
    to ``{name}_round2.fastq.gz``, non-matching reads to
    ``{name}_unknown_round2.fastq.gz``, and cutadapt's stdout report to
    ``logs/{name}_round2.txt``.

    Args:
        name: Sample prefix shared by the input, output and log file names.

    Notes:
        The ``logs/`` directory is assumed to exist already.
    """
    !cutadapt -j0 -m 10 -g "NNNNtgttccgggacgctatgc;min_overlap=3" \
    --action=retain \
    -o {name}_round2.fastq.gz \
    --untrimmed-output {name}_unknown_round2.fastq.gz \
    {name}_rc_round1.fastq.gz > logs/{name}_round2.txt

def concatenate_rounds(name):
    """Merge the trimmed reads of both rounds into one compressed file.

    Passes every file matching the shell glob ``{name}_round*.fastq.gz`` to
    ``seqfu cat`` and pipes the result through ``pigz`` into
    ``{name}_cat.fastq.gz``. The glob covers the ``_round1`` and ``_round2``
    outputs of the two trim steps; the ``_unknown_round*`` and ``_rc_round*``
    files do not match it.

    Args:
        name: Sample prefix shared by the input and output file names.
    """
    !seqfu cat {name}_round*.fastq.gz | pigz > {name}_cat.fastq.gz    
    
def demultiplex_GR(name):
    """Split ``{name}_cat.fastq.gz`` by the 3' adapters in ``refs/green_red.fa``.

    The doubled braces ``{{name}}`` reach the shell as a literal ``{name}``,
    which cutadapt replaces with the name of the matched adapter. This
    produces one ``<adapter>_{name}.fastq.gz`` file per adapter in the
    current working directory. cutadapt's stdout report is redirected to
    ``logs/{name}_demult_GR_report.txt``.

    Args:
        name: Sample prefix of the concatenated input; also used as the
            suffix of the per-adapter output files.

    Notes:
        ``demultiplex_in_bc`` reads ``GFP_{name}.fastq.gz`` and
        ``DSRed_{name}.fastq.gz``, so the FASTA records are presumably named
        ``GFP`` and ``DSRed`` -- confirm against ``refs/green_red.fa``.
    """
    !cutadapt -j0 -a "file:refs/green_red.fa" -o {{name}}_{name}.fastq.gz \
    --action=retain {name}_cat.fastq.gz > logs/{name}_demult_GR_report.txt

def demultiplex_in_bc(name):
    for fluor in ["GFP", "DSRed"]:
        !cutadapt -e1 -j0 -g "file:refs/biorep_bc.fa" -o inputs/{{name}}_{fluor+'_'+name}.fastq.gz \
        --action=retain {fluor+'_'+name}.fastq.gz > logs/{name}_demult_inputbc_report.txt
    
def demultiplex_bc(name, plant):
    """Split ``{name}_cat.fastq.gz`` by the barcodes in ``refs/biorep_bc.fa``.

    The doubled braces ``{{name}}`` reach the shell as a literal ``{name}``,
    which cutadapt replaces with the name of the matched 5' adapter. Output
    files are written to ``{plant}/<adapter>_{name}.fastq.gz`` and cutadapt's
    stdout report to ``logs/{name}_demult_bc_report.txt``.

    Args:
        name: Sample prefix of the concatenated input; also used as the
            suffix of the per-adapter output files.
        plant: Output directory for the demultiplexed files (the notebook
            passes ``'tobacco'`` or ``'lettuce'``).

    Notes:
        Per the cutadapt documentation an integer ``-e`` value of 1 or more
        is an absolute number of allowed errors rather than a rate -- confirm
        for the installed cutadapt version.
        The ``{plant}/`` and ``logs/`` directories are assumed to exist.
    """
    !cutadapt -e1 -j0 -g "file:refs/biorep_bc.fa" -o {plant}/{{name}}_{name}.fastq.gz \
    --action=retain {name}_cat.fastq.gz > logs/{name}_demult_bc_report.txt
    
def cleanup(name):
    """Move a sample's intermediate files into ``intermediate/``.

    Every regular file in the current working directory whose file name
    contains ``name`` is moved to ``intermediate/<file name>``, in sorted
    order. The directory is created if it is missing.

    Args:
        name: Sample identifier to look for in file names. Matching is by
            substring rather than prefix, because the input pipeline writes
            files such as ``GFP_<name>.fastq.gz`` where the sample name is
            not at the start.

    Raises:
        ValueError: If ``name`` is empty. An empty string is contained in
            every file name, so the call would otherwise sweep the whole
            working directory (notebook included) into ``intermediate/``.

    Notes:
        Substring matching also catches files of other samples whose names
        contain ``name`` (for example ``T1`` matches ``T10_...``); this is
        only safe while such files are not present in the working directory.
        Directories are skipped, so output folders are never relocated.
    """
    import shutil  # local import: not part of the notebook's import cell

    if not name:
        raise ValueError("cleanup() needs a non-empty sample name")

    dest_dir = "intermediate"
    os.makedirs(dest_dir, exist_ok=True)

    for file in sorted(os.listdir()):
        if name in file and os.path.isfile(file):
            # Giving the full destination path avoids the shell entirely, so
            # file names containing spaces or shell metacharacters are safe.
            shutil.move(file, os.path.join(dest_dir, file))
        
def quant_pipeline(fname, name, plant):
    """Process one quantification sample from raw merged reads to barcodes.

    Steps, in order: ``first_trim``, ``rc_unk``, ``second_trim``,
    ``concatenate_rounds``, ``demultiplex_bc`` and finally ``cleanup``. A
    five-step progress bar advances after each of the first five steps;
    ``cleanup`` is not counted.

    Args:
        fname: File name inside ``panda_out/`` to process.
        name: Sample prefix used for all intermediate, output and log files.
        plant: Output directory passed to ``demultiplex_bc``.
    """
    # The context manager closes the bar even if a step raises or the run is
    # interrupted, so an aborted sample does not leave a dangling bar behind.
    with tqdm(total=5) as pbar:
        first_trim(fname, name)
        pbar.update(1)
        rc_unk(name)
        pbar.update(1)
        second_trim(name)
        pbar.update(1)
        concatenate_rounds(name)
        pbar.update(1)
        demultiplex_bc(name, plant)
        pbar.update(1)
        cleanup(name)
        # Same as before: wipe the bar's line just before it is closed.
        pbar.clear()
    
def input_pipeline(fname, name):
    """Process one input sample from raw merged reads to barcodes.

    Steps, in order: ``first_trim``, ``rc_unk``, ``second_trim``,
    ``concatenate_rounds``, ``demultiplex_GR``, ``demultiplex_in_bc`` and
    finally ``cleanup``. A six-step progress bar advances after each of the
    first six steps; ``cleanup`` is not counted.

    Args:
        fname: File name inside ``panda_out/`` to process.
        name: Sample prefix used for all intermediate, output and log files.
    """
    # The context manager closes the bar even if a step raises or the run is
    # interrupted, so an aborted sample does not leave a dangling bar behind.
    with tqdm(total=6) as pbar:
        first_trim(fname, name)
        pbar.update(1)
        rc_unk(name)
        pbar.update(1)
        second_trim(name)
        pbar.update(1)
        concatenate_rounds(name)
        pbar.update(1)
        demultiplex_GR(name)
        pbar.update(1)
        demultiplex_in_bc(name)
        pbar.update(1)
        cleanup(name)
        # Same as before: wipe the bar's line just before it is closed.
        pbar.clear()

In [89]:
# Partition the files in panda_out/ into the groups the pipelines run on.
# os.listdir() returns entries in arbitrary order; sorting makes the
# processing order (and therefore the captured output) reproducible.
files = sorted(os.listdir('panda_out/'))
# Any file name containing a capital "I" is treated as an input sample
# (presumably the naming convention of the sequencing run -- confirm).
input_files = [file for file in files if "I" in file]
quant_files = [file for file in files if "I" not in file]
# Quantification samples are split by their first letter; files starting
# with anything other than 'T' or 'L' end up in neither list.
tob_files = [file for file in quant_files if file.startswith('T')]
let_files = [file for file in quant_files if file.startswith('L')]

In [86]:
# Run the input pipeline on each input file; the sample name is the part of
# the file name before its first dot.
for file in tqdm(input_files):
    input_pipeline(file, file.partition('.')[0])

  0%|                                                                          | 0/4 [00:00<?, ?it/s]
  0%|                                                                          | 0/6 [00:00<?, ?it/s][A

Done           00:01:24    13,808,605 reads @   6.1 µs/read;   9.78 M reads/minute



 17%|███████████                                                       | 1/6 [01:24<07:04, 84.86s/it][A
 33%|██████████████████████                                            | 2/6 [01:54<03:28, 52.15s/it][A

Done           00:00:12     6,104,251 reads @   2.0 µs/read;  30.48 M reads/minute



 50%|█████████████████████████████████                                 | 3/6 [02:06<01:41, 33.91s/it][A
 67%|████████████████████████████████████████████                      | 4/6 [02:37<01:05, 32.65s/it][A

Done           00:00:25    13,737,605 reads @   1.8 µs/read;  32.51 M reads/minute



 83%|███████████████████████████████████████████████████████           | 5/6 [03:02<00:30, 30.08s/it][A

Done           00:00:12     7,461,617 reads @   1.7 µs/read;  35.87 M reads/minute
Done           00:00:09     5,711,134 reads @   1.6 µs/read;  36.82 M reads/minute



100%|██████████████████████████████████████████████████████████████████| 6/6 [03:24<00:00, 27.38s/it][A
100%|██████████████████████████████████████████████████████████████████| 6/6 [03:25<00:00, 34.28s/it]
 25%|████████████████▎                                                | 1/4 [03:25<10:17, 205.67s/it]
  0%|                                                                              | 0/6 [00:00<?, ?it/s][A

Done           00:02:10    20,803,802 reads @   6.3 µs/read;   9.53 M reads/minute



 17%|███████████▌                                                         | 1/6 [02:11<10:55, 131.11s/it][A
 33%|███████████████████████▎                                              | 2/6 [02:55<05:20, 80.06s/it][A

Done           00:00:19     9,227,212 reads @   2.1 µs/read;  28.89 M reads/minute



 50%|███████████████████████████████████                                   | 3/6 [03:14<02:37, 52.33s/it][A
 67%|██████████████████████████████████████████████▋                       | 4/6 [04:01<01:40, 50.28s/it][A

Done           00:00:40    20,661,970 reads @   1.9 µs/read;  30.86 M reads/minute



 83%|██████████████████████████████████████████████████████████▎           | 5/6 [04:42<00:46, 46.70s/it][A

Done           00:00:20    11,392,097 reads @   1.8 µs/read;  33.79 M reads/minute
Done           00:00:14     8,334,911 reads @   1.7 µs/read;  34.47 M reads/minute



100%|██████████████████████████████████████████████████████████████████████| 6/6 [05:17<00:00, 42.76s/it][A
100%|██████████████████████████████████████████████████████████████████████| 6/6 [05:18<00:00, 53.06s/it]
 50%|████████████████████████████████▌                                | 2/4 [08:44<09:03, 271.95s/it]
  0%|                                                                          | 0/6 [00:00<?, ?it/s][A

Done           00:01:48    16,649,657 reads @   6.5 µs/read;   9.23 M reads/minute



 17%|██████████▊                                                      | 1/6 [01:48<09:01, 108.35s/it][A
 33%|██████████████████████                                            | 2/6 [02:25<04:25, 66.37s/it][A

Done           00:00:14     7,313,573 reads @   2.0 µs/read;  29.94 M reads/minute



 50%|█████████████████████████████████                                 | 3/6 [02:40<02:08, 42.84s/it][A
 67%|████████████████████████████████████████████                      | 4/6 [03:17<01:21, 40.64s/it][A

Done           00:00:31    16,551,450 reads @   1.9 µs/read;  31.39 M reads/minute



 83%|███████████████████████████████████████████████████████           | 5/6 [03:49<00:37, 37.46s/it][A

Done           00:00:16     9,437,365 reads @   1.7 µs/read;  35.18 M reads/minute
Done           00:00:10     6,333,009 reads @   1.7 µs/read;  35.04 M reads/minute



100%|██████████████████████████████████████████████████████████████████| 6/6 [04:16<00:00, 34.01s/it][A
100%|██████████████████████████████████████████████████████████████████| 6/6 [04:17<00:00, 42.92s/it]
 75%|████████████████████████████████████████████████▊                | 3/4 [13:01<04:25, 265.37s/it]
  0%|                                                                          | 0/6 [00:00<?, ?it/s][A

Done           00:02:24    21,782,564 reads @   6.6 µs/read;   9.07 M reads/minute



 17%|██████████▊                                                      | 1/6 [02:24<12:01, 144.25s/it][A
 33%|██████████████████████                                            | 2/6 [03:10<05:47, 86.80s/it][A

Done           00:00:19     9,561,036 reads @   2.0 µs/read;  30.12 M reads/minute



 50%|█████████████████████████████████                                 | 3/6 [03:30<02:47, 55.94s/it][A
 67%|████████████████████████████████████████████                      | 4/6 [04:19<01:46, 53.33s/it][A

Done           00:00:42    21,667,838 reads @   2.0 µs/read;  30.29 M reads/minute



 83%|███████████████████████████████████████████████████████           | 5/6 [05:02<00:49, 49.64s/it][A

Done           00:00:22    12,529,126 reads @   1.8 µs/read;  32.83 M reads/minute
Done           00:00:15     8,145,777 reads @   1.8 µs/read;  32.45 M reads/minute



100%|██████████████████████████████████████████████████████████████████| 6/6 [05:40<00:00, 45.80s/it][A
100%|██████████████████████████████████████████████████████████████████| 6/6 [05:41<00:00, 56.96s/it]
100%|█████████████████████████████████████████████████████████████████| 4/4 [18:43<00:00, 280.83s/it]


In [90]:
# Run the quantification pipeline on each 'T' file, writing the
# demultiplexed reads to tobacco/. The sample name is the part of the file
# name before its first dot.
for file in tqdm(tob_files):
    quant_pipeline(file, file.partition('.')[0], 'tobacco')

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:01:56    19,163,080 reads @   6.1 µs/read;   9.87 M reads/minute
Done           00:00:18     9,103,520 reads @   2.0 µs/read;  30.20 M reads/minute
Done           00:00:31    18,841,140 reads @   1.7 µs/read;  35.67 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:01:28    13,955,334 reads @   6.4 µs/read;   9.45 M reads/minute
Done           00:00:12     6,135,589 reads @   2.1 µs/read;  28.63 M reads/minute
Done           00:00:23    13,838,252 reads @   1.7 µs/read;  35.13 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:01:41    16,093,543 reads @   6.3 µs/read;   9.49 M reads/minute
Done           00:00:14     7,047,286 reads @   2.1 µs/read;  28.29 M reads/minute
Done           00:00:26    15,818,951 reads @   1.7 µs/read;  36.11 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:02:45    26,330,564 reads @   6.3 µs/read;   9.56 M reads/minute
Done           00:00:24    12,557,699 reads @   1.9 µs/read;  31.32 M reads/minute
Done           00:00:44    25,705,115 reads @   1.7 µs/read;  35.05 M reads/minute


In [91]:
# Run the quantification pipeline on each 'L' file, writing the
# demultiplexed reads to lettuce/. The sample name is the part of the file
# name before its first dot.
for file in tqdm(let_files):
    quant_pipeline(file, file.partition('.')[0], 'lettuce')

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:02:49    26,213,684 reads @   6.5 µs/read;   9.29 M reads/minute
Done           00:00:24    12,615,156 reads @   1.9 µs/read;  30.95 M reads/minute
Done           00:00:41    25,745,714 reads @   1.6 µs/read;  36.83 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:02:41    25,485,956 reads @   6.3 µs/read;   9.47 M reads/minute
Done           00:00:24    11,989,433 reads @   2.0 µs/read;  29.40 M reads/minute
Done           00:00:39    23,986,472 reads @   1.6 µs/read;  36.83 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:01:31    15,015,689 reads @   6.1 µs/read;   9.87 M reads/minute
Done           00:00:14     7,056,673 reads @   2.1 µs/read;  28.55 M reads/minute
Done           00:00:28    14,545,468 reads @   2.0 µs/read;  30.10 M reads/minute


  0%|          | 0/5 [00:00<?, ?it/s]

Done           00:01:34    14,620,377 reads @   6.5 µs/read;   9.24 M reads/minute
Done           00:00:14     6,646,988 reads @   2.1 µs/read;  28.38 M reads/minute
Done           00:00:25    14,198,080 reads @   1.8 µs/read;  33.91 M reads/minute
