In [None]:
# Two options at this point:
# 1. Run wf-teloseq to get the low-filter CSV with the IDs of the telomeric reads, filter the
#    BAM/FASTQ files down to those reads, and then align with minimap2 or Dorado.
# 2. Align directly and inspect the result in IGV. For a quick check of whether the run worked,
#    this may be sufficient for now.

cd /storage/scratch01/groups/bu/teloseq/ont-pipeline/wf-teloseq/
conda activate wf-teloseq
export TELOSEQ_OUTDIR=/storage/scratch01/groups/bu/teloseq/ont-pipeline/output/IE16_multiplex/barcode03
export TELOSEQ_WORKDIR=/storage/scratch01/groups/bu/teloseq/ont-pipeline/work/IE16_multiplex/barcode03
export NXF_SINGULARITY_CACHEDIR=/storage/scratch01/groups/bu/teloseq/ont-pipeline/singularity/cache
export TELOSEQ_INPUT=/storage/scratch01/groups/bu/teloseq/dorado/output/IE16_Multiplexing/IE16_demultiplex/barcode03.fastq
# TELOSEQ_SAMPLE holds the same path as TELOSEQ_INPUT and is not referenced by the commands below.
export TELOSEQ_SAMPLE=/storage/scratch01/groups/bu/teloseq/dorado/output/IE16_Multiplexing/IE16_demultiplex/barcode03.fastq
# The single quotes keep the submitting shell from expanding the variables; the inner bash started
# by srun expands them from the exported environment.
srun -c 24 --mem=64000 -t 600 --pty bash -c 'nextflow run main.nf -resume -profile singularity,slurm -work-dir $TELOSEQ_WORKDIR --out_dir $TELOSEQ_OUTDIR --fastq $TELOSEQ_INPUT'
# Same run using main02.nf instead of main.nf.
srun -c 24 --mem=64000 -t 600 --pty bash -c 'nextflow run main02.nf -resume -profile singularity,slurm -work-dir $TELOSEQ_WORKDIR --out_dir $TELOSEQ_OUTDIR --fastq $TELOSEQ_INPUT'


In [None]:
##Fichero python para hacer el filtro de los telomeric reads prior to map con minimap2. 

import csv
import pysam
import argparse

def filter_bam(csv_file, input_bam, output_bam):
    """Copy the reads whose ID is listed in a CSV file from one BAM file to another.

    Args:
        csv_file: Path to a CSV file; the first column of every row is taken as a read ID.
        input_bam: Path to the BAM file to read.
        output_bam: Path of the BAM file to write. It is created with the input file's header.

    Blank rows and rows with an empty first column are skipped, and whitespace around
    each ID is stripped, so a trailing empty line in the CSV no longer raises IndexError.
    """
    # Step 1: load the read IDs into a set for constant-time membership tests.
    # newline="" is the mode the csv module asks for when it is handed a file object.
    with open(csv_file, "r", newline="") as handle:
        read_ids = {
            row[0].strip()
            for row in csv.reader(handle)
            if row and row[0].strip()
        }

    # Step 2: open the input BAM and create the output BAM with the same header.
    # check_sq=False allows opening a BAM whose header has no @SQ lines.
    with pysam.AlignmentFile(input_bam, "rb", check_sq=False) as bam_in, \
         pysam.AlignmentFile(output_bam, "wb", header=bam_in.header) as bam_out:

        # Step 3: stream through the input and keep only the listed reads.
        for read in bam_in:
            if read.query_name in read_ids:
                bam_out.write(read)

if __name__ == "__main__":
    # Command-line entry point: three mandatory path options, registered from a table.
    cli = argparse.ArgumentParser(description="Filter a BAM file based on read IDs from a CSV file.")
    required_options = (
        ("--csv_file", "Path to the CSV file containing read IDs (first column)."),
        ("--input_bam", "Path to the input BAM file."),
        ("--output_bam", "Path to the output BAM file for filtered reads."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    # Parse the command line and hand the three paths to filter_bam.
    options = cli.parse_args()
    filter_bam(options.csv_file, options.input_bam, options.output_bam)


In [None]:
## To run the filter script (replace the three example paths)
python filter_bam.py \
    --csv_file path/to/your.csv \
    --input_bam path/to/input.bam \
    --output_bam path/to/output.bam


In [None]:
## Align the barcode0X.bam files with Dorado.
## <index> and <input_read_folder> are placeholders: replace them with the reference index and the
## folder of reads before submitting (left as-is, the shell would parse them as redirections).
sbatch -p gpu --gres=gpu:A100:1 -t 300 --mem=32G --wrap "/storage/scratch01/users/mespejo/Dorado/bin/dorado aligner <index> <input_read_folder> --output-dir /storage/scratch01/groups/bu/teloseq/ont-pipeline/output/IE11_demultiplex_align"


In [None]:
## Align with minimap2.
# Convert the BAM to FASTQ first; -T "*" carries the BAM tags over into the FASTQ header.
samtools fastq -T "*" output.bam > output.fastq

## Run minimap2 (installed in the wf-teloseq env). -y copies the FASTQ header comments into the SAM output.
## NOTE(review): minimap2 is given -t 20 but the job requests no CPU count (-c); confirm the allocation matches.
sbatch -p gpu --gres=gpu:A100:1 -t 3000 --mem=32G --wrap 'minimap2 -ax map-ont -y --MD -Y --eqx --cap-kalloc 1g -t 20 --secondary=no --paf-no-hit /storage/scratch01/groups/bu/teloseq/dimeloseq/reference/hg002v1.1.fasta output.fastq > temp_outputfastq_aligned.sam'

## minimap2 as invoked inside wf-teloseq uses far fewer options.
## NOTE(review): ${fastqFile.simpleName} looks like Nextflow/Groovy interpolation rather than bash, so the
## next four lines appear to be an excerpt of the workflow kept for reference - confirm before running them in a shell.
## Steps: align and sort, index, keep alignments with MAPQ >= 10, index the filtered BAM.
minimap2 -ax map-ont -t 20 $ref $fastqFile | samtools sort -@2 -o ${fastqFile.simpleName}.telomere_filter.bam 
samtools index ${fastqFile.simpleName}.telomere_filter.bam  
samtools view -bq 10 -h ${fastqFile.simpleName}.telomere_filter.bam > ${fastqFile.simpleName}.bam    
samtools index ${fastqFile.simpleName}.bam
## Batch submission of the full-option minimap2 alignment for barcode03_h3k9me3urea.fastq.
sbatch -p gpu --gres=gpu:A100:1 -t 300 --mem=32G --wrap "minimap2 -ax map-ont -y --MD -Y --eqx --cap-kalloc 1g -t 20 --secondary=no --paf-no-hit /storage/scratch01/groups/bu/teloseq/dimeloseq/reference/hg002v1.1.fasta /storage/scratch01/groups/bu/teloseq/dorado/output/IE16_Multiplexing/IE16_demultiplex/barcode03_h3k9me3urea.fastq > temp_bobarcode03_h3k9me3urea.sam"

# minimap2 run interactively through srun.
# NOTE(review): minimap2 is given -t 20 while srun allocates -c 8; confirm which value is intended.
srun -c 8 --mem=12000 -t 120 --pty bash -c 'minimap2 -ax map-ont -y --MD -Y --eqx --cap-kalloc 1g -t 20 --secondary=no --paf-no-hit /storage/scratch01/groups/bu/teloseq/dimeloseq/reference/hg002v1.1.fasta IE10_DiMeLoseq_H3K27me3_Teloseq_supv56mAall.fastq > temp_aligned.sam'


In [None]:
# Align with minimap2, then build a sorted, indexed BAM from the resulting SAM.
srun -c 8 --mem=12000 -t 120 --pty bash -c 'minimap2 -ax map-ont -y --MD -Y --eqx --cap-kalloc 1g -t 20 --secondary=no --paf-no-hit /storage/scratch01/groups/bu/teloseq/dimeloseq/reference/hg002v1.1.fasta IE10_DiMeLoseq_H3K27me3_Teloseq_supv56mAall.fastq > temp_aligned.sam'
# Sort the SAM that minimap2 just wrote (temp_aligned.sam) into a BAM.
samtools sort -m 2G -O BAM -@ 4 temp_aligned.sam > output_aligne.bam
# Index the BAM produced by the sort.
# NOTE(review): this step previously indexed IE10_DiMeLoseq_H3K27me3_Teloseq_6mA_aligned_filtered.bam,
# which no command in this cell creates; index that file instead if it is the one intended.
samtools index output_aligne.bam
## Next step: run wf-teloseq to get the IDs of the telomeric reads, since we do not know whether any of
## the reads we have covers a complete telomere.
## Then filter the FASTQ files produced by Dorado by the IDs in the low-filter CSV that wf-teloseq generates.

In [None]:
# NOTE(review): $task.cpus and ${params.mapq} look like Nextflow variables, so these four lines appear to be
# the workflow's mapping step copied for reference rather than commands to run as-is in a shell - confirm.
# Align (--sam-hit-only drops reads without a hit) and sort into telomere.bam.
minimap2 -ax map-ont -y -Y --MD -t $task.cpus --secondary=no --sam-hit-only --eqx  mapping_reference.fasta reads.fastq | samtools sort -o telomere.bam
samtools index telomere.bam
# Keep alignments whose MAPQ is at least params.mapq, then index the filtered BAM.
samtools view -bq ${params.mapq} -h telomere.bam > "telomere.q${params.mapq}.bam"
samtools index "telomere.q${params.mapq}.bam"

# Full-option alignment run interactively through srun, followed by sorting and indexing.
srun -c 8 --mem=12000 -t 120 --pty bash -c 'minimap2 -ax map-ont -y --MD -Y --eqx --cap-kalloc 1g -t 20 --secondary=no --paf-no-hit mapping_reference.fasta  reads.fastq > temp_aligned.sam'
# Sort the SAM that minimap2 just wrote (temp_aligned.sam) into a BAM.
samtools sort -m 2G -O BAM -@ 4 temp_aligned.sam > output_aligne.bam
# Index the BAM produced by the sort.
# NOTE(review): this step previously indexed IE10_DiMeLoseq_H3K27me3_Teloseq_6mA_aligned_filtered.bam,
# which no command in this cell creates; index that file instead if it is the one intended.
samtools index output_aligne.bam