# [SAM file format](https://www.metagenomics.wiki/tools/samtools/bam-sam-file-format)
```python
%%bash
# Print samtools' built-in help text.
samtools --help
```

# Minimap2 - [Manual Page](https://lh3.github.io/minimap2/minimap2.html)

```python
%%bash
# Location of the minimap2 executable used for this help call.
minimap2_bin=/v/scratch/tools/minimap2/minimap2

# Print minimap2's built-in help text.
"${minimap2_bin}" --help
```

---
# Create SAM files from `timin0421` data

In [None]:
%%bash
# Name-sort the timin0421 basecalls BAM and write the result to SORTED_BAM_PATH.
# Strict mode: abort on the first failing command or unset variable instead of
# silently continuing with a missing or partial output file.
set -euo pipefail

BAM_PATH='/v/projects/nanopore/agnes/signal_visualization/data/timin0421/pass/basecalls.bam'
SORTED_BAM_PATH='/v/projects/nanopore/balazs/data/thymine_0421/sorted_basecalls.bam'

# Make sure the output directory exists before samtools writes into it.
mkdir -p -- "$(dirname -- "$SORTED_BAM_PATH")"

# -n: sort by read name rather than by coordinate.
samtools sort -n -o "$SORTED_BAM_PATH" "$BAM_PATH"

In [None]:
import pysam

# Convert the name-sorted basecalls BAM into a FASTQ file whose header lines
# carry every BAM tag of the read as "tag:value" pairs.
bam_path='/v/projects/nanopore/balazs/data/thymine_0421/sorted_basecalls.bam'
fastq_path='/v/projects/nanopore/balazs/data/thymine_0421/basecalls.fastq'

# Both files are opened as context managers so the BAM handle is closed as
# well (previously only the FASTQ handle was released).
# check_sq=False lets pysam open a BAM without @SQ header entries
# (presumably the basecaller output is unaligned -- confirm).
with pysam.AlignmentFile(bam_path, "rb", check_sq=False) as bam_file, \
        open(fastq_path, "w") as fq_out:
    for read in bam_file:
        # Only unmapped records are exported; mapped records are skipped.
        if not read.is_unmapped:
            continue

        # get_tags() returns a list of (tag, value) tuples.
        tags = read.get_tags()
        # NOTE(review): tags are written space-separated as "tag:value", without
        # the SAM type field; the alignment cell runs minimap2 with -y, which
        # copies this comment into its output -- confirm downstream parsers
        # expect this form.
        metadata = " ".join(f"{tag}:{value}" for tag, value in tags)

        # One four-line FASTQ record: header, sequence, separator, qualities.
        fq_out.write(f"@{read.query_name} {metadata}\n")
        fq_out.write(f"{read.query_sequence}\n")
        fq_out.write("+\n")
        fq_out.write(f"{read.qual}\n")

In [None]:
%%bash
# Align the thymine FASTQ against the reference, then delete the intermediates.
# Strict mode is essential here: without it a failed alignment would still fall
# through to the cleanup and delete the inputs needed for a retry.
set -euo pipefail

minimap2=/v/scratch/tools/minimap2/minimap2

REF_SEQUENCE="/v/volumes/nanopore/ref/p300_ID3.fasta"
FASTQ="/v/projects/nanopore/balazs/data/thymine_0421/basecalls.fastq"
SORTED_BAM="/v/projects/nanopore/balazs/data/thymine_0421/sorted_basecalls.bam"
SAM="/v/projects/nanopore/balazs/data/thymine_0421/mapped_basecalls.sam"

# -a: emit SAM, -x map-ont: Nanopore read preset,
# -y: copy the FASTQ header comments into the output, -t 8: eight threads.
"$minimap2" -ax map-ont -y -t 8 "$REF_SEQUENCE" "$FASTQ" > "$SAM"

# Only reached after a successful alignment. The FASTQ is ~200MB.
rm -- "$FASTQ" "$SORTED_BAM"

In [None]:
%%bash
# Show the first four lines of the thymine alignment output as a quick check.
mapped_sam="/v/projects/nanopore/balazs/data/thymine_0421/mapped_basecalls.sam"
head -n 4 "${mapped_sam}"

### Old version: processes every BAM file in the directory
```python
%%bash
# Per-file pipeline: BAM -> FASTQ -> minimap2 alignment -> name-sorted SAM.
# Strict mode stops at the first failure, so the cleanup at the end of an
# iteration never runs after a step that did not complete.
set -euo pipefail
# With nullglob an unmatched *.bam pattern expands to nothing instead of being
# handed to samtools as a literal file name.
shopt -s nullglob

minimap2=/v/projects/nanopore/tools/minimap2/minimap2-2.24_x64-linux/minimap2

# Define paths (directory variables keep their trailing slash)
BAM_FILES="/v/projects/nanopore/agnes/signal_visualization/data/timin0421/pass/"
REF_GENOME="/v/volumes/nanopore/ref/p300_ID3.fasta"
SAM_DIR="/v/projects/nanopore/balazs/data/thymine_0421_sam_position/"

# Ensure the output directory exists
mkdir -p -- "$SAM_DIR"

# Iterate through BAM files
for BAM in "${BAM_FILES}"*.bam; do
    # Extract the base name (file name without directory and .bam suffix)
    BAM_BASE=$(basename -- "$BAM" .bam)

    # Per-file paths: two intermediates and the final output
    FASTQ_TMP="${SAM_DIR}${BAM_BASE}.fastq"
    SAM_UNSORTED="${SAM_DIR}${BAM_BASE}_temp.sam"
    SAM_OUT="${SAM_DIR}${BAM_BASE}.sam"

    # Convert BAM to FASTQ
    samtools fastq "$BAM" > "$FASTQ_TMP"

    # Align reads using minimap2
    "$minimap2" -ax map-ont "$REF_GENOME" "$FASTQ_TMP" > "$SAM_UNSORTED"

    # Sort SAM file; 'sort -n' orders by read id
    samtools sort -n -o "$SAM_OUT" "$SAM_UNSORTED"

    # Remove the intermediates now that the sorted SAM has been written
    rm -- "$SAM_UNSORTED" "$FASTQ_TMP"
done
```

# Create SAM files from `uracil0504` data

In [None]:
%%bash
# Name-sort the uracil0504 basecalls BAM and write the result to SORTED_BAM_PATH.
# Strict mode: abort on the first failing command or unset variable instead of
# silently continuing with a missing or partial output file.
set -euo pipefail

BAM_PATH='/v/projects/nanopore/agnes/signal_visualization/uracil0504/guppy/pass/basecalls.bam'
SORTED_BAM_PATH='/v/projects/nanopore/balazs/data/uracil_0504/sorted_basecalls.bam'

# Make sure the output directory exists before samtools writes into it.
mkdir -p -- "$(dirname -- "$SORTED_BAM_PATH")"

# -n: sort by read name rather than by coordinate.
samtools sort -n -o "$SORTED_BAM_PATH" "$BAM_PATH"

In [None]:
import pysam

# Convert the name-sorted basecalls BAM into a FASTQ file whose header lines
# carry every BAM tag of the read as "tag:value" pairs.
bam_path='/v/projects/nanopore/balazs/data/uracil_0504/sorted_basecalls.bam'
fastq_path='/v/projects/nanopore/balazs/data/uracil_0504/sorted_basecalls.fastq'

# Both files are opened as context managers so the BAM handle is closed as
# well (previously only the FASTQ handle was released).
# check_sq=False lets pysam open a BAM without @SQ header entries
# (presumably the basecaller output is unaligned -- confirm).
with pysam.AlignmentFile(bam_path, "rb", check_sq=False) as bam_file, \
        open(fastq_path, "w") as fq_out:
    for read in bam_file:
        # Only unmapped records are exported; mapped records are skipped.
        if not read.is_unmapped:
            continue

        # get_tags() returns a list of (tag, value) tuples.
        tags = read.get_tags()
        # NOTE(review): tags are written space-separated as "tag:value", without
        # the SAM type field; the alignment cell runs minimap2 with -y, which
        # copies this comment into its output -- confirm downstream parsers
        # expect this form.
        metadata = " ".join(f"{tag}:{value}" for tag, value in tags)

        # One four-line FASTQ record: header, sequence, separator, qualities.
        fq_out.write(f"@{read.query_name} {metadata}\n")
        fq_out.write(f"{read.query_sequence}\n")
        fq_out.write("+\n")
        fq_out.write(f"{read.qual}\n")

In [None]:
%%bash
# Align the uracil FASTQ against the reference, then delete the intermediates.
# Strict mode is essential here: without it a failed alignment would still fall
# through to the cleanup and delete the inputs needed for a retry.
set -euo pipefail

minimap2=/v/scratch/tools/minimap2/minimap2

REF_SEQUENCE="/v/volumes/nanopore/ref/p300_ID3.fasta"
FASTQ="/v/projects/nanopore/balazs/data/uracil_0504/sorted_basecalls.fastq"
SORTED_BAM="/v/projects/nanopore/balazs/data/uracil_0504/sorted_basecalls.bam"
SAM="/v/projects/nanopore/balazs/data/uracil_0504/mapped_basecalls.sam"

# -a: emit SAM, -x map-ont: Nanopore read preset,
# -y: copy the FASTQ header comments into the output, -t 8: eight threads.
"$minimap2" -ax map-ont -y -t 8 "$REF_SEQUENCE" "$FASTQ" > "$SAM"

# Only reached after a successful alignment.
rm -- "$FASTQ" "$SORTED_BAM"

In [None]:
%%bash
# Show the first four lines of the uracil alignment output as a quick check.
mapped_sam="/v/projects/nanopore/balazs/data/uracil_0504/mapped_basecalls.sam"
head -n 4 "${mapped_sam}"

```python
%%bash
# Per-file pipeline: BAM -> FASTQ -> minimap2 alignment -> name-sorted SAM.
# Strict mode stops at the first failure, so the cleanup at the end of an
# iteration never runs after a step that did not complete.
set -euo pipefail
# With nullglob an unmatched *.bam pattern expands to nothing instead of being
# handed to samtools as a literal file name.
shopt -s nullglob

minimap2=/v/projects/nanopore/tools/minimap2/minimap2-2.24_x64-linux/minimap2

# Define paths (directory variables keep their trailing slash)
BAM_FILES="/v/projects/nanopore/agnes/signal_visualization/uracil0504/guppy/pass/"
REF_SEQUENCE="/v/volumes/nanopore/ref/p300_ID3.fasta"
# NOTE(review): "uracil_0405" looks like a transposition of "0504" used by the
# input path above -- confirm before relying on this output location.
SAM_DIR="/v/projects/nanopore/balazs/data/uracil_0405_sam_position/"

# Ensure the output directory exists
mkdir -p -- "$SAM_DIR"

# Iterate through BAM files
for BAM in "${BAM_FILES}"*.bam; do
    # Extract the base name (file name without directory and .bam suffix)
    BAM_BASE=$(basename -- "$BAM" .bam)

    # Per-file paths: two intermediates and the final output
    FASTQ_TMP="${SAM_DIR}${BAM_BASE}.fastq"
    SAM_UNSORTED="${SAM_DIR}${BAM_BASE}_temp.sam"
    SAM_OUT="${SAM_DIR}${BAM_BASE}.sam"

    # Convert BAM to FASTQ
    samtools fastq "$BAM" > "$FASTQ_TMP"

    # Align reads using minimap2
    "$minimap2" -ax map-ont "$REF_SEQUENCE" "$FASTQ_TMP" > "$SAM_UNSORTED"

    # Sort SAM file by read id
    samtools sort -n -o "$SAM_OUT" "$SAM_UNSORTED"

    # Remove the intermediates now that the sorted SAM has been written
    rm -- "$SAM_UNSORTED" "$FASTQ_TMP"
done

```