In [None]:
import igv_notebook
import pandas as pd

In [None]:
# Default inputs for the notebook. Every path below is built by prepending
# `prefix`, so a single value relocates all of them together.
prefix = ''

# Reference genome: display name, FASTA file, its .fai index and a GFF3 file.
genome_name = 'Anopheles gambiae (PEST)'
reference_fasta = prefix + "resources/reference/Anopheles-gambiae-PEST_CHROMOSOMES_AgamP4.fa"
reference_index = reference_fasta + ".fai"
path_to_gff3 = prefix + 'resources/reference/Anopheles-gambiae-PEST_BASEFEATURES_AgamP4.12.gff3'

# BED file with its label, and a genomic region in `chrom:start-end` form.
bed_name = 'AgamDao'
path_to_bed = prefix + "resources/AgamDao_wholegenome_snptargets.bed"
region = "2L:24,376,166-24,376,456"

## Exploring amplicon sequencing read data in IGV

In this notebook, we can explore our amplicon sequencing reads in an interactive Jupyter environment. 

In [None]:
# Load the sample metadata table, picking the pandas reader from the file
# extension of `metadata_path`.
metadata_path = f"{prefix}config/metadata.tsv"
if metadata_path.endswith('.xlsx'):
    metadata = pd.read_excel(metadata_path, engine='openpyxl')
elif metadata_path.endswith('.tsv'):
    metadata = pd.read_csv(metadata_path, sep="\t")
elif metadata_path.endswith('.csv'):
    metadata = pd.read_csv(metadata_path, sep=",")
else:
    # .tsv is handled above, so the message lists all three accepted
    # extensions and reports the path that was rejected.
    raise ValueError(
        f"Metadata file must be .xlsx, .tsv or .csv (got {metadata_path!r})"
    )

# One value per metadata row; used further down to build each sample's BAM path.
samples = metadata['sampleID']

# Browser configuration passed to igv_notebook.Browser: a custom reference
# genome (FASTA + index) with a gene annotation track attached to it, and the
# region the browser should open on.
config = {
    "reference": {
        "id": genome_name,
        "name": genome_name,
        "fastaURL": reference_fasta,
        "indexURL": reference_index,
        "tracks": [
            {
                "name": "Genes",
                "type": "annotation",
                "format": "gff3",
                "url": path_to_gff3,
                # The GFF3 has no accompanying index file.
                "indexed": False,
            }
        ],
    },
    # `region` is set in the parameter cell; without this entry it was never
    # used and the browser opened on its default view instead of the
    # configured region.
    "locus": region,
}

In [None]:
# Run igv_notebook's initialisation step; the Browser itself is only
# constructed in the following cell.
igv_notebook.init()

In [None]:
# Create the browser from the `config` dictionary built earlier.
igv_browser = igv_notebook.Browser(config)

# Add one alignment track per sample, limited to the first five samples of the
# metadata table.
for sample in samples[:5]:
    # pandas parses purely numeric sampleID columns as integers; convert once
    # so the track name is always a plain string (numpy integers are rejected
    # by the standard JSON encoder).
    sample_name = str(sample)

    # NOTE(review): this path is hard-coded and does not use `prefix` like the
    # other inputs do — confirm that is intentional.
    bam_path = f"../../results/alignments/{sample_name}.bam"
    bai_path = f"{bam_path}.bai"

    igv_browser.load_track(
        {
            "name": sample_name,
            "url": bam_path,
            "indexURL": bai_path,
            "format": "bam",
            "type": "alignment",
        }
    )