In [None]:
#!/usr/bin/env python3

"""
A script to plot the frequencies of variants of interest as a heatmap
"""

import sys
sys.path.insert(1, 'workflow/scripts/')
sys.path.insert(2, '../workflow/scripts/') # for ci 

import pandas as pd
import matplotlib
import rnaseqpoptools as rnaseqpop
import plotly.express as px

In [None]:
### Notebook parameters ###
# voi_path: path to the tab-separated variants-of-interest table. It is read
# with pandas in the next cell and must provide a 'Location' column.
# NOTE(review): presumably this default is overridden when the workflow
# executes the notebook — confirm against the calling rule.
voi_path = "../../resources/exampleMutations.tsv"
# dataset: label interpolated into the title of the interactive heatmap
# ("<dataset> allele frequencies").
dataset = ""

In [None]:
## Read the variants-of-interest (VOI) table (tab-separated)
muts = pd.read_csv(voi_path, sep="\t")

## Split 'Location' into contig and start position. The splits below imply a
## "contig:start" or "contig:start-end" layout; only the start is kept.
location = muts['Location'].str.split(":")
muts['chrom'] = location.str.get(0)
muts['pos'] = location.str.get(1).str.split("-").str.get(0)

## Sort by contig, then by position.
## 'pos' holds strings at this point, so a plain sort would be lexicographic
## ("1000" before "200"). Compare it numerically instead, while leaving the
## column itself untouched so downstream helpers see the same dtype as before.
## Unparseable positions are coerced to NaN (sorted last) rather than raising.
muts = muts.sort_values(
    ['chrom', 'pos'],
    key=lambda col: pd.to_numeric(col, errors="coerce") if col.name == 'pos' else col,
)

## Every figure is written once per format, into the same results directory
voi_dir = "results/variantAnalysis/variantsOfInterest"
plot_formats = ("svg", "pdf")

## Run for all samples
## ("{mut}" is left as a literal placeholder for the helper to fill in)
df, annot = rnaseqpop.getAlleleFreqTable(muts, voi_dir + "/csvs/{mut}_alleleBalance.csv", var="sample")
for ext in plot_formats:
    rnaseqpop.plotRectangular(df, annot=annot, path=f"{voi_dir}/VOI.heatmapPerSample.{ext}")


## Run for average frequencies across treatments
df2, annot2 = rnaseqpop.getAlleleFreqTable(muts, voi_dir + "/csvs/mean_{mut}_alleleBalance.csv", var="treatment", mean_=True)
for ext in plot_formats:
    rnaseqpop.plotRectangular(df2, annot=annot2, path=f"{voi_dir}/VOI.heatmapPerTreatment.{ext}", xlab="strain")

# Join both plots
for ext in plot_formats:
    rnaseqpop.plotTwoRectangular(df, annot, df2, annot2, path=f"{voi_dir}/VOI.heatmapBothPlots.{ext}", ratio='auto')

### Variants of Interest 

**Output Directory:** <span style="color:gray;font-weight:bold">*results/variantAnalysis/variantsOfInterest/*</span>  

**Rules**

<span style="color:gray;font-weight:bold">

* *VariantsOfInterest.smk*
    * mpileupVariantsOfInterest
    * AlleleBalanceVariantsOfInterest

</span>
    
   
**Introduction** 

In some biological systems, it is useful to monitor specific mutations of interest — for example, to detect drug-resistance markers in *Plasmodium* parasites, or insecticide-resistance mutations in the malaria mosquito. 

If provided with a table of user-defined variants of interest, *RNA-Seq-Pop* can calculate allele frequencies at these sites from the aligned read data, and plot the results.


**Results**

In [None]:
# Interactive heatmap of the per-treatment mean allele frequencies (df2).
# The figure grows with the table: a 400 px base plus 50 px per column
# and 40 px per row.
n_rows, n_cols = df2.shape[0], df2.shape[1]
fig_width = 400 + (n_cols * 50)
fig_height = 400 + (n_rows * 40)

# NOTE(review): aspect=1 is not one of the documented values
# ('equal', 'auto', None) — confirm the intended setting.
fig = px.imshow(
    img=df2,
    zmin=0,  # colour range pinned to the 0-1 frequency scale
    zmax=1,
    width=fig_width,
    height=fig_height,
    text_auto=True,  # print each cell's value on the heatmap
    aspect=1,
    color_continuous_scale="Reds",
    title=f"{dataset} allele frequencies",
)

# Hide the colour bar
fig.update(layout_coloraxis_showscale=False)

fig