# README

This notebook plots the fraction of alignments overlapping coding exons.
It takes as input the output of the following Galaxy workflow: https://usegalaxy.org/u/cartman/w/cdsoverlaps.
That output can be found in the following history: https://usegalaxy.org/u/cartman/h/aligners--exons.

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the tab-separated overlap table straight from the Galaxy dataset API.
# Column names are supplied here, so the first line of the file is read as data.
df = pd.read_table(
    'https://usegalaxy.org/api/datasets/f9cad7b01a4721354a0eee02f4c4f179/display?to_ext=bed',
    names=['chr', 'count', 'fg', 'lz', 'mm'],
)

In [3]:
# Turn each aligner's raw column (fg / lz / mm) into a fraction of `count`,
# stored under the aligner's full name.
# NOTE(review): `count` is presumably the per-chromosome exon total — confirm
# against the Galaxy workflow.
for aligner, raw_col in {'fastga': 'fg', 'lastz': 'lz', 'minimap2': 'mm'}.items():
    df[aligner] = df[raw_col] / df['count']

In [4]:
# Preview the first five rows, including the derived fraction columns.
df.head(n=5)

Unnamed: 0,chr,count,fg,lz,mm,fastga,lastz,minimap2
0,chr1,21736,6193,20272,810,0.284919,0.932646,0.037265
1,chr10,8776,2048,8253,360,0.233364,0.940406,0.041021
2,chr11,11828,4089,11366,515,0.345705,0.96094,0.043541
3,chr12,12031,3257,11263,430,0.270717,0.936165,0.035741
4,chr13,3843,728,3569,148,0.189435,0.928702,0.038512


In [5]:
# Reshape to long format: one row per (chromosome, aligner) pair, with the
# aligner name in `variable` and its fraction in `value`. Both plotted columns
# are cast explicitly so their dtypes are unambiguous downstream.
data = (
    df.melt(id_vars=['chr'], value_vars=['fastga', 'lastz', 'minimap2'])
      .astype({'chr': str, 'value': float})
)

In [6]:
# Inspect the column dtypes of the long-format frame.
data.dtypes

chr          object
variable     object
value       float64
dtype: object

In [7]:
# Preview the first five rows of the long-format frame.
data.head(n=5)

Unnamed: 0,chr,variable,value
0,chr1,fastga,0.284919
1,chr10,fastga,0.233364
2,chr11,fastga,0.345705
3,chr12,fastga,0.270717
4,chr13,fastga,0.189435


In [8]:
# Explicit chromosome order: chr1..chr22, then chrX, chrY, chrM. Without it,
# names sort lexically (chr1, chr10, chr11, ...).
# Built with a comprehension so no module-level loop variable is left behind;
# the previous loop variable was named `chr` and shadowed the builtin chr().
chr_order = [
    'chr' + label
    for label in [str(number) for number in range(1, 23)] + ['X', 'Y', 'M']
]

In [20]:
# One circle per (chromosome, aligner): x is the chromosome in `chr_order`
# order, y is the fraction, and colour identifies the aligner.
alt.Chart(data).properties(
    width=800,
    height=150,
).mark_circle(size=100).encode(
    alt.X('chr:O', sort=chr_order, title=None),
    alt.Y('value:Q', title=['Fraction of hg38 exons', 'in alignments']),
    alt.Color('variable:N', sort=['lastz', 'fastga', 'minimap2'], title='Aligner'),
)