# Working with the SRA search results

Based on results generated by https://github.com/luizirber/phd/tree/fcb2095209e91f7892f298de96206ac20a0b6634/experiments/wort/sra_search/

In [1]:
from ipywidgets import interactive
import ipywidgets as widgets

## Loading the data

In [2]:
from IPython.display import HTML
import pandas as pd

# Load the search results: one row per (MAG, metagenome) pair together with
# the containment value reported for that pair.
data = pd.read_csv("jct.csv.gz",
                   sep=",",
                   quotechar="'",
                   header=0,
                   names=["MAG", "metagenome", "containment"])

# Fix names so it's easier to query.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace treats
# the pattern as a literal string by default, so these patterns would
# otherwise never be interpreted as regular expressions.
# Drop the single quotes wrapping the MAG name.
data['MAG'] = data['MAG'].str.replace(r"'(?P<id>.*)'", r"\g<id>", regex=True)
# Keep only the part between the last "/" and the ".sig" suffix
# (presumably the SRA run ID of a signature file path -- confirm against the data).
data['metagenome'] = data['metagenome'].str.replace(
    r".*/(?P<id>.*)\.sig.*", r"\g<id>", regex=True
)

filtered = data

In [3]:
# Number of distinct MAGs present in the results.
len(filtered["MAG"].drop_duplicates())

316

In [4]:
# Number of distinct MAGs with at least one match whose containment exceeds 0.5.
len(filtered.loc[filtered["containment"] > 0.5, "MAG"].drop_duplicates())

305

In [5]:
# Number of distinct metagenomes with at least one match whose containment exceeds 0.5.
len(filtered.loc[filtered["containment"] > 0.5, "metagenome"].drop_duplicates())

3359

In [6]:
# From here on, keep only the rows whose containment exceeds 0.5.
filtered = filtered.loc[filtered["containment"].gt(0.5)]

## Interactive exploration

In [None]:
def update_candidate(candidate):
    """Display the metagenomes matching one MAG as an HTML table.

    Selects the rows of the notebook-level ``filtered`` frame whose ``MAG``
    equals ``candidate`` and whose ``containment`` exceeds 0.5, rewrites each
    ``metagenome`` value as a link to the NCBI SRA Traces page for that run,
    and displays the rows sorted by decreasing containment.

    Args:
        candidate: MAG name, compared for equality against the ``MAG`` column.
    """
    # Select the matching rows first so that only they are copied and turned
    # into links, instead of the whole frame on every dropdown change.
    matches = filtered[
        (filtered["MAG"] == candidate) &
        (filtered['containment'] > 0.5)
    ].copy()

    matches["metagenome"] = matches["metagenome"].apply(
        lambda x: "<a href='https://trace.ncbi.nlm.nih.gov/Traces/sra/?run={}'>{}</a>".format(x, x)
    )

    # escape=False keeps the anchor tags built above as real HTML in the table.
    display(HTML(matches
         .sort_values(by="containment", ascending=False)
         .to_html(render_links=True, escape=False)))
    

# Dropdown offering every MAG left in `filtered`; each selection is passed to
# update_candidate as its `candidate` argument.
candidatepicker = interactive(
    update_candidate,
    candidate=widgets.Dropdown(
        description='MAG name',
        options=filtered['MAG'].unique(),
        disabled=False,
    ),
)
display(candidatepicker)