In [1]:
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash

In [2]:
# Make the hmmtuf project importable from this notebook.
# The checkout location defaults to the original developer path; set the
# HMMTUF_ROOT environment variable to point at a checkout elsewhere.
hmmtuf_root = os.environ.get("HMMTUF_ROOT") or "/home/alex/qi3/hmmtuf"

# Only append once so re-running the cell does not grow sys.path.
if hmmtuf_root not in sys.path:
    sys.path.append(hmmtuf_root)

In [3]:
from db.sqlite3_db_connector import SQLiteDBConnector
from compute_engine.src.utils import count_kmers
from compute_engine.src.utils import INFO
from compute_engine.src.cpf import map_seq_to_category


In [4]:
# Connect to the SQLite database queried by the rest of this notebook.
# NOTE(review): the path is relative — presumably resolved against the
# kernel's working directory; confirm against SQLiteDBConnector.
db_connector = SQLiteDBConnector(db_file="../../play_ground.sqlite3")

In [5]:
# List the tables exposed by the connected database, one line per table.
tbl_names = db_connector.get_table_names()

table_line = "{0} DB table: {1}"
for name in tbl_names:
    print(table_line.format(INFO, name))

INFO: DB table: distance_metric_type
INFO: DB table: distance_sequence_type
INFO: DB table: repeats
INFO: DB table: hmm_state_types
INFO: DB table: repeats_distances
INFO: DB table: repeats_info
INFO: DB table: gquads_info


In [6]:
# Fetch every row of the sequence-type lookup table and echo each one.
sql = "SELECT * from distance_sequence_type"
rows = db_connector.fetch_all(sql=sql)

sequence_line = "{0} Sequence type {1}"
for item in rows:
    print(sequence_line.format(INFO, item))

INFO: Sequence type (1, 'NORMAL')
INFO: Sequence type (2, 'PURINE')
INFO: Sequence type (3, 'AMINO')
INFO: Sequence type (4, 'WEAK_HYDROGEN')


In [7]:
# Snapshot the sequence-type rows under their own name: `rows` is reassigned
# by the next query cell, so this must run before that cell does.
uique_seq_types = list(rows)

In [8]:
# Fetch every row of the distance-metric lookup table and echo each one.
sql = "SELECT * from distance_metric_type"
rows = db_connector.fetch_all(sql=sql)

metric_line = "{0} Metric type {1}"
for item in rows:
    print(metric_line.format(INFO, item))

INFO: Metric type (1, 'Hamming', 'ham')
INFO: Metric type (2, 'MLIPNS', 'mlipns')
INFO: Metric type (3, 'Levenshtein', 'lev')
INFO: Metric type (4, 'DamerauLevenshtein', 'damlev')
INFO: Metric type (5, 'JaroWinkler', 'jwink')
INFO: Metric type (6, 'StrCmp95', 'str')
INFO: Metric type (7, 'NeedlemanWunsch', 'nw')
INFO: Metric type (8, 'SmithWaterman', 'sw')
INFO: Metric type (9, 'Gotoh', 'got')
INFO: Metric type (10, 'Jaccard', 'jac')
INFO: Metric type (11, 'Sorensen', 'sor')
INFO: Metric type (12, 'Tversky', 'tve')
INFO: Metric type (13, 'Overlap', 'ov')
INFO: Metric type (14, 'Tanimoto', 'tan')
INFO: Metric type (15, 'Cosine', 'cos')
INFO: Metric type (16, 'MongeElkan', 'mon')
INFO: Metric type (17, 'Bag', 'bag')
INFO: Metric type (18, 'LCSSeq', 'lcsseq')
INFO: Metric type (19, 'LCSStr', 'lcsstr')
INFO: Metric type (20, 'RatcliffObershelp', 'rat')
INFO: Metric type (21, 'ArithNCD', 'ari')
INFO: Metric type (22, 'RLENCD', 'rle')
INFO: Metric type (23, 'BWTRLENCD', 'bwt')
INFO: Metric t

In [9]:
# Snapshot the distance-metric rows under their own name: `rows` is reassigned
# by the next query cell, so this must run before that cell does.
uique_dist_types = list(rows)

In [10]:
# Fetch every row of the HMM state lookup table and echo each one.
sql = "SELECT * from hmm_state_types"
rows = db_connector.fetch_all(sql=sql)

state_line = "{0} State type {1}"
for item in rows:
    print(state_line.format(INFO, item))

INFO: State type (1, 'NORMAL')
INFO: State type (2, 'TUF')
INFO: State type (3, 'DELETION')
INFO: State type (4, 'DUPLICATION')


In [11]:
# Keep the HMM state rows under a dedicated name; they populate the
# "State type" dropdown in the layout.
unique_state_types = list(rows)

In [12]:
# Create the Dash application object; the layout and the chart callback
# defined in the following cells are attached to it.
app = JupyterDash(__name__)


In [13]:
# Sanity check: show the (id, name) tuples that feed the sequence-type dropdown.
print(uique_seq_types)

[(1, 'NORMAL'), (2, 'PURINE'), (3, 'AMINO'), (4, 'WEAK_HYDROGEN')]


In [14]:
def _lookup_dropdown(component_id, lookup_rows):
    """Build a non-clearable dropdown from lookup-table rows.

    Each row supplies the option value at index 0 and the option label at
    index 1; the first row's value is pre-selected.
    """
    choices = []
    for row in lookup_rows:
        choices.append({"label": row[1], "value": row[0]})

    return dcc.Dropdown(
        id=component_id,
        options=choices,
        value=lookup_rows[0][0],
        clearable=False,
    )


# Page structure: a title, one labelled dropdown per filter, then the chart
# that the callback fills in.
app.layout = html.Div([
    html.H1("Distances Plot"),
    html.H3("Sequence type"),
    _lookup_dropdown("dropdown-sequence", uique_seq_types),
    html.H3("State type"),
    _lookup_dropdown("dropdown-state", unique_state_types),
    html.H3("Distance type"),
    _lookup_dropdown("dropdown-distance", uique_dist_types),
    dcc.Graph(id="bar-chart"),
])

In [15]:
@app.callback(
    Output("bar-chart", "figure"),
    [Input("dropdown-sequence", "value"),
     Input("dropdown-state", "value"),
     Input("dropdown-distance", "value")])
def update_bar_chart(seq_type, state_type, distance_type):
    """Rebuild the distance histogram for the current dropdown selection.

    Parameters
    ----------
    seq_type
        Value of the "dropdown-sequence" dropdown (a sequence type id).
    state_type
        Value of the "dropdown-state" dropdown (an HMM state id). It is used
        for both hmm_state_id_1 and hmm_state_id_2 in the query.
    distance_type
        Value of the "dropdown-distance" dropdown (a metric type id).

    Returns
    -------
    A plotly bar figure showing a 35-bin histogram of the matching
    ``repeats_distances.value`` entries.

    Raises
    ------
    TypeError, ValueError
        If a selected value cannot be converted to an integer id.
    """
    # Callback arguments are sent by the browser, so coerce them to int
    # before they are formatted into the SQL text. Anything that is not a
    # plain integer id is rejected here instead of reaching the database.
    sequence_type_id = int(seq_type)
    hmm_state_id = int(state_type)
    metric_type_id = int(distance_type)

    # Same query text as before, written as adjacent literals instead of
    # backslash continuations inside one string.
    sql = (
        "SELECT value FROM repeats_distances WHERE "
        "     hmm_state_id_1 = {0} AND  hmm_state_id_2 = {1} "
        "    AND metric_type_id={2} AND sequence_type_id={3}"
    ).format(hmm_state_id, hmm_state_id, metric_type_id, sequence_type_id)

    # Diagnostics: the executed query and the number of rows it returned.
    print(sql)
    rows = db_connector.fetch_all(sql=sql)
    print(len(rows))

    # np.histogram computes over the flattened input, so the fetched rows
    # (presumably 1-tuples, since a single column is selected) need no
    # unpacking.
    counts, bin_edges = np.histogram(rows, bins=35)

    # Plot each bar at the midpoint of its bin.
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    # NOTE(review): range_x pins the x axis to [0, 1]; confirm every stored
    # metric is normalised to that range.
    fig = px.bar(
        x=bin_centers,
        y=counts,
        orientation='v',
        labels={'x': 'distance', 'y': 'count'},
        range_x=[0, 1],
    )

    fig.update_layout(
        xaxis=dict(
            tickmode='linear',
            tick0=0.0,
            dtick=0.15,
        )
    )

    return fig

In [16]:
# Start the Dash server. With mode='external' the app is not embedded in the
# notebook; it is served at the local URL printed in the cell output.
# use_reloader=False turns off the development auto-reloader.
app.run_server(mode='external', use_reloader=False)

Dash app running on http://127.0.0.1:8050/
SELECT value FROM repeats_distances WHERE      hmm_state_id_1 = 1 AND  hmm_state_id_2 = 1     AND metric_type_id=1 AND sequence_type_id=1
189544
SELECT value FROM repeats_distances WHERE      hmm_state_id_1 = 1 AND  hmm_state_id_2 = 1     AND metric_type_id=1 AND sequence_type_id=2
189544
