# Setup
Imports and helper functions to access the API and draw the widgets.

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import os
from StringIO import StringIO

import pandas as pd
import requests

import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from ipykernel.pylab.backend_inline import flush_figures
from sqlalchemy import create_engine

# Endpoints and connection settings. Each value may be overridden through an
# environment variable so hosts and credentials do not have to be edited in the
# notebook itself; the defaults are the local development values.
base_url = os.environ.get('THUNDER_API_BASE_URL', 'http://localhost:8088/api/local/local')
query_base_path = os.environ.get('THUNDER_QUERY_BASE_PATH', '/queries/')
# A different SQLAlchemy URL can be supplied via THUNDER_DB_URL, e.g. the PostgreSQL
# alternative 'postgresql://postgres:postgres@postgresdb:5432/thunder'.
engine = create_engine(os.environ.get('THUNDER_DB_URL', "virtuoso://dba:dba@VOS"))

def callAPI(query, params=None, parse_dates=None):
    """GET ``base_url + query`` as CSV and return the response as a DataFrame.

    Parameters:
        query: path appended verbatim to ``base_url``.
        params: optional dict of URL query parameters; defaults to no parameters.
        parse_dates: forwarded to ``pd.read_csv`` as its ``parse_dates`` argument.

    Returns:
        The DataFrame parsed from the CSV response body.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status code.
    """
    # Use None as the default so no dict instance is shared between calls.
    if params is None:
        params = {}

    headers = {"Accept": "text/csv"}
    query_url = base_url + query

    resp = requests.get(query_url, headers=headers, params=params)
    # Fail loudly on an HTTP error rather than parsing an error page as CSV.
    resp.raise_for_status()
    return pd.read_csv(StringIO(resp.text), parse_dates=parse_dates)

def getSparqlResult(name, parse_dates=None):
    """Request the API resource ``/<name>`` via ``callAPI`` and return its DataFrame."""
    endpoint = '/' + name
    return callAPI(endpoint, params={}, parse_dates=parse_dates)

def getSqlQuery(name):
    """Return the full text of the file ``query_base_path + name + '.sql'``."""
    sql_path = query_base_path + name + '.sql'
    with open(sql_path) as sql_file:
        return sql_file.read()

def getSqlResult(name, parse_dates=None):
    """Run the query stored in ``<name>.sql`` against ``engine`` and return a DataFrame.

    ``parse_dates`` is forwarded to ``pd.read_sql_query``.
    """
    return pd.read_sql_query(getSqlQuery(name), engine, parse_dates=parse_dates)

def getSqlResultByQuery(query):
    """Execute the given SQL text against ``engine`` and return the result as a DataFrame."""
    result_frame = pd.read_sql_query(query, engine)
    return result_frame

def getDatasets(name, sqlIdColumn='id', sparqlIdColumn='id', sqlDateColumns=None, sparqlDateColumns=None):
    """Load the query ``name`` from both the SQL engine and the API.

    Each result is indexed by its own id column; the ``*DateColumns`` arguments
    are passed on as ``parse_dates`` to the respective loader.

    Returns:
        A ``(sql, sparql)`` tuple of DataFrames.
    """
    sql_frame = getSqlResult(name, parse_dates=sqlDateColumns).set_index(sqlIdColumn)
    sparql_frame = getSparqlResult(name, parse_dates=sparqlDateColumns).set_index(sparqlIdColumn)
    return sql_frame, sparql_frame

INFO:rdflib:RDFLib Version: 4.2.1
INFO:virtuoso.vstore:monkey-patched rdflib.plugins.sparql.processor.prepareQuery


In [3]:
# Open a connection on the shared engine.
# NOTE(review): `conn` is not referenced by any of the cells shown here — confirm
# whether it is used elsewhere or only serves as an eager connectivity check.
conn = engine.connect()

# Widgets

In [4]:
# Load the 'birthDates' query from both sources, each indexed by its own id column.
sqlResult, sparqlResult = getDatasets(
    'birthDates',
    sqlIdColumn='ID',
    sparqlIdColumn='person',
    sqlDateColumns=['Date_of_birth'],
    sparqlDateColumns=['birthDate'],
)

INFO:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): localhost


In [5]:
# Build one string per line: with several comma-separated values, Python 2's
# print statement would emit a tuple repr instead of the message.
print("SQL Columns: {}".format(sqlResult.columns))
print("RDF Columns: {}".format(sparqlResult.columns))

('SQL Columns: ', Index([u'COL0', u'Idfu', u'Geb_Datum', u'Date_Intake', u'Institute', u'Gender',
       u'cT', u'cN', u'Date_of_birth', u'Age', u'FF', u'LF', u'TotalDose',
       u'OK', u'Ok_type', u'pT_stage', u'pN_stage', u'TRG__1_4_', u'Date_OK',
       u'WHO'],
      dtype='object'))
('RDF Columns: ', Index([u'name', u'birthDate'], dtype='object'))


In [13]:
def drawGraph(data, x, y, kind='hist'):
    """Plot ``data`` using columns ``x`` and ``y`` with the given plot ``kind``.

    The figure is drawn at 6x6 and flushed immediately via ``flush_figures``.
    """
    plot_options = dict(x=x, y=y, kind=kind, figsize=(6, 6))
    data.plot(**plot_options)
    flush_figures()

In [7]:
# Split each birth date into separate day / month / year columns for plotting.
# The lambdas use the plain `lambda x:` form, which is valid on Python 2 and 3.
sparqlResult['birthDay'] = sparqlResult.loc[:, 'birthDate'].apply(lambda x: x.day)
sparqlResult['birthMonth'] = sparqlResult.loc[:, 'birthDate'].apply(lambda x: x.month)
sparqlResult['birthYear'] = sparqlResult.loc[:, 'birthDate'].apply(lambda x: x.year)

sqlResult['birthDay'] = sqlResult.loc[:, 'Date_of_birth'].apply(lambda x: x.day)
sqlResult['birthMonth'] = sqlResult.loc[:, 'Date_of_birth'].apply(lambda x: x.month)
sqlResult['birthYear'] = sqlResult.loc[:, 'Date_of_birth'].apply(lambda x: x.year)

In [8]:
# Display the column labels of sparqlResult.
sparqlResult.columns

Index([u'name', u'birthDate', u'birthDay', u'birthMonth', u'birthYear'], dtype='object')

In [14]:
plot_kinds = ['line', 'bar', 'hist', 'box', 'density', 'area', 'scatter', 'hexbin', 'pie']

# Snapshot the column labels once so the dropdowns and their handlers agree on them.
sparql_columns = sparqlResult.columns.values

x_widget = widgets.Dropdown(
    options=sparql_columns,
    value=sparql_columns[0],
    description='x',
    disabled=False,
)

y_widget = widgets.Dropdown(
    options=sparql_columns,
    value=sparql_columns[1],
    description='y',
    disabled=False,
)

def prevent_value_clash(widget1, widget2, columns=sparql_columns):
    """Move ``widget2`` to another column when it shows the same value as ``widget1``.

    ``columns`` is bound at definition time, so this function keeps using the
    column labels of this cell even if the name ``sparql_columns`` is reassigned.
    """
    if widget1.value == widget2.value:
        if widget1.value == columns[0]:
            widget2.value = columns[1]
        else:
            widget2.value = columns[0]

def _make_clash_handler(changed, other, resolve=prevent_value_clash):
    """Return an observer that keeps ``other`` from duplicating ``changed``.

    The widgets and the resolver are captured here rather than looked up as
    globals when the observer fires, so another cell that rebinds ``x_widget``,
    ``y_widget`` or ``prevent_value_clash`` cannot redirect these handlers.
    """
    def handler(*args):
        resolve(changed, other)
    return handler

update_y_value = _make_clash_handler(x_widget, y_widget)
update_x_value = _make_clash_handler(y_widget, x_widget)

x_widget.observe(update_y_value, 'value')
y_widget.observe(update_x_value, 'value')

interact_manual(drawGraph, data=fixed(sparqlResult), x=x_widget, y=y_widget,
         kind=plot_kinds)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUneCcsIG9wdGlvbnM9KCduYW1lJywgJ2JpcnRoRGF0ZScsICdiaXJ0aERheScsICdiaXJ0aE1vbnRoJywgJ2JpcnTigKY=


<function __main__.drawGraph>

In [15]:
plot_kinds = ['line', 'bar', 'hist', 'box', 'density', 'area', 'scatter', 'hexbin', 'pie']

# Snapshot the column labels once so the dropdowns and their handlers agree on them.
sql_columns = sqlResult.columns.values

x_widget = widgets.Dropdown(
    options=sql_columns,
    value=sql_columns[0],
    description='x',
    disabled=False,
)

y_widget = widgets.Dropdown(
    options=sql_columns,
    value=sql_columns[1],
    description='y',
    disabled=False,
)

def prevent_value_clash(widget1, widget2, columns=sql_columns):
    """Move ``widget2`` to another column when it shows the same value as ``widget1``.

    ``columns`` is bound at definition time, so this function keeps using the
    column labels of this cell even if the name ``sql_columns`` is reassigned.
    """
    if widget1.value == widget2.value:
        if widget1.value == columns[0]:
            widget2.value = columns[1]
        else:
            widget2.value = columns[0]

def _make_clash_handler(changed, other, resolve=prevent_value_clash):
    """Return an observer that keeps ``other`` from duplicating ``changed``.

    The widgets and the resolver are captured here rather than looked up as
    globals when the observer fires, so another cell that rebinds ``x_widget``,
    ``y_widget`` or ``prevent_value_clash`` cannot redirect these handlers.
    """
    def handler(*args):
        resolve(changed, other)
    return handler

update_y_value = _make_clash_handler(x_widget, y_widget)
update_x_value = _make_clash_handler(y_widget, x_widget)

x_widget.observe(update_y_value, 'value')
y_widget.observe(update_x_value, 'value')

interact_manual(drawGraph, data=fixed(sqlResult), x=x_widget, y=y_widget,
         kind=plot_kinds)

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUneCcsIG9wdGlvbnM9KHUnQ09MMCcsIHUnSWRmdScsIHUnR2ViX0RhdHVtJywgdSdEYXRlX0ludGFrZScsIHUnSW7igKY=


<function __main__.drawGraph>