# Bayesian Database Search API Tutorial

## Getting Set Up
Importing libraries and confirming that the API is running:

In [None]:
import requests
import numpy as np
import pandas as pd

# Fetch the whole table once; its rows back the lookup dictionaries used by later cells.
r = requests.get('http://bayesrest:5000/table-data')
# Confirm the API is actually running before parsing the body, mirroring the
# status checks done after the POST requests further down.
assert r.status_code == 200
resp = r.json()

bayesrest_data_frame = pd.DataFrame(data=resp['data'], columns=resp['columns'])
data = resp['data']

# Positions of the columns of interest within each raw row.
fips_index = resp['columns'].index('state_county_fips')
rowid_index = resp['columns'].index('rowid')
opioid_death_index = resp['columns'].index('Opioid_Deaths')
location_index = resp['columns'].index('Location')

# rowid -> value lookups, used to enrich API results that only carry a rowid.
rowid_to_fips = { row[rowid_index]: row[fips_index] for row in data}
rowid_to_od_val = {row[rowid_index]: row[opioid_death_index] for row in data}
rowid_to_location = {row[rowid_index]: row[location_index] for row in data}
bayesrest_data_frame

## Let's find columns most predictive of "Opioid_Deaths" and show the results in a bar chart.

### Fetch the data

In [None]:
# Request the columns associated with 'Opioid_Deaths'.
headers = {'content-type': 'application/json'}
payload = {'column': 'Opioid_Deaths'}

response = requests.post(
    'http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=payload,
)
assert response.status_code == 200

# Preview the first five entries of the response.
response.json()[:5]

### Transform and render to a bar chart

In [None]:
import plotly.offline
import plotly.graph_objs as go
# Initialise plotly's notebook mode before rendering.
plotly.offline.init_notebook_mode(connected=False)

# Keep only the first 25 entries of the previous response.
top_columns = response.json()[:25]

# Labels longer than 40 characters are cut and marked with an ellipsis.
columns = [
    entry['column'][:40] + ('…' if len(entry['column']) > 40 else '')
    for entry in top_columns
]
scores = [entry['score'] for entry in top_columns]

data = [go.Bar(x=scores, y=columns, orientation='h')]

layout = go.Layout(
    title="Columns relevant to opioid deaths",
    margin=go.layout.Margin(),
    xaxis={'title': "Relevance to opioid deaths"},
    yaxis={
        'tickfont': {'size': 9, 'color': 'rgb(107, 107, 107)'},
        'tickangle': 30,
        'automargin': True,
    },
)

fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='horizontal-bar')

## Next, we can score each county by how likely its number of opioid deaths is

### First, we check which counties have an anomalous number of opioid deaths without context

In [None]:
import plotly.figure_factory as ff
import math

# No context columns: ask for anomalies in 'Opioid_Deaths' on its own.
payload={'target-column': 'Opioid_Deaths', 'context-columns': []}
headers={'content-type': 'application/json'}

response = requests.post('http://bayesrest:5000/find-anomalies', json=payload, headers=headers)
# Same status check as the other POST cells, so an API error surfaces here
# instead of as a confusing failure when the payload is parsed.
assert response.status_code == 200

def to_maps(data):
    """Turn anomaly rows into dictionaries enriched with per-row details.

    Each entry of ``data`` is indexed as ``(rowid, probability)``. Entries whose
    probability is ``None`` are dropped. The FIPS code, opioid-death value and
    location are looked up by rowid in the notebook-level ``rowid_to_fips``,
    ``rowid_to_od_val`` and ``rowid_to_location`` dictionaries.

    NOTE(review): ``math.log`` raises ``ValueError`` for a probability of 0;
    confirm the API never returns one.
    """
    records = []
    for entry in data:
        probability = entry[1]
        if probability is None:
            continue
        rowid = entry[0]
        records.append({
            "rowid": rowid,
            "fips": rowid_to_fips[rowid],
            "probability_log": math.log(probability),
            "probability": probability,
            "opioid_deaths": rowid_to_od_val[rowid],
            "location": rowid_to_location[rowid]})
    return records

# Convert the response payload into enriched records and preview the first five.
rows = to_maps(response.json())
rows[:5]

In [None]:
# Split the anomaly records into parallel lists, one per field, for plotting.
fips, probabilities_log, probabilities, opioid_deaths, locations = (
    [record[field] for record in rows]
    for field in ('fips', 'probability_log', 'probability', 'opioid_deaths', 'location')
)

In [None]:
# Blue palette written from lightest to darkest, then flipped so that the
# darkest shade comes first.
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
    "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b",
][::-1]

In [None]:
# Evenly spaced bin edges spanning the log-probability range; one fewer edge
# than there are colours in the scale.
binning_endpoints = list(
    np.linspace(
        start=min(probabilities_log),
        stop=max(probabilities_log),
        num=len(colorscale) - 1,
    )
)

In [None]:
import plotly.offline

# County-level map coloured by the log-probabilities, using the bins and
# colour scale defined in the cells above.
fig = ff.create_choropleth(
    fips=fips,
    values=probabilities_log,
    scope=['usa'],
    title='Anomalous counties in terms of opioid deaths',
    colorscale=colorscale,
    binning_endpoints=binning_endpoints,
    county_outline=dict(color='rgb(15, 15, 55)', width=0.5),
    centroid_marker=dict(opacity=0),
    show_hover=True,
    showlegend=False,
    asp=2.9,
)
plotly.offline.iplot(fig, filename='choropleth_full_usa')

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as figure_factory
import plotly.offline

# Initialise plotly's notebook mode before rendering.
plotly.offline.init_notebook_mode(connected=False)

# One marker per record: opioid deaths on x, probability on y, location as text.
trace = go.Scatter(
    mode='markers',
    x=opioid_deaths,
    y=probabilities,
    text=locations,
)

layout = go.Layout(
    title='Anomalous counties in terms of opioid deaths',
    xaxis={'title': 'Opioid Deaths'},
    yaxis={'title': 'Anomalousness Score'},
    hovermode='closest',
    showlegend=False,
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='basic-scatter')

### Second, we can compare which counties have an anomalous number of opioid deaths in the context of columns that were found to be relevant

In [None]:
import plotly.figure_factory as ff
import math

# Same anomaly query as before, now conditioned on three context columns.
# The column names are sent with their embedded double quotes.
payload={
    'target-column': 'Opioid_Deaths',
    'context-columns': [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
    ],
}
headers={'content-type': 'application/json'}

response = requests.post('http://bayesrest:5000/find-anomalies', json=payload, headers=headers)
# Same status check as the other POST cells, so an API error surfaces here
# instead of as a confusing failure inside to_maps.
assert response.status_code == 200

rows = to_maps(response.json())
rows[:5]

In [None]:
# Split the anomaly records into parallel lists, one per field, for plotting.
fips, probabilities, probabilities_log, opioid_deaths, locations = (
    [record[field] for record in rows]
    for field in ('fips', 'probability', 'probability_log', 'opioid_deaths', 'location')
)

In [None]:
# Blue palette written from lightest to darkest, then flipped so that the
# darkest shade comes first.
colorscale = list(reversed([
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
    "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083", "#08306b",
]))

In [None]:
# Evenly spaced bin edges spanning the log-probability range; one fewer edge
# than there are colours in the scale.
binning_endpoints = list(
    np.linspace(
        start=min(probabilities_log),
        stop=max(probabilities_log),
        num=len(colorscale) - 1,
    )
)

In [None]:
import plotly.offline

# County-level map coloured by the context-conditioned log-probabilities,
# using the bins and colour scale defined in the cells above.
fig = ff.create_choropleth(
    fips=fips,
    values=probabilities_log,
    scope=['usa'],
    title='Anomalous counties in terms of opioid deaths, in the context of support for Trump, <br> density of immigrants, and poverty',
    colorscale=colorscale,
    binning_endpoints=binning_endpoints,
    county_outline=dict(color='rgb(15, 15, 55)', width=0.5),
    centroid_marker=dict(opacity=0),
    show_hover=True,
    showlegend=False,
    asp=2.9,
)
plotly.offline.iplot(fig, filename='choropleth_full_usa')

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as figure_factory
import plotly.offline

# Initialise plotly's notebook mode before rendering.
plotly.offline.init_notebook_mode(connected=False)

# One marker per record: opioid deaths on x, raw probability on y, location as text.
trace = go.Scatter(
    x = opioid_deaths,
    y = probabilities,
    mode = 'markers',
    text = locations
)

layout= go.Layout(
    title= 'Anomalous counties in terms of opioid deaths, in the context of support for Trump, <br> density of immigrants, and poverty',
    hovermode= 'closest',
    xaxis= dict(
        title= 'Opioid Deaths'
    ),
    yaxis=dict(
        # The plotted y values are the raw probabilities, not their logarithm,
        # so the axis carries the same label as the context-free scatter plot.
        title= 'Anomalousness Score'
    ),
    showlegend= False
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='basic-scatter')

## Finally, we can find rows most similar to a county with a low number of opioid deaths (Fillmore County, Minnesota)

In [None]:
# Ask for the peers of row 27 with 'Opioid_Deaths' as the context column.
headers = {'content-type': 'application/json'}
payload = {'target-row': 27, 'context-column': 'Opioid_Deaths'}

response = requests.post(
    'http://bayesrest:5000/find-peers',
    headers=headers,
    json=payload,
)
assert response.status_code == 200

# Each response entry is indexed as (rowid, similarity); the remaining fields
# come from the rowid lookup dictionaries built in the setup cell.
rows = [
    {
        "rowid": peer[0],
        "similarity": peer[1],
        "fips": rowid_to_fips[peer[0]],
        "opioid_deaths": rowid_to_od_val[peer[0]],
        "location": rowid_to_location[peer[0]],
    }
    for peer in response.json()
]
rows[:25]

In [None]:
# Split the peer records into parallel lists, one per field, for plotting.
fips, similarities, opioid_deaths, locations = (
    [record[field] for record in rows]
    for field in ('fips', 'similarity', 'opioid_deaths', 'location')
)

In [None]:
# Eleven-step blue palette, lightest shade first (not reversed here).
colorscale = [
    "#f7fbff",
    "#ebf3fb",
    "#deebf7",
    "#d2e3f3",
    "#c6dbef",
    "#b3d2e9",
    "#9ecae1",
    "#85bcdb",
    "#6baed6",
    "#57a0ce",
    "#4292c6",
]

In [None]:
# Evenly spaced bin edges spanning the similarity range; one fewer edge than
# there are colours in the scale.
binning_endpoints = list(
    np.linspace(
        start=min(similarities),
        stop=max(similarities),
        num=len(colorscale) - 1,
    )
)

In [None]:
# Rebuild the similarity and FIPS lists from the peer records.
scores, fips = (
    [record[field] for record in rows]
    for field in ('similarity', 'fips')
)

import plotly.plotly as plotly
import plotly.figure_factory as figure_factory
import plotly.offline

# Initialise plotly's notebook mode before rendering.
plotly.offline.init_notebook_mode(connected=False)

# County-level map coloured by similarity, using the bins and colour scale
# defined in the cells above.
fig = figure_factory.create_choropleth(
    fips=fips,
    values=similarities,
    scope=['usa'],
    title='Counties Similar to Fillmore County, Minnesota with respect to opioid deaths',
    legend_title='Similarity Score',
    colorscale=colorscale,
    binning_endpoints=binning_endpoints,
    county_outline=dict(color='rgb(15, 15, 55)', width=0.5),
)

plotly.offline.iplot(fig, filename='choropleth_full_usa')

In [None]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as figure_factory
import plotly.offline

# Initialise plotly's notebook mode before rendering.
plotly.offline.init_notebook_mode(connected=False)

# One marker per record: opioid deaths on x, similarity on y, location as text.
trace = go.Scatter(
    mode='markers',
    x=opioid_deaths,
    y=similarities,
    text=locations,
)

layout = go.Layout(
    title='Counties Similar to Fillmore County, Minnesota with respect to opioid deaths',
    xaxis={'title': 'Opioid Deaths'},
    yaxis={'title': 'Similarity Score'},
    hovermode='closest',
    showlegend=False,
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='basic-scatter')