# Bayesian Database Search API Tutorial

## Preamble
Import necessary libraries.

In [None]:
import requests
import math
import pandas

Set the HTTP headers sent with the JSON POST requests below.

In [None]:
# JSON content type sent with the POST requests in this notebook.
headers = {
    'content-type': 'application/json',
}

Load visualization utility functions.

In [None]:
# Execute the local helper script inside this notebook's namespace.
# NOTE(review): bar_chart, choropleth, scatterplot and offline are used in
# later cells but never defined in this notebook, so they presumably come
# from visualize.py -- confirm against that file.
%run visualize.py

## Fetch the data table

In [None]:
# Download the data table from the service (no query parameters are sent).
http_response = requests.get(
    'http://bayesrest:5000/table-data',
    params=(),
)
assert http_response.status_code == 200

# The JSON body carries the row values under 'data' and the column names
# under 'columns'.
response_json = http_response.json()
data = response_json['data']

df = pandas.DataFrame(data=data, columns=response_json['columns'])
# Index the table by rowid; the rowid column itself stays in the frame.
df.index = df['rowid']

def with_columns(rdf, columns=None):
    """Attach selected columns of the global table ``df`` to ``rdf``.

    Args:
        rdf: DataFrame with a ``rowid`` column identifying rows of ``df``.
        columns: Names of the ``df`` columns to attach. ``None`` (the
            default) or an empty sequence attaches no extra columns.

    Returns:
        The left join of ``rdf`` with the requested ``df`` columns on
        ``rowid``; rows of ``rdf`` without a match get missing values.
    """
    # Avoid a shared mutable default; accept any sequence of names.
    extra = [] if columns is None else list(columns)
    # ``df`` is indexed by a Series named ``rowid`` and also keeps a
    # ``rowid`` column. Recent pandas refuses to merge on a key that is both
    # an index level and a column label, so drop the index before joining.
    lookup = df.loc[:, ['rowid'] + extra].reset_index(drop=True)
    return rdf.merge(lookup, on='rowid', how='left')

# Display the first five rows of the table as the cell output.
df[:5]

## Columns most predictive of `Opioid_Deaths`

#### Fetch

In [None]:
# Ask the service for the columns associated with Opioid_Deaths.
fac_payload = {'column': 'Opioid_Deaths'}
fac_response = requests.post(
    url='http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=fac_payload,
)
assert fac_response.status_code == 200

#### Bar chart

In [None]:
# Decode the response and chart its first 25 entries.
associated_columns = fac_response.json()
fac_bar_chart = bar_chart(
    associated_columns[:25],
    x_axis='Relevance to Opioid_Deaths',
    title='Columns most predictive of Opioid_Deaths',
)
offline.iplot(fac_bar_chart)

## Counties with unlikely numbers of opioid deaths

### Unlikely counties without context

#### Fetch

In [None]:
# Query find-anomalies for Opioid_Deaths with an empty list of context
# columns.
fa0_payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': [],
}
fa0_response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    json=fa0_payload,
    # Reuse the shared JSON headers, like the other POST cells do.
    headers=headers,
)
# Check the response created by this cell; fail fast on a non-200 reply.
assert fa0_response.status_code == 200

In [None]:
# Tabulate the response under the column names rowid and probability.
fa0_df = pandas.DataFrame(
    fa0_response.json(),
    columns=['rowid', 'probability'],
)
# Keep only rows that have a probability, then attach the columns needed
# for plotting.
fa0_df = with_columns(
    fa0_df[fa0_df['probability'].notnull()],
    ['state_county_fips', 'Opioid_Deaths', 'Location'],
)
fa0_df[:5]

#### Choropleth

In [None]:
# Plot the natural log of each probability against the county FIPS codes.
fa0_log_probability = fa0_df['probability'].transform(lambda p: math.log(p))
fa0_choropleth = choropleth(
    fa0_df['state_county_fips'],
    fa0_log_probability,
    title='Counties with unlikely values for Opioid_Deaths',
)
offline.iplot(fa0_choropleth)

#### Scatterplot

In [None]:
# Scatter Opioid_Deaths (x) against probability (y), using Location as
# the text for each point.
fa0_scatter = scatterplot(
    xs=fa0_df['Opioid_Deaths'],
    x_axis='Opioid_Deaths',
    ys=fa0_df['probability'],
    y_axis='Probability',
    text=fa0_df['Location'],
)
offline.iplot(fa0_scatter)

### Unlikely counties in the context of predictively relevant columns

#### Fetch

In [None]:
# Same find-anomalies query, now with three context columns.
# NOTE(review): the column names (inner double quotes, doubled spaces and
# spelling included) are sent verbatim -- presumably they must match the
# table's column names exactly; confirm before "correcting" them.
fa1_payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
    ],
}
fa1_response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    headers=headers,
    json=fa1_payload,
)
assert fa1_response.status_code == 200

In [None]:
# Tabulate the response under the column names rowid and probability.
fa1_df = pandas.DataFrame(
    fa1_response.json(),
    columns=['rowid', 'probability'],
)
# Keep only rows that have a probability, then attach the columns needed
# for plotting.
fa1_df = with_columns(
    fa1_df[fa1_df['probability'].notnull()],
    ['state_county_fips', 'Opioid_Deaths', 'Location'],
)
fa1_df[:5]

#### Choropleth

In [None]:
# Plot the natural log of each probability against the county FIPS codes.
fa1_choropleth = choropleth(
    fa1_df['state_county_fips'], 
    fa1_df['probability'].transform(lambda p: math.log(p)), 
    title='''Anomalous counties in terms of opioid deaths, in the context of support for Trump, 
    <br>density of immigrants, and poverty'''
)
# Render the figure built above (the name must match the assignment).
offline.iplot(fa1_choropleth)

#### Scatterplot

In [None]:
# Scatter of the fa1 results; the three series stay positional, in their
# original order, and only the keyword arguments are regrouped.
fa1_scatter = scatterplot(
    fa1_df['Opioid_Deaths'],
    fa1_df['probability'],
    fa1_df['Location'],
    title='''Anomalous counties in terms of opioid deaths, in the context of support for Trump, 
    <br> density of immigrants, and poverty''',
    x_axis='Opioid_Deaths',
    y_axis='Anomalousness Score',
)
offline.iplot(fa1_scatter)

## Counties similar to a county with a low number of opioid deaths

Here we'll focus on counties similar to a county with a low number of opioid deaths: **Fillmore County, Minnesota**.

#### Fetch

In [None]:
# Query find-peers for row 27 with Opioid_Deaths as the context column.
fp_payload = {'target-row': 27, 'context-column': 'Opioid_Deaths'}
fp_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp_payload,
)
assert fp_response.status_code == 200

In [None]:
# Tabulate the response under the column names rowid and similarity.
fp_df = pandas.DataFrame(
    fp_response.json(),
    columns=['rowid', 'similarity'],
)
# Keep only rows that have a similarity, then attach the columns needed
# for plotting.
fp_df = with_columns(
    fp_df[fp_df['similarity'].notnull()],
    ['state_county_fips', 'Opioid_Deaths', 'Location'],
)
fp_df[:5]

#### Choropleth

In [None]:
# Plot the similarity values against the county FIPS codes, passing an
# explicit eleven-step colour scale.
fp_choropleth = choropleth(
    fips=fp_df['state_county_fips'],
    values=fp_df['similarity'],
    title='Counties Similar to Fillmore County, Minnesota with respect to opioid deaths',
    legend_title='Similarity Score',
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3",
        "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb",
        "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp_choropleth)

#### Scatterplot

In [None]:
# Scatter Opioid_Deaths (x) against similarity (y), using Location as the
# text for each point.
fp_scatter = scatterplot(
    xs=fp_df['Opioid_Deaths'],
    ys=fp_df['similarity'],
    text=fp_df['Location'],
    x_axis='Opioid_Deaths',
    y_axis='Similarity Score',
    title='Counties Similar to Fillmore County, Minnesota with respect to opioid deaths',
)
offline.iplot(fp_scatter)