# Bayesian Database Search API Tutorial

## Preamble
Import necessary libraries.

In [None]:
import requests
import math
import pandas

Set HTTP headers to be used across all requests.

In [None]:
# Shared HTTP headers: every POST below sends its payload as JSON.
headers = {
    'content-type': 'application/json',
}

Load visualization utility functions.

In [None]:
# IPython magic: execute visualize.py and load its names into this notebook.
# NOTE(review): make_bar_chart, make_scatterplot, make_choropleth and `offline`
# are used below but never defined here, so they presumably come from that
# script -- confirm against visualize.py.
%run visualize.py

## The data

In [None]:
# Download the complete table from the service.
http_response = requests.get('http://bayesrest:5000/table-data')
assert http_response.status_code == 200

# The JSON body holds the rows under 'data' and the header under 'columns'.
response_json = http_response.json()
data = response_json['data']

dataframe = pandas.DataFrame(data=data, columns=response_json['columns'])
# Index the table by rowid; 'rowid' also stays available as a regular column.
dataframe.index = dataframe['rowid']

def with_columns(response_dataframe, columns=None):
    """Attach columns of the global table to a response frame, matched on rowid.

    Args:
        response_dataframe: frame with a 'rowid' column, such as the frames
            built from the service responses in this notebook.
        columns: names of columns of the module-level ``dataframe`` to add.
            ``None`` (the default) adds no extra columns.

    Returns:
        A new frame holding the left join of ``response_dataframe`` with the
        selected columns, so every response row is kept in its original order.
    """
    # Avoid a shared mutable default and accept any iterable of names.
    columns = [] if columns is None else list(columns)
    # ``dataframe`` is indexed by rowid *and* keeps a 'rowid' column. pandas
    # raises a ValueError when asked to merge on a name that is both an index
    # level and a column label, so drop the index from the lookup frame first.
    lookup = dataframe.loc[:, ['rowid'] + columns].reset_index(drop=True)
    return response_dataframe.merge(lookup, on='rowid', how='left')

# Display the first rows of the table as this cell's output.
dataframe[:5]

## Columns most predictive of `Opioid_Deaths`

In [None]:
# Column sent to the find-associated-columns endpoint in the cells below.
column = 'Opioid_Deaths'

#### Fetch

In [None]:
# Ask the service for the columns associated with `column`.
payload = {'column': column}
response = requests.post(
    'http://bayesrest:5000/find-associated-columns',
    json=payload,
    headers=headers,
)
assert response.status_code == 200

#### Bar chart

In [None]:
# Keep the decoded response so later cells can reuse it.
od_predictive_columns = response.json()

# Chart only the first 25 entries of the response.
bar_chart = make_bar_chart(
    od_predictive_columns[:25],
    title='Columns most predictive of {}'.format(column),
    x_axis='Relevance to {}'.format(column),
)
offline.iplot(bar_chart)

## Columns most predictive of `Total Property Crimes Rate (per 100000 Population)`

#### Fetch

In [None]:
# Repeat the associated-columns query for the property crime rate column.
column = 'Total Property Crimes Rate (per 100000 Population)'
payload = {'column': column}
response = requests.post(
    'http://bayesrest:5000/find-associated-columns',
    json=payload,
    headers=headers,
)
assert response.status_code == 200

#### Bar chart

In [None]:
columns = response.json()

# Chart only the first 25 entries of the response.
bar_chart = make_bar_chart(
    columns[:25],
    title='Columns most predictive of {}'.format(column),
    x_axis='Relevance to {}'.format(column),
)
offline.iplot(bar_chart)

## Counties with unlikely numbers of opioid deaths

In [None]:
# Column sent as 'target-column' to the find-anomalies endpoint below.
target_column = 'Opioid_Deaths'

#### Fetch

In [None]:
# Context column names are sent wrapped in double quotes, exactly as written.
payload = {
    'target-column': target_column,
    'context-columns': [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
    ],
}
response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    json=payload,
    headers=headers,
)
assert response.status_code == 200

In [None]:
# Tabulate the response, discard rows without a probability, then attach the
# FIPS code, location name and target column from the main table.
response_dataframe = with_columns(
    pandas.DataFrame(
        response.json(), columns=['rowid', 'probability']
    ).dropna(subset=['probability']),
    ['state_county_fips', 'Location', target_column],
)

#### Scatterplot

In [None]:
# Plot the target column against the probability score for every county.
# NOTE(review): the three positional arguments look like x values, y values
# and point labels -- confirm against make_scatterplot in visualize.py.
scatterplot = make_scatterplot(
    response_dataframe[target_column],
    response_dataframe['probability'],
    response_dataframe['Location'],
    x_axis=target_column, 
    y_axis='Probability Score',
    # The title literal spans two lines; its line break and indentation are
    # part of the string passed to the plot.
    title='''Anomalous counties in terms of {}, in the context of support for Trump, 
    <br> density of immigrants, and poverty'''.format(target_column)
)
offline.iplot(scatterplot)

## Counties similar to a county with a low number of opioid deaths and low probability density of opioid deaths

In [None]:
def location_to_rowid(location):
    """Return the rowid of the first table row whose 'Location' equals *location*.

    Args:
        location: value to look up in the 'Location' column of the
            module-level ``dataframe``.

    Returns:
        The matching rowid as a native Python scalar, so it can be placed
        directly into a JSON request payload.

    Raises:
        IndexError: if no row has that location.
    """
    matches = dataframe.loc[dataframe['Location'] == location, 'rowid']
    if matches.empty:
        # Same exception type as the bare positional lookup would raise, but
        # with a message that names the missing location.
        raise IndexError('No row found for location {!r}'.format(location))
    rowid = matches.iloc[0]
    # numpy.asscalar was deprecated in NumPy 1.16 and later removed; .item()
    # is its documented replacement and needs no numpy import here. Values
    # that are already plain Python objects have no .item() and pass through.
    return rowid.item() if hasattr(rowid, 'item') else rowid

In [None]:
# County to find peers for, and the column sent as 'context-column'.
location = 'Presidio County, Texas'
context_column = 'Opioid_Deaths'

# The service addresses rows by rowid, so translate the county name.
rowid = location_to_rowid(location)
assert rowid is not None

#### Fetch

In [None]:
# Query the find-peers endpoint for the selected row and context column.
payload = {'target-row': rowid, 'context-column': context_column}
response = requests.post(
    'http://bayesrest:5000/find-peers',
    json=payload,
    headers=headers,
)
assert response.status_code == 200

In [None]:
# Tabulate the response, discard rows without a similarity score, then attach
# the FIPS code, location name and context column from the main table.
response_dataframe = with_columns(
    pandas.DataFrame(
        response.json(), columns=['rowid', 'similarity']
    ).dropna(subset=['similarity']),
    ['state_county_fips', 'Location', context_column],
)

#### Choropleth

In [None]:
# Map each county's similarity score, keyed by its FIPS code.
choropleth = make_choropleth(
    fips=response_dataframe['state_county_fips'],
    values=response_dataframe['similarity'],
    legend_title='Similarity Score',
    title='Counties similar to {} with respect to {}'.format(location, context_column),
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
        "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(choropleth)

## Counties similar to a county with a high number of opioid deaths

#### Fetch

In [None]:
# County to find peers for, and the column sent as 'context-column'.
location = 'Los Angeles County, California'
context_column = 'Opioid_Deaths'

# The service addresses rows by rowid, so translate the county name.
rowid = location_to_rowid(location)
assert rowid is not None

In [None]:
# Query the find-peers endpoint for the selected row and context column.
payload = {'target-row': rowid, 'context-column': context_column}
response = requests.post(
    'http://bayesrest:5000/find-peers',
    json=payload,
    headers=headers,
)
assert response.status_code == 200

In [None]:
# Tabulate the response, discard rows without a similarity score, then attach
# the FIPS code, location name and context column from the main table.
response_dataframe = with_columns(
    pandas.DataFrame(
        response.json(), columns=['rowid', 'similarity']
    ).dropna(subset=['similarity']),
    ['state_county_fips', 'Location', context_column],
)

#### Choropleth

In [None]:
# Map each county's similarity score, keyed by its FIPS code.
choropleth = make_choropleth(
    fips=response_dataframe['state_county_fips'],
    values=response_dataframe['similarity'],
    legend_title='Similarity Score',
    title='Counties similar to {} with respect to {}'.format(location, context_column),
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
        "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(choropleth)