# Bayesian Database Search API Tutorial

# New set of queries -- as of 07/11/2018

## Getting Set Up
Importing libraries and confirming that the API is running:

In [None]:
import requests
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import pandas as pd

# Download the full table from the service. The response is indexed below
# as a JSON object with 'columns' and 'data' entries.
r = requests.get('http://bayesrest:5000/table-data')
# Fail here, rather than on a confusing KeyError further down, if the
# service did not answer successfully (same check as the query cells).
assert r.status_code == 200
resp = r.json()

data = resp['data']
bayesrest_data_frame = pd.DataFrame(data=data, columns=resp['columns'])

# Build a lookup from each row's 'rowid' value to its 'state_county_fips'
# value; the map cells pass these values to the choropleth as FIPS codes.
fips_index = resp['columns'].index('state_county_fips')
rowid_index = resp['columns'].index('rowid')
rowid_to_fips = {row[rowid_index]: row[fips_index] for row in data}

# Show the table as the cell output.
bayesrest_data_frame

## N1. Find the columns most predictive of "Opioid_Deaths" and show the results in a bar chart.

### Fetch the data

In [None]:
# Ask the find-associated-columns endpoint about the 'Opioid_Deaths' column.
headers = {'content-type': 'application/json'}
payload = {'column': 'Opioid_Deaths'}

response = requests.post(
    'http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=payload,
)
assert response.status_code == 200

# Preview the first five entries of the response.
response.json()[:5]

### Transform and render to a bar chart

In [None]:
# Keep only the first 24 entries of the response for plotting.
top_columns = response.json()[:24]

# Split each entry into its column label and its score.
columns = []
scores = []
for entry in top_columns:
    columns.append(entry['column'])
    scores.append(entry['score'])

# One bar per column, labelled along the x axis.
bar_positions = np.arange(len(columns))

plt.bar(bar_positions, scores, align='center', alpha=0.5)
plt.xticks(bar_positions, columns, rotation=20)
plt.title('Columns associated with opioid deaths')
plt.ylabel('Dependence probability score')

plt.show()

## N2. Find rows most similar to Oneida County, NY, using Opioid Deaths as the (single) context column, and show the results on a map.

In [None]:
# Request the peers of row 32, using 'Opioid_Deaths' as the only context column.
headers = {'content-type': 'application/json'}
payload = {'target-row': 32, 'context-column': 'Opioid_Deaths'}

response = requests.post(
    'http://bayesrest:5000/find-peers',
    headers=headers,
    json=payload,
)
assert response.status_code == 200

# Each response item is read as [rowid, similarity]; attach the FIPS value
# looked up from the table downloaded earlier.
rows = []
for peer in response.json():
    peer_rowid = peer[0]
    rows.append({
        "rowid": peer_rowid,
        "similarity": peer[1],
        "fips": rowid_to_fips[peer_rowid],
    })
rows[:5]

In [None]:
# Only the offline renderer is needed here. The previous
# `import plotly.plotly as plotly` was immediately rebound by
# `import plotly.offline` and pulled in the online-only module, so it is gone.
import plotly.figure_factory as figure_factory
import plotly.offline

# Parallel lists: one similarity score and one FIPS value per peer row.
scores = [row['similarity'] for row in rows]
fips = [row['fips'] for row in rows]

colorscale = ["#f7fbff", "#b3d2e9", "#4292c6", "#0b4083"]

# Load plotly's JavaScript into the notebook so figures render without a
# network connection.
plotly.offline.init_notebook_mode(connected=False)

# NOTE(review): no binning_endpoints are passed here, unlike the N3 map.
# Confirm that the number of distinct similarity scores fits this
# four-colour scale.
fig = figure_factory.create_choropleth(
    fips=fips,
    values=scores,
    scope=['usa'],
    colorscale=colorscale,
    county_outline={'color': 'rgb(15, 15, 55)', 'width': 0.5},
    title='Counties Similar to Oneida County, NY with respect to opioid deaths',
    legend_title='Similarity Score'
)

plotly.offline.iplot(fig, filename='choropleth_full_usa')

## N3. Find rows whose opioid deaths are most anomalous with respect to the property crime arrest rate, and show the results on a map.

In [None]:
import math

import plotly.figure_factory as ff

# Ask for anomalies in 'Opioid_Deaths' given the property-crime arrest rate
# as context. The inner double quotes are part of the column name sent to
# the service.
payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': ['"Total Property Crime Arrests Rate (per 100000 Population)"'],
}
headers = {'content-type': 'application/json'}

response = requests.post('http://bayesrest:5000/find-anomalies', json=payload, headers=headers)
# Same status check as the other query cells, so a failed request stops
# here instead of surfacing as a confusing error while parsing the body.
assert response.status_code == 200

# Each response item is read as [rowid, value]; items whose value is None
# are skipped. The stored "probability" is the floor of the natural log of
# that value, which groups rows into integer buckets for the map.
# NOTE(review): math.log raises ValueError for values <= 0 — confirm the
# service never returns such values.
rows = [{"rowid": row[0],
         "fips": rowid_to_fips[row[0]],
         "probability": math.floor(math.log(row[1]))}
        for row in response.json()
        if row[1] is not None]
rows[:5]

In [None]:
# Split the anomaly records into the two parallel lists used by the map.
probabilities = [record['probability'] for record in rows]
fips = [record['fips'] for record in rows]

In [None]:
# Seventeen-step ramp from near-white to dark blue, in the order the
# colours are handed to the choropleth.
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3",
    "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb",
    "#6baed6", "#57a0ce", "#4292c6", "#3082be",
    "#2171b5", "#1361a9", "#08519c", "#0b4083",
    "#08306b",
]

In [None]:
# Evenly spaced bin edges spanning the observed values; there is one fewer
# edge than there are colours in the scale.
lowest = min(probabilities)
highest = max(probabilities)
edge_count = len(colorscale) - 1
binning_endpoints = list(np.linspace(lowest, highest, edge_count))
binning_endpoints

In [None]:
import plotly.offline

# Draw the county map of the anomaly buckets computed above.
fig = ff.create_choropleth(
    fips=fips,
    values=probabilities,
    scope=['usa'],
    colorscale=colorscale,
    # Use the bin edges computed in the previous cell; the old name
    # `endpts` was never defined and raised a NameError.
    binning_endpoints=binning_endpoints,
    county_outline={
        'color': 'rgb(15, 15, 55)',
        'width': 0.5
    },
    show_hover=True,
    # Fully transparent centroid markers.
    centroid_marker={'opacity': 0},
    asp=2.9,
    title='Anomalous counties in terms of opioid deaths, with respect to total property crime arrests rate',
    legend_title='Anomalous Score'
)
plotly.offline.iplot(fig, filename='choropleth_full_usa')