# Bayesian Database Search API Tutorial

## Preamble
Import necessary libraries.

In [None]:
import requests
import math
import pandas

Set HTTP headers to be used across all requests.

In [None]:
# Shared by the POST requests below, all of which send a JSON payload.
headers = {
    'content-type': 'application/json',
}

Load visualization utility functions.

In [None]:
# Execute visualize.py inside this notebook session. The plotting names used
# by later cells (bar_chart, choropleth, scatterplot, offline) are not defined
# anywhere in this notebook, so they presumably come from that script —
# TODO confirm.
%run visualize.py

## Fetch the data table

In [None]:
# Download the whole table once; with_columns() later joins API results
# back onto it. (The original passed a stray empty tuple as `params`, which
# added nothing to the request.)
http_response = requests.get('http://bayesrest:5000/table-data')
assert http_response.status_code == 200

# The JSON body holds the rows under 'data' and the header under 'columns'.
response_json = http_response.json()
data = response_json['data']

df = pandas.DataFrame(
    data=data,
    columns=response_json['columns']
)
# Index by rowid while also keeping rowid available as an ordinary column.
df.index = df['rowid']

def with_columns(rdf, columns=None):
    """Left-join selected columns of the global ``df`` onto ``rdf`` by ``rowid``.

    Args:
        rdf: DataFrame containing a ``rowid`` column.
        columns: Names of ``df`` columns to attach. ``None`` (the default)
            attaches nothing beyond the ``rowid`` join key.

    Returns:
        The left join of ``rdf`` with the requested ``df`` columns; a
        ``rowid`` absent from ``df`` yields NaN in the attached columns.
    """
    # A None default avoids sharing one mutable list object between calls.
    wanted = ['rowid'] + list(columns or [])
    # ``df`` uses rowid as its index name *and* keeps it as a column; recent
    # pandas releases refuse to merge on such an ambiguous key. Dropping the
    # index leaves the column as the only candidate.
    lookup = df.loc[:, wanted].reset_index(drop=True)
    return rdf.merge(lookup, on='rowid', how='left')

# Show the leading slice of the table as this cell's output.
df[:5]

## Columns most predictive of `Opioid_Deaths`

#### Fetch

In [None]:
# Request the columns associated with Opioid_Deaths.
fac_payload = {'column': 'Opioid_Deaths'}
fac_response = requests.post(
    url='http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=fac_payload,
)
assert fac_response.status_code == requests.codes.ok

#### Bar chart

In [None]:
associated_columns = fac_response.json()
# Chart only the first 25 entries of the response.
fac_bar_chart = bar_chart(
    associated_columns[:25],
    x_axis='Relevance to Opioid_Deaths',
    title='Columns most predictive of Opioid_Deaths',
)
offline.iplot(fac_bar_chart)

## Note the difference between variables predictive of Opioid_Deaths and variables predictive of Total Property Crimes Rate (per 100000 Population)

#### Fetch

In [None]:
# Same query as above, now for the property-crime-rate column.
fac_payload_2 = {'column': 'Total Property Crimes Rate (per 100000 Population)'}
fac_response_2 = requests.post(
    url='http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=fac_payload_2,
)
assert fac_response_2.status_code == requests.codes.ok

#### Bar chart

In [None]:
associated_columns_2 = fac_response_2.json()
# Chart only the first 25 entries of the response.
fac_bar_chart_2 = bar_chart(
    associated_columns_2[:25],
    x_axis='Relevance to Property Crime Rate',
    title='Columns most predictive of Property Crime Rate',
)
offline.iplot(fac_bar_chart_2)

## Counties with unlikely numbers of opioid deaths

### Unlikely counties without context

#### Fetch

In [None]:
# Score Opioid_Deaths for every row with no context columns at all.
fa0_payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': []
}
# Reuse the shared `headers` dict, as every other request in this notebook
# does, instead of rebuilding an identical literal here.
fa0_response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    json=fa0_payload,
    headers=headers
)
assert fa0_response.status_code == 200

In [None]:
# Load the JSON response into a (rowid, probability) frame.
fa0_df = pandas.DataFrame(data=fa0_response.json(), columns=['rowid', 'probability'])
# Discard rows that came back without a probability.
fa0_df = fa0_df.loc[fa0_df['probability'].notna()]
# Attach the table columns the plots below read.
fa0_df = with_columns(fa0_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fa0_df.head(5)

#### Choropleth

In [None]:
# The map is drawn from the natural log of each probability.
fa0_choropleth = choropleth(
    fa0_df['state_county_fips'],
    fa0_df['probability'].apply(math.log),
    title='Counties with unlikely values for Opioid_Deaths',
)
offline.iplot(fa0_choropleth)

#### Scatterplot

In [None]:
# Raw probability (not its log) against the observed death count,
# with the Location column passed as the point text.
fa0_scatter = scatterplot(
    xs=fa0_df['Opioid_Deaths'],
    ys=fa0_df['probability'],
    text=fa0_df['Location'],
    y_axis='Probability Score',
    x_axis='Opioid_Deaths',
)
offline.iplot(fa0_scatter)

### Unlikely counties in the context of predictively relevant columns

#### Fetch

In [None]:
# Same target column as before, now with three context columns.
# The inner double quotes are part of the column identifiers sent to the API.
fa1_payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
    ],
}
fa1_response = requests.post(
    url='http://bayesrest:5000/find-anomalies',
    headers=headers,
    json=fa1_payload,
)
assert fa1_response.status_code == requests.codes.ok

In [None]:
# Load the JSON response into a (rowid, probability) frame.
fa1_df = pandas.DataFrame(data=fa1_response.json(), columns=['rowid', 'probability'])
# Discard rows that came back without a probability.
fa1_df = fa1_df.loc[fa1_df['probability'].notna()]
# Attach the table columns the plots below read.
fa1_df = with_columns(fa1_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fa1_df.head(5)

#### Choropleth

In [None]:
# The map is drawn from the natural log of each probability.
# The title keeps its embedded newline and indentation ahead of the <br> tag.
fa1_choropleth = choropleth(
    fa1_df['state_county_fips'],
    fa1_df['probability'].apply(math.log),
    title=(
        'Anomalous counties in terms of opioid deaths, in the context of support for Trump, \n'
        '    <br>density of immigrants, and poverty'
    ),
)
offline.iplot(fa1_choropleth)

#### Scatterplot

In [None]:
# Raw probability against the observed death count; the three data series
# stay positional exactly as the helper is called here.
fa1_scatter = scatterplot(
    fa1_df['Opioid_Deaths'],
    fa1_df['probability'],
    fa1_df['Location'],
    title=(
        'Anomalous counties in terms of opioid deaths, in the context of support for Trump, \n'
        '    <br> density of immigrants, and poverty'
    ),
    y_axis='Probability Score',
    x_axis='Opioid_Deaths',
)
offline.iplot(fa1_scatter)

### Adding more variables pushes more counties from the middle to either end of our score range

#### Fetch

In [None]:
# Extend the previous context with further columns.
# The inner double quotes are part of the column identifiers sent to the API.
fa2_payload = {
    'target-column': 'Opioid_Deaths',
    'context-columns': [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
        'Population',
        '"Total votes 2016"',
        '"Clinton 2016"',
        '"Obama 2012"',
        '"Romney 2012"',
        '"Total votes 2012"',
        '"Dem AVG"',
        '"Total Arrests (Juveniles Only)"'
    ]
}
# Post fa2_payload: the original sent fa1_payload here, so the extended
# context was never used and these results merely repeated the previous ones.
fa2_response = requests.post('http://bayesrest:5000/find-anomalies', json=fa2_payload, headers=headers)
assert fa2_response.status_code == 200

In [None]:
# Load the JSON response into a (rowid, probability) frame.
fa2_df = pandas.DataFrame(data=fa2_response.json(), columns=['rowid', 'probability'])
# Discard rows that came back without a probability.
fa2_df = fa2_df.loc[fa2_df['probability'].notna()]
# Attach the table columns the plots below read.
fa2_df = with_columns(fa2_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fa2_df.head(5)

#### Choropleth

In [None]:
# The map is drawn from the natural log of each probability.
fa2_choropleth = choropleth(
    fa2_df['state_county_fips'],
    fa2_df['probability'].apply(math.log),
    title='Anomalous counties in terms of opioid deaths, in the context of the top 10 most relevant variables',
)
offline.iplot(fa2_choropleth)

#### Scatterplot

In [None]:
# Raw probability against the observed death count; the three data series
# stay positional exactly as the helper is called here.
fa2_scatter = scatterplot(
    fa2_df['Opioid_Deaths'],
    fa2_df['probability'],
    fa2_df['Location'],
    title='Anomalous counties in terms of opioid deaths, in the context of the top 10 most relevant columns',
    y_axis='Probability Score',
    x_axis='Opioid_Deaths',
)
offline.iplot(fa2_scatter)

## Counties similar to a county with a low number of opioid deaths and low probability density of opioid deaths

Here we'll focus on counties similar to a county with a low number of opioid deaths and low probability density of opioid deaths: **Presidio County, Texas**.

#### Fetch

In [None]:
# Target row 2710 (Presidio County, Texas, per the section text above),
# compared in the context of Opioid_Deaths.
fp1_payload = {'target-row': 2710, 'context-column': 'Opioid_Deaths'}

fp1_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp1_payload,
)
assert fp1_response.status_code == requests.codes.ok

In [None]:
# Load the JSON response into a (rowid, similarity) frame.
fp1_df = pandas.DataFrame(data=fp1_response.json(), columns=['rowid', 'similarity'])
# Discard rows that came back without a similarity value.
fp1_df = fp1_df.loc[fp1_df['similarity'].notna()]
# Attach the table columns the plots below read.
fp1_df = with_columns(fp1_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fp1_df.head(5)

#### Choropleth

In [None]:
# Map each county's similarity using an eleven-step colour list.
fp1_choropleth = choropleth(
    title='Counties Similar to Presidio County, Texas with respect to opioid deaths',
    legend_title='Similarity Score',
    fips=fp1_df['state_county_fips'],
    values=fp1_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
        "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp1_choropleth)

#### Scatterplot

In [None]:
# Similarity to the target row against each county's death count.
fp1_scatter = scatterplot(
    xs=fp1_df['Opioid_Deaths'],
    ys=fp1_df['similarity'],
    text=fp1_df['Location'],
    x_axis='Opioid_Deaths',
    y_axis='Similarity Score',
    title='Counties Similar to Presidio County, Texas with respect to opioid deaths',
)
offline.iplot(fp1_scatter)

## Counties similar to a county with a low number of opioid deaths and high probability density of opioid deaths

Here we'll focus on counties similar to a county with a low number of opioid deaths and **high** probability density of opioid deaths: **Starr County, Texas**.

#### Fetch

In [None]:
# Target row 2735 (Starr County, Texas, per the section text above),
# compared in the context of Opioid_Deaths.
fp2_payload = {'target-row': 2735, 'context-column': 'Opioid_Deaths'}

fp2_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp2_payload,
)
assert fp2_response.status_code == requests.codes.ok

In [None]:
# Load the JSON response into a (rowid, similarity) frame.
fp2_df = pandas.DataFrame(data=fp2_response.json(), columns=['rowid', 'similarity'])
# Discard rows that came back without a similarity value.
fp2_df = fp2_df.loc[fp2_df['similarity'].notna()]
# Attach the table columns the plots below read.
fp2_df = with_columns(fp2_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fp2_df.head(5)

#### Choropleth

In [None]:
# Map each county's similarity using an eleven-step colour list.
fp2_choropleth = choropleth(
    title='Counties Similar to Starr County, Texas with respect to opioid deaths',
    legend_title='Similarity Score',
    fips=fp2_df['state_county_fips'],
    values=fp2_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
        "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp2_choropleth)

#### Scatterplot

In [None]:
# Similarity to the target row against each county's death count.
fp2_scatter = scatterplot(
    xs=fp2_df['Opioid_Deaths'],
    ys=fp2_df['similarity'],
    text=fp2_df['Location'],
    x_axis='Opioid_Deaths',
    y_axis='Similarity Score',
    title='Counties Similar to Starr County, Texas with respect to opioid deaths',
)
offline.iplot(fp2_scatter)

## Counties similar to a county with a high number of opioid deaths

Here we'll focus on counties similar to a county with a high number of opioid deaths: <b>Los Angeles County, California.</b>

#### Fetch

In [None]:
# Target row 203 (Los Angeles County, California, per the section text above),
# compared in the context of Opioid_Deaths.
fp3_payload = {'target-row': 203, 'context-column': 'Opioid_Deaths'}

fp3_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp3_payload,
)
assert fp3_response.status_code == requests.codes.ok

In [None]:
# Load the JSON response into a (rowid, similarity) frame.
fp3_df = pandas.DataFrame(data=fp3_response.json(), columns=['rowid', 'similarity'])
# Discard rows that came back without a similarity value.
fp3_df = fp3_df.loc[fp3_df['similarity'].notna()]
# Attach the table columns the plots below read.
fp3_df = with_columns(fp3_df, columns=['state_county_fips', 'Opioid_Deaths', 'Location'])
fp3_df.head(5)

#### Choropleth

In [None]:
# Map each county's similarity using an eleven-step colour list.
fp3_choropleth = choropleth(
    title='Counties Similar to Los Angeles County, California with respect to opioid deaths',
    legend_title='Similarity Score',
    fips=fp3_df['state_county_fips'],
    values=fp3_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9",
        "#9ecae1", "#85bcdb", "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp3_choropleth)

#### Scatterplot

In [None]:
# Similarity to the target row against each county's death count.
# The title previously misspelled the county as "Los Aangeles"; it now
# matches the title used for the choropleth of the same data.
fp3_scatter = scatterplot(
    xs=fp3_df['Opioid_Deaths'],
    x_axis='Opioid_Deaths',
    ys=fp3_df['similarity'],
    y_axis='Similarity Score',
    text=fp3_df['Location'],
    title='Counties Similar to Los Angeles County, California with respect to opioid deaths'
)
offline.iplot(fp3_scatter)