# Bayesian Database Search API Tutorial

## Preamble
Import necessary libraries.

In [None]:
import requests
import math
import pandas

Set HTTP headers to be used across all requests.

In [None]:
# Shared header set: the POST requests in this notebook send JSON bodies.
headers = {
    "content-type": "application/json",
}

Load visualization utility functions.

In [None]:
# Execute the helper script inside this session. The plotting names used
# further down (bar_chart, choropleth, scatterplot, offline) are not defined
# anywhere in this notebook, so they presumably come from visualize.py --
# TODO confirm against that file.
%run visualize.py

## The data

In [None]:
# Download the whole table from the service. No query parameters are needed,
# so nothing is passed besides the URL.
http_response = requests.get('http://bayesrest:5000/table-data')
assert http_response.status_code == 200

# The JSON body carries the rows under 'data' and their labels under 'columns'.
response_json = http_response.json()
data = response_json['data']

df = pandas.DataFrame(
    data=data,
    columns=response_json['columns']
)
# Index the frame by rowid; 'rowid' also stays available as a regular column.
df.index = df['rowid']

def with_columns(rdf, columns=None):
    """Return `rdf` left-joined with selected columns of the global `df`.

    Args:
        rdf: DataFrame that has a 'rowid' column.
        columns: Names of `df` columns to attach. 'rowid' is always taken
            along as the join key. ``None`` (the default) attaches nothing
            beyond the key.

    Returns:
        A new DataFrame holding every row of `rdf` plus the requested
        columns, matched on 'rowid'.
    """
    wanted = ['rowid'] + ([] if columns is None else list(columns))
    # `df` carries rowid both as its index and as a column. Recent pandas
    # versions refuse to merge on a name that is both an index level and a
    # column label, so the index is dropped from the lookup frame first.
    lookup = df.loc[:, wanted].reset_index(drop=True)
    return rdf.merge(lookup, on='rowid', how='left')

# Preview the first five rows of the table.
df.head(5)

## Columns most predictive of `Opioid_Deaths`

In [None]:
# Column whose associated columns are requested in the cells that follow.
fac0_column = "Opioid_Deaths"

#### Fetch

In [None]:
# POST the column name to the find-associated-columns endpoint.
fac0_payload = {'column': fac0_column}
fac0_response = requests.post(
    url='http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=fac0_payload,
)
assert fac0_response.status_code == 200

#### Bar chart

In [None]:
# The parsed response is kept under its own name because a later cell reuses
# it to pick context columns.
od_predictive_columns = fac0_response.json()

# Chart only the first 25 entries of the response.
fac0_bar_chart = bar_chart(
    od_predictive_columns[:25],
    x_axis='Relevance to {}'.format(fac0_column),
    title='Columns most predictive of {}'.format(fac0_column),
)
offline.iplot(fac0_bar_chart)

## Columns most predictive of `Total Property Crimes Rate (per 100000 Population)`

#### Fetch

In [None]:
# Same query as before, this time for the property-crime rate column.
fac1_column = 'Total Property Crimes Rate (per 100000 Population)'
fac1_payload = {'column': fac1_column}
fac1_response = requests.post(
    url='http://bayesrest:5000/find-associated-columns',
    headers=headers,
    json=fac1_payload,
)
assert fac1_response.status_code == 200

#### Bar chart

In [None]:
fac1_columns = fac1_response.json()

# Chart only the first 25 entries of the response.
fac1_bar_chart = bar_chart(
    fac1_columns[:25],
    x_axis='Relevance to {}'.format(fac1_column),
    title='Columns most predictive of {}'.format(fac1_column),
)
offline.iplot(fac1_bar_chart)

## Counties with unlikely numbers of opioid deaths

### Unlikely counties without context

In [None]:
# Column whose values are scored for anomalies (no context columns here).
fa0_target_column = "Opioid_Deaths"

#### Fetch

In [None]:
# Anomaly query for the target column with an empty context-column list.
fa0_payload = {
    'target-column': fa0_target_column,
    'context-columns': []
}
# Use the notebook-wide `headers` like every other POST in this notebook,
# instead of rebuilding the same dict inline.
fa0_response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    json=fa0_payload,
    headers=headers
)
assert fa0_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, probability), discard rows that have no
# probability, then attach the FIPS code, location name and target value.
fa0_df = with_columns(
    pandas.DataFrame(
        fa0_response.json(),
        columns=['rowid', 'probability'],
    ).dropna(subset=['probability']),
    ['state_county_fips', 'Location', fa0_target_column],
)
fa0_df.head(5)

#### Choropleth

In [None]:
# The map is fed the natural log of each probability rather than the raw value.
fa0_choropleth = choropleth(
    fa0_df['state_county_fips'],
    fa0_df['probability'].transform(math.log),
    title='Counties with unlikely values for {}'.format(fa0_target_column),
)
offline.iplot(fa0_choropleth)

#### Scatterplot

In [None]:
# Target value on x, probability on y, one point per row labelled by location.
fa0_scatter = scatterplot(
    x_axis=fa0_target_column,
    xs=fa0_df[fa0_target_column],
    y_axis='Probability Score',
    ys=fa0_df['probability'],
    text=fa0_df['Location'],
)
offline.iplot(fa0_scatter)

### Unlikely counties in the context of predictively relevant columns

In [None]:
# Column whose values are scored for anomalies given hand-picked context columns.
fa1_target_column = "Opioid_Deaths"

#### Fetch

In [None]:
# Anomaly query for the target column, conditioned on three context columns.
# The context names are sent with embedded double quotes, exactly as written.
fa1_payload = dict([
    ('target-column', fa1_target_column),
    ('context-columns', [
        '"Trump 2016"',
        '"Total Population: Foreign Born: Not a Citizen"',
        '"Families: Income in  below poverty level: Married Couple Family: with Related Child Living  Bellow Poverty Level"',
    ]),
])
fa1_response = requests.post(
    url='http://bayesrest:5000/find-anomalies',
    headers=headers,
    json=fa1_payload,
)
assert fa1_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, probability), discard rows that have no
# probability, then attach the FIPS code, location name and target value.
fa1_df = with_columns(
    pandas.DataFrame(
        fa1_response.json(),
        columns=['rowid', 'probability'],
    ).dropna(subset=['probability']),
    ['state_county_fips', 'Location', fa1_target_column],
)
fa1_df.head(5)

#### Choropleth

In [None]:
# The map is fed the natural log of each probability rather than the raw value.
# The title text keeps its embedded newline, indentation and <br> tag.
fa1_choropleth = choropleth(
    fa1_df['state_county_fips'],
    fa1_df['probability'].transform(math.log),
    title=(
        'Anomalous counties in terms of {}, in the context of support for Trump, \n'
        '    <br>density of immigrants, and poverty'
    ).format(fa1_target_column),
)
offline.iplot(fa1_choropleth)

#### Scatterplot

In [None]:
# Positional arguments are, in order: target values, probabilities, locations.
# The title text keeps its embedded newline, indentation and <br> tag.
fa1_scatter = scatterplot(
    fa1_df[fa1_target_column],
    fa1_df['probability'],
    fa1_df['Location'],
    title=(
        'Anomalous counties in terms of {}, in the context of support for Trump, \n'
        '    <br> density of immigrants, and poverty'
    ).format(fa1_target_column),
    x_axis=fa1_target_column,
    y_axis='Probability Score',
)
offline.iplot(fa1_scatter)

### Unlikely counties in the context of the top 10 most relevant columns

In [None]:
# Column whose values are scored for anomalies given the top associated columns.
fa2_target_column = "Opioid_Deaths"

#### Fetch

In [None]:
# Context columns are entries 1..10 of the association list fetched earlier.
# Entry 0 is skipped -- presumably it is the target column itself; TODO confirm.
fa2_context_columns = [column['column'] for column in od_predictive_columns[1:11]]
fa2_payload = {
    'target-column': fa2_target_column,
    'context-columns': fa2_context_columns
}
# Send this section's own payload. Posting fa1_payload here would silently
# repeat the previous section's query and ignore fa2_context_columns.
fa2_response = requests.post(
    'http://bayesrest:5000/find-anomalies',
    json=fa2_payload,
    headers=headers
)
assert fa2_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, probability), discard rows that have no
# probability, then attach the FIPS code, location name and target value.
fa2_df = with_columns(
    pandas.DataFrame(
        fa2_response.json(),
        columns=['rowid', 'probability'],
    ).dropna(subset=['probability']),
    ['state_county_fips', 'Location', fa2_target_column],
)
fa2_df.head(5)

#### Choropleth

In [None]:
# The map is fed the natural log of each probability rather than the raw value.
fa2_choropleth = choropleth(
    fa2_df['state_county_fips'],
    fa2_df['probability'].transform(math.log),
    title=(
        'Anomalous counties in terms of {}, '
        'in the context of the top 10 most relevant variables'
    ).format(fa2_target_column),
)
offline.iplot(fa2_choropleth)

#### Scatterplot

In [None]:
# Positional arguments are, in order: target values, probabilities, locations.
fa2_scatter = scatterplot(
    fa2_df[fa2_target_column],
    fa2_df['probability'],
    fa2_df['Location'],
    title=(
        'Anomalous counties in terms of {}, '
        'in the context of the top 10 most relevant columns'
    ).format(fa2_target_column),
    x_axis=fa2_target_column,
    y_axis='Probability Score',
)
offline.iplot(fa2_scatter)

## Counties similar to a county with a low number of opioid deaths and low probability density of opioid deaths

In [None]:
def location_to_rowid(location):
    """Return the rowid of the first row of `df` whose 'Location' equals `location`.

    Args:
        location: Value to look for in the 'Location' column of the global `df`.

    Returns:
        The matching rowid as a plain Python scalar (callers place it in a
        JSON request payload), or ``None`` when no row matches. Callers
        already assert that the result is not ``None``.
    """
    matches = df.loc[df['Location'] == location, 'rowid']
    if matches.empty:
        return None
    rowid = matches.iloc[0]
    # numpy.asscalar() has been removed from NumPy and `numpy` is not imported
    # in this notebook; .item() is the equivalent conversion on NumPy scalars.
    # Values that are already plain Python objects are returned unchanged.
    return rowid.item() if hasattr(rowid, 'item') else rowid

In [None]:
# County to find peers for, and the column that defines the comparison.
fp1_location = 'Presidio County, Texas'
fp1_context_column = 'Opioid_Deaths'

# Resolve the county name to its rowid in the table.
fp1_rowid = location_to_rowid(fp1_location)
assert fp1_rowid is not None

#### Fetch

In [None]:
# POST the chosen row and context column to the find-peers endpoint.
fp1_payload = dict([
    ('target-row', fp1_rowid),
    ('context-column', fp1_context_column),
])

fp1_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp1_payload,
)
assert fp1_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, similarity), discard rows that have no
# similarity, then attach the FIPS code, location name and context value.
fp1_df = with_columns(
    pandas.DataFrame(
        fp1_response.json(),
        columns=['rowid', 'similarity'],
    ).dropna(subset=['similarity']),
    ['state_county_fips', 'Location', fp1_context_column],
)
fp1_df.head(5)

#### Choropleth

In [None]:
# Map the similarity scores using an eleven-step hex colour scale.
fp1_choropleth = choropleth(
    title='Counties similar to {} with respect to {}'.format(fp1_location, fp1_context_column),
    legend_title='Similarity Score',
    fips=fp1_df['state_county_fips'],
    values=fp1_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3",
        "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb",
        "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp1_choropleth)

#### Scatterplot

In [None]:
# Context value on x, similarity on y, one point per row labelled by location.
fp1_scatter = scatterplot(
    title='Counties similar to {} with respect to {}'.format(fp1_location, fp1_context_column),
    xs=fp1_df[fp1_context_column],
    ys=fp1_df['similarity'],
    text=fp1_df['Location'],
    x_axis=fp1_context_column,
    y_axis='Similarity Score',
)
offline.iplot(fp1_scatter)

## Counties similar to a county with a low number of opioid deaths and high probability density of opioid deaths

#### Fetch

In [None]:
# County to find peers for, and the column that defines the comparison.
fp2_location = 'Starr County, Texas'
fp2_context_column = 'Opioid_Deaths'

# Resolve the county name to its rowid in the table.
fp2_rowid = location_to_rowid(fp2_location)
assert fp2_rowid is not None

In [None]:
# POST the chosen row and context column to the find-peers endpoint.
fp2_payload = dict([
    ('target-row', fp2_rowid),
    ('context-column', fp2_context_column),
])

fp2_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp2_payload,
)
assert fp2_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, similarity), discard rows that have no
# similarity, then attach the FIPS code, location name and context value.
fp2_df = with_columns(
    pandas.DataFrame(
        fp2_response.json(),
        columns=['rowid', 'similarity'],
    ).dropna(subset=['similarity']),
    ['state_county_fips', 'Location', fp2_context_column],
)
fp2_df.head(5)

#### Choropleth

In [None]:
# Map the similarity scores using an eleven-step hex colour scale.
fp2_choropleth = choropleth(
    title='Counties Similar to {} with respect to {}'.format(fp2_location, fp2_context_column),
    legend_title='Similarity Score',
    fips=fp2_df['state_county_fips'],
    values=fp2_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3",
        "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb",
        "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp2_choropleth)

#### Scatterplot

In [None]:
# Context value on x, similarity on y, one point per row labelled by location.
fp2_scatter = scatterplot(
    title='Counties Similar to {} with respect to {}'.format(fp2_location, fp2_context_column),
    xs=fp2_df[fp2_context_column],
    ys=fp2_df['similarity'],
    text=fp2_df['Location'],
    x_axis=fp2_context_column,
    y_axis='Similarity Score',
)
offline.iplot(fp2_scatter)

## Counties similar to a county with a high number of opioid deaths

Here we'll focus on counties similar to a county with a high number of opioid deaths: <b>Los Angeles County, California.</b>

#### Fetch

In [None]:
# County to find peers for, and the column that defines the comparison.
fp3_location = 'Los Angeles County, California'
fp3_context_column = 'Opioid_Deaths'

# Resolve the county name to its rowid in the table.
fp3_rowid = location_to_rowid(fp3_location)
assert fp3_rowid is not None

In [None]:
# POST the chosen row and context column to the find-peers endpoint.
fp3_payload = dict([
    ('target-row', fp3_rowid),
    ('context-column', fp3_context_column),
])

fp3_response = requests.post(
    url='http://bayesrest:5000/find-peers',
    headers=headers,
    json=fp3_payload,
)
assert fp3_response.status_code == 200

In [None]:
# Tabulate the response as (rowid, similarity), discard rows that have no
# similarity, then attach the FIPS code, location name and context value.
fp3_df = with_columns(
    pandas.DataFrame(
        fp3_response.json(),
        columns=['rowid', 'similarity'],
    ).dropna(subset=['similarity']),
    ['state_county_fips', 'Location', fp3_context_column],
)
fp3_df.head(5)

#### Choropleth

In [None]:
# Map the similarity scores using an eleven-step hex colour scale.
fp3_choropleth = choropleth(
    title='Counties similar to {} with respect to {}'.format(fp3_location, fp3_context_column),
    legend_title='Similarity Score',
    fips=fp3_df['state_county_fips'],
    values=fp3_df['similarity'],
    color_scale=[
        "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3",
        "#c6dbef", "#b3d2e9", "#9ecae1", "#85bcdb",
        "#6baed6", "#57a0ce", "#4292c6",
    ],
)
offline.iplot(fp3_choropleth)

#### Scatterplot

In [None]:
# Context value on x, similarity on y, one point per row labelled by location.
fp3_scatter = scatterplot(
    title='Counties Similar to {} with respect to {}'.format(fp3_location, fp3_context_column),
    xs=fp3_df[fp3_context_column],
    ys=fp3_df['similarity'],
    text=fp3_df['Location'],
    x_axis=fp3_context_column,
    y_axis='Similarity Score',
)
offline.iplot(fp3_scatter)