In [1]:
# Import the required libraries 

from bq_helper import BigQueryHelper
import bq_helper

from plotly.offline import init_notebook_mode, iplot
from wordcloud import WordCloud
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import plotly.plotly as py
from plotly import tools

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
init_notebook_mode(connected=True)

medicare = bq_helper.BigQueryHelper(active_project="bigquery-public-data", dataset_name="cms_medicare")
bq_assistant = BigQueryHelper("bigquery-public-data", "cms_medicare")

In [2]:
bq_assistant.list_tables()

[u'home_health_agencies_2013',
 u'home_health_agencies_2014',
 u'hospice_providers_2014',
 u'hospital_general_info',
 u'inpatient_charges_2011',
 u'inpatient_charges_2012',
 u'inpatient_charges_2013',
 u'inpatient_charges_2014',
 u'inpatient_charges_2015',
 u'nursing_facilities_2013',
 u'nursing_facilities_2014',
 u'outpatient_charges_2011',
 u'outpatient_charges_2012',
 u'outpatient_charges_2013',
 u'outpatient_charges_2014',
 u'outpatient_charges_2015',
 u'part_d_prescriber_2014',
 u'physicians_and_other_supplier_2012',
 u'physicians_and_other_supplier_2013',
 u'physicians_and_other_supplier_2014',
 u'physicians_and_other_supplier_2015',
 u'referring_durable_medical_equip_2013',
 u'referring_durable_medical_equip_2014']

In [3]:
# How many nurrsing facilitins in different States 
query1 = """SELECT
state, COUNT(state) as total_facilities
FROM
  `bigquery-public-data.cms_medicare.nursing_facilities_2014`
  GROUP BY 
  state
ORDER BY
  total_facilities DESC;"""
response1 = medicare.query_to_pandas_safe(query1)
response1.head(10)

Unnamed: 0,state,total_facilities
0,TX,1200
1,CA,1084
2,OH,936
3,IL,709
4,PA,703
5,FL,688
6,NY,618
7,IN,511
8,MO,497
9,MI,424


In [4]:
scl = [[0.0, 'rgb(248,255,206)'],[0.2, 'rgb(203,255,205)'],
       [0.4, 'rgb(155,255,164)'], [0.6, 'rgb(79,255,178)'],
       [0.8, 'rgb(15,183,132)'], [1, '#008059']]
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = response1.state,
        z = response1.total_facilities,
        locationmode = 'USA-states',
        text = response1.state,
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "Facilities in Different States")
        )
       ]

layout = dict(
        title = 'Nursing Facilities in Different States',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map' )

# Average length of stay relative to  avg. HCC score

In [13]:
joined_hcc_avg_stay_q = """
SELECT 
    t1.facility_name, 
    t1.city, 
    t1.state, 
    t1.average_length_of_stays_days, 
    t2.average_hcc_score 
FROM
(
    SELECT
      facility_name, city, state, average_length_of_stays_days
    FROM
      `bigquery-public-data.cms_medicare.nursing_facilities_2014`
    ORDER BY
      average_length_of_stays_days DESC
) t1
INNER JOIN
(  
    SELECT 
      facility_name, city, state, average_hcc_score
    FROM 
      `bigquery-public-data.cms_medicare.nursing_facilities_2014`
    ORDER BY 
      average_hcc_score DESC
) t2
ON (t1.facility_name = t2.facility_name)
"""
joined_hcc_avg_stay = medicare.query_to_pandas_safe(joined_hcc_avg_stay_q)

In [17]:
joined_hcc_avg_stay.columns

Index([u'facility_name', u'city', u'state', u'average_length_of_stays_days',
       u'average_hcc_score'],
      dtype='object')

In [19]:
joined_hcc_avg_stay.average_hcc_score.corr(joined_hcc_avg_stay.average_length_of_stays_days)

0.22778056065938543

In [24]:
import statsmodels.api as sm
model = sm.OLS(joined_hcc_avg_stay.average_length_of_stays_days, joined_hcc_avg_stay.average_hcc_score).fit()

AttributeError: 'module' object has no attribute 'compat'

In [18]:
trace1 = go.Bar(
    x=joined_hcc_avg_stay.state,
    y=joined_hcc_avg_stay.average_hcc_score,
    name='Average HCC Score',
    marker=dict(color='rgb(158,202,225)'),
)
trace2 = go.Bar(
    x=joined_hcc_avg_stay.state,
    y=joined_hcc_avg_stay.average_length_of_stays_days,
    name='Average length of stay in days',
    marker=dict(color='rgb(58,22,25)'),
)

data = [trace1, trace2]
layout = go.Layout(
    barmode='group',
    title='Test',
    legend=dict(orientation="h")
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='grouped-bar')