In [101]:
#imports
import numpy as np
import pandas as pd
import math

import matplotlib.pyplot as plt
import seaborn as sns

# Globally silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
# NOTE(review): this also hides genuine "wrote to a copy" mistakes; prefer building new
# frames (e.g. with .assign / .copy()) so the warning can stay enabled.
pd.options.mode.chained_assignment = None 

In [6]:
# Load the two source datasets (hospital records and crime records) from CSV files
# located in the notebook's working directory.
hospital_df, crime_df = (
    pd.read_csv(csv_name) for csv_name in ("hosp_data.csv", "crime_data.csv")
)

In [92]:
# Lookup table mapping each full US state name (the 50 states plus the
# District of Columbia) to its two-letter abbreviation.
# Source: https://gist.github.com/rogerallen/1583593

us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
}

# Inverse lookup (abbreviation -> full state name), plus the abbreviations as a list
# in the same order as the entries of us_state_to_abbrev.
abbrev_to_us_state = {abbrev: name for name, abbrev in us_state_to_abbrev.items()}
us_state_list = [*us_state_to_abbrev.values()]

 # Operation Find Top-Rated Hospitals (OFTRH)

 ### Abstract

Finding quality healthcare is important. The goal of this project is to provide patients with listings of top-rated hospitals near them, so that they can receive timely, quality care. A second goal is to attempt to explain the reasons behind these hospital ratings by comparing them with crime rates in the same areas. We hypothesize that areas with higher crime rates will have less well-funded hospitals and therefore lower hospital ratings, while areas with lower crime rates will have better-funded hospitals and therefore higher hospital ratings. To accomplish this, we will use the Hospital General Information data set available at this [site](https://data.cms.gov/provider-data/topics/hospitals) and extract the hospital ratings data for each state. The data provides key information on hospitals in the US, including their name, state, rating, and type of hospital ownership (government, proprietary, voluntary (non-profit), etc.). To determine the correlation(s) between hospital ratings and crime rates, we will use the crime rate dataset from this [site](https://worldpopulationreview.com/state-rankings/crime-rate-by-state).

 ### Hypothesis
    
We hypothesize that areas with higher crime rates will have less well-funded hospitals and therefore lower hospital ratings, while areas with lower crime rates will have better-funded hospitals and therefore higher hospital ratings.

 ### Process
 
 We will extract the hospital data, organize it by state, and then look for correlations between the per-state hospital ratings and the crime rates by state.
 

In [112]:
# Takes in state input as abbr (WI, IL, etc)
def getOverallRatingsByStateAbbr(state_abbr):
    """Return the overall hospital ratings for one state as an int64 Series.

    Ratings recorded as "Not Available" are imputed with the mean of the
    state's available ratings, rounded to the nearest whole star.

    Parameters
    ----------
    state_abbr : str
        State abbreviation as stored in the "State" column of ``hospital_df``.

    Returns
    -------
    pandas.Series
        int64 ratings, one per hospital in the state (original index kept).
        Empty when the state has no hospitals, or when none of its hospitals
        has a rating (there is nothing to impute from).

    Raises
    ------
    ValueError
        If a rating is neither numeric nor the "Not Available" marker.
    """
    raw_ratings = hospital_df.loc[
        hospital_df["State"] == state_abbr, "Hospital overall rating"
    ]

    # Blank out the "Not Available" marker, then parse strictly so that any other
    # unexpected text still fails loudly instead of being imputed silently.
    numeric_ratings = pd.to_numeric(raw_ratings.mask(raw_ratings == "Not Available"))
    known_ratings = numeric_ratings.dropna()

    if known_ratings.empty:
        # No usable rating at all: casting NaN to int64 would raise, so return
        # an empty result and let callers see NaN from .mean().
        return known_ratings.astype("int64")

    # Round half up to the nearest star. A bare int cast would truncate
    # (e.g. 3.67 -> 3) and systematically drag the state average down.
    fill_value = int(np.floor(known_ratings.mean() + 0.5))
    return numeric_ratings.fillna(fill_value).astype("int64")

# Select every hospital row belonging to one state
def getHospitalsByStateAbbr(state_abbr):
    """Return the rows of ``hospital_df`` whose "State" value equals ``state_abbr``.

    The result keeps the original row index; it is empty when no row matches.
    """
    in_state = hospital_df["State"].eq(state_abbr)
    return hospital_df.loc[in_state]

# Look up the crime-rate rows for one state
def getCrimeRateByStateAbbr(state_abbr):
    """Return the rows of ``crime_df`` for the state with abbreviation ``state_abbr``.

    The abbreviation is translated to the full state name through
    ``abbrev_to_us_state`` and matched against the "state" column.

    Raises
    ------
    KeyError
        If ``state_abbr`` is not a key of ``abbrev_to_us_state``.
    """
    state_names = crime_df["state"]
    full_name = abbrev_to_us_state[state_abbr]
    return crime_df.loc[state_names == full_name]

# Get the top rated hospital by state
def getTopRatedHospitalByStateAbbr(state_abbr):
    """Return the highest-rated hospital of a state as a single row.

    Hospitals whose rating is "Not Available" are scored 0 so they rank below
    every rated hospital. When several hospitals share the top rating, the one
    that appears first in the data is returned, so the result is deterministic.

    Parameters
    ----------
    state_abbr : str
        State abbreviation as stored in the hospital data's "State" column.

    Returns
    -------
    pandas.Series
        The winning hospital's row, with "Hospital overall rating" as an integer.

    Raises
    ------
    IndexError
        If the state has no hospitals.
    """
    rating_column = "Hospital overall rating"
    hospitals = getHospitalsByStateAbbr(state_abbr)
    if len(hospitals) == 0:
        raise IndexError("no hospitals found for state {!r}".format(state_abbr))

    ratings = hospitals[rating_column].replace("Not Available", "0").astype("int64")

    # Build a new frame instead of assigning into the filtered slice: writing into
    # a slice is what triggers pandas' chained-assignment warning.
    scored = hospitals.assign(**{rating_column: ratings})

    # argmax yields the first position holding the maximum, which makes the
    # tie-break explicit without sorting the whole frame.
    top_position = int(ratings.to_numpy().argmax())
    return scored.iloc[top_position]

# Count the hospitals listed for one state
def getNumberOfHospitalsByStateAbbr(state_abbr):
    """Return how many rows ``getHospitalsByStateAbbr`` yields for ``state_abbr``."""
    state_hospitals = getHospitalsByStateAbbr(state_abbr)
    return state_hospitals.shape[0]

In [13]:
# Spot check: mean overall hospital rating for Illinois
getOverallRatingsByStateAbbr("IL").mean()

3.163157894736842

In [20]:
# Spot check: total of the "rate" column over the Illinois crime rows
getCrimeRateByStateAbbr("IL")["rate"].sum()

3544.667

### Correlation 
Correlation is a statistical measure that expresses the extent to which two variables are linearly related (meaning they change together at a constant rate). It is a common tool for describing simple relationships without making a statement about cause and effect.

In [17]:
# Print every abbreviation in us_state_list, one per line, as a quick sanity check.
# NOTE(review): the saved output of this cell also lists territory codes (AS, GU, MP,
# PR, UM, VI) that the us_state_to_abbrev literal in this notebook does not define,
# so the output appears stale -- re-run on a fresh kernel to confirm.
for state in us_state_list:
    print (state)

AL
AK
AZ
AR
CA
CO
CT
DE
FL
GA
HI
ID
IL
IN
IA
KS
KY
LA
ME
MD
MA
MI
MN
MS
MO
MT
NE
NV
NH
NJ
NM
NY
NC
ND
OH
OK
OR
PA
RI
SC
SD
TN
TX
UT
VT
VA
WA
WV
WI
WY
DC
AS
GU
MP
PR
UM
VI


In [None]:
# get all state hospital ratings (mean) and crime rates
for state in us_state_list:
    # get rating
    getOverallRatingsByStateAbbr(state).mean()
    # add this to a list or something
    getCrimeRateByStateAbbr(state)

In [105]:
# Example: display the top-rated hospital record for Ohio
getTopRatedHospitalByStateAbbr("OH")

Facility ID                                                                      360365
Facility Name                                                             AVITA ONTARIO
Address                                                               715 RICHLAND MALL
City                                                                            ONTARIO
State                                                                                OH
ZIP Code                                                                          44906
County Name                                                                    RICHLAND
Phone Number                                                             (419) 462-4534
Hospital Type                                                      Acute Care Hospitals
Hospital Ownership                                       Voluntary non-profit - Private
Emergency Services                                                                  Yes
Meets criteria for promoting int