In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd, datetime
import numpy as np
import requests
import time
from scipy.stats import linregress
from ydata_profiling import ProfileReport
from pathlib import Path

In [None]:
# Read the city health CSV from the Resources folder into a DataFrame
city_health_csv = "../Resources/CDC_500_City_Health.csv"
city_health = pd.read_csv(city_health_csv)

# Show the first five rows as a quick sanity check of the load
city_health.head()

In [None]:
# Review column dtypes and non-null counts of the raw data
city_health.info()

In [None]:
# Summary statistics of the raw data (describe() covers numeric columns by default)
city_health.describe()

In [None]:
# Build a ydata_profiling report for the raw data and render it inline
profile = ProfileReport(
    city_health,
    title="City Health Profile Report",
)
profile.to_notebook_iframe()

In [None]:
# Drop the footnote and tract-FIPS columns
# (presumably the null columns flagged by the profiling report; confirm)
null_columns = ['Data_Value_Footnote_Symbol', 'Data_Value_Footnote', 'TractFIPS']
clean_city_health = city_health.drop(columns=null_columns)

In [None]:
# Re-check column dtypes and non-null counts after dropping the
# footnote and TractFIPS columns
clean_city_health.info()

In [None]:
# Summary statistics after dropping the footnote and TractFIPS columns
# (describe() covers numeric columns by default)
clean_city_health.describe()

In [None]:
# Re-run the ydata_profiling report on the cleaned data and render it inline.
# Note: this rebinds `profile`, replacing the report built from the raw data.
profile = ProfileReport(
    clean_city_health,
    title="City Health Profile Report",
)
profile.to_notebook_iframe()

In [55]:
# Count the rows for every (StateDesc, MeasureId) pair, i.e. how many
# records each state has for each health measure
diseases_per_state = (
    clean_city_health
    .groupby('StateDesc')['MeasureId']
    .value_counts()
)
diseases_per_state


StateDesc  MeasureId
Alabama    CANCER       6
           CASTHMA      6
           CHD          6
           COPD         6
Alaska     CANCER       1
                       ..
Wisconsin  COPD         7
Wyoming    CANCER       1
           CASTHMA      1
           CHD          1
           COPD         1
Name: count, Length: 204, dtype: int64

In [64]:
# Number of non-null MeasureId records per state, largest first
# NOTE(review): the rendered output shows labels such as 'North Carolin' and
# 'South Carolin', so StateDesc looks truncated in the source data; confirm
# before matching or joining on the full state name.
diseases_per_state_total = (
    clean_city_health
    .groupby('StateDesc')['MeasureId']
    .count()
    .sort_values(ascending=False)
)
diseases_per_state_total



StateDesc
California       484
Texas            188
Florida          132
Illinois          72
Michigan          64
Colorado          56
Washington        56
North Carolin     56
Massachusetts     52
Arizona           48
Indiana           44
Virginia          44
Georgia           44
Ohio              36
New York          36
New Jersey        36
Utah              36
Oregon            32
Connecticut       32
Missouri          32
Pennsylvania      28
Wisconsin         28
Minnesota         28
Tennessee         24
Oklahoma          24
Alabama           24
Kansas            24
Iowa              24
Louisiana         24
Nevada            20
Arkansas          20
South Carolin     20
New Mexico        16
Rhode Island      16
Idaho             12
Nebraska           8
South Dakota       8
Montana            8
Kentucky           8
Mississippi        8
New Hampshire      8
Maine              4
North Dakota       4
Maryland           4
Alaska             4
Hawaii             4
Vermont            4
Dis

In [66]:
# Number of distinct city names per state, largest first
cities_per_state = (
    clean_city_health
    .groupby('StateDesc')['CityName']
    .nunique()
    .sort_values(ascending=False)
)
cities_per_state

StateDesc
California       121
Texas             47
Florida           33
Illinois          18
Michigan          16
Colorado          14
Washington        14
North Carolin     14
Massachusetts     13
Arizona           12
Indiana           11
Virginia          11
Georgia           11
Ohio               9
New York           9
New Jersey         9
Utah               9
Oregon             8
Connecticut        8
Missouri           8
Pennsylvania       7
Wisconsin          7
Minnesota          7
Tennessee          6
Oklahoma           6
Alabama            6
Kansas             6
Iowa               6
Louisiana          6
Nevada             5
Arkansas           5
South Carolin      5
New Mexico         4
Rhode Island       4
Idaho              3
Nebraska           2
South Dakota       2
Montana            2
Kentucky           2
Mississippi        2
New Hampshire      2
Maine              1
North Dakota       1
Maryland           1
Alaska             1
Hawaii             1
Vermont            1
Dis

In [53]:
# Per-measure record counts for California, the state with the most records;
# selecting on the first index level leaves a Series keyed by MeasureId
diseases_per_state.loc['California']

MeasureId
CANCER     121
CASTHMA    121
CHD        121
COPD       121
Name: count, dtype: int64

In [63]:
# Keep only the rows of the cleaned data whose StateDesc is California
# (a plain state filter; no ranking by disease count happens here)
is_california = clean_city_health['StateDesc'] == 'California'
california_cities = clean_city_health.loc[is_california]
california_cities

Unnamed: 0,Year,StateAbbr,StateDesc,CityName,GeographicLevel,DataSource,Category,UniqueID,Measure,Data_Value_Unit,...,Data_Value_Type,Data_Value,Low_Confidence_Limit,High_Confidence_Limit,PopulationCount,GeoLocation,CategoryID,MeasureId,CityFIPS,Short_Question_Text
0,2017,CA,California,Hayward,City,BRFSS,Health Outcomes,633000,Coronary heart disease among adults aged >=18 ...,%,...,Age-adjusted prevalence,4.8,4.7,4.8,144186,"(37.6329591551, -122.077051051)",HLTHOUT,CHD,633000,Coronary Heart Disease
1,2017,CA,California,Indio,City,BRFSS,Health Outcomes,636448,Chronic obstructive pulmonary disease among ad...,%,...,Age-adjusted prevalence,6.0,5.8,6.2,76036,"(33.7298067837, -116.237258141)",HLTHOUT,COPD,636448,COPD
2,2017,CA,California,Bellflower,City,BRFSS,Health Outcomes,604982,Coronary heart disease among adults aged >=18 ...,%,...,Age-adjusted prevalence,5.3,5.2,5.4,76616,"(33.8880417923, -118.127100236)",HLTHOUT,CHD,604982,Coronary Heart Disease
3,2017,CA,California,Lynwood,City,BRFSS,Health Outcomes,644574,Cancer (excluding skin cancer) among adults ag...,%,...,Age-adjusted prevalence,5.0,5.0,5.1,69772,"(33.9239616867, -118.201648375)",HLTHOUT,CANCER,644574,Cancer (except skin)
4,2017,CA,California,Redding,City,BRFSS,Health Outcomes,659920,Cancer (excluding skin cancer) among adults ag...,%,...,Age-adjusted prevalence,6.6,6.6,6.7,89861,"(40.5697591271, -122.365026322)",HLTHOUT,CANCER,659920,Cancer (except skin)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,2017,CA,California,Visalia,City,BRFSS,Health Outcomes,682954,Chronic obstructive pulmonary disease among ad...,%,...,Age-adjusted prevalence,6.2,6.0,6.3,124442,"(36.3270623157, -119.325273214)",HLTHOUT,COPD,682954,COPD
618,2017,CA,California,Victorville,City,BRFSS,Health Outcomes,682590,Current asthma among adults aged >=18 Years,%,...,Age-adjusted prevalence,9.6,9.5,9.8,115903,"(34.5277691103, -117.353855136)",HLTHOUT,CASTHMA,682590,Current Asthma
619,2017,CA,California,Visalia,City,BRFSS,Health Outcomes,682954,Coronary heart disease among adults aged >=18 ...,%,...,Age-adjusted prevalence,5.7,5.6,5.8,124442,"(36.3270623157, -119.325273214)",HLTHOUT,CHD,682954,Coronary Heart Disease
620,2017,CA,California,Westminster,City,BRFSS,Health Outcomes,684550,Current asthma among adults aged >=18 Years,%,...,Age-adjusted prevalence,7.7,7.6,7.8,89701,"(33.7521523769, -117.993760054)",HLTHOUT,CASTHMA,684550,Current Asthma
