<a href="https://colab.research.google.com/github/jowalz/GCP/blob/master/Chapter2/Chapter_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import Pandas
import pandas as pd

In [None]:
# Fetch the CDC dataset "Heart Disease Mortality Data Among US Adults (35+)
# by State/Territory and County -- 2018-2020" directly from data.cdc.gov and
# read it into a Pandas dataframe, using the first CSV column as the row index.
# `url` is kept as a notebook-level variable because the wget cell further
# down interpolates it.

url = "https://data.cdc.gov/api/views/jiwm-ppbh/rows.csv?accessType=DOWNLOAD"
heart_df = pd.read_csv(filepath_or_buffer=url, index_col=0)

In [None]:
# Preview the first five rows of heart_df.

heart_df.head(n=5)

In [None]:
# Use the info() method to print a summary of the heart_df dataframe:
# the index, each column's name, its non-null count and its dtype.

heart_df.info()

In [None]:
# Cell to download CSV file from CDC to Colab Notebook environment.

# Note: The directions ask you in Chapter 2 to download this directly from the CDC website and then
# upload the file to the Colab enviornment using the directions in Chapter 2. Please follow these
# directions your first time through this exercise as this code will be explained in Chapter 8.

!wget $url -O ./heart.csv


In [None]:
# Load the downloaded CDC CSV into a Pandas dataframe and view the first five rows.

# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0,
# where passing it raises a TypeError. `on_bad_lines='warn'` (pandas >= 1.3) is
# its replacement and keeps the old behavior: malformed rows are skipped and a
# warning is emitted for each one instead of aborting the whole read.
heart_df = pd.read_csv('/content/heart.csv', on_bad_lines='warn',
                       engine="python")

heart_df.head()

In [None]:
# Use the info() method to print a summary (column names, non-null counts and
# dtypes) of the heart_df dataframe as it stands after the reload above.
# Note: The exact values of this cell may differ from the values shown in Chapter 2
# depending on when this notebook is executed.

heart_df.info()

In [None]:
# Count the missing (null / NaN) entries in every column of heart_df.

heart_df.isna().sum()

In [None]:
# Show the distinct values found in the Stratification2 column of heart_df.

# Q: What do you think this column could represent?

# Move the current index back into an ordinary column so the rows are
# labelled 0..n-1 before the column is inspected and plotted.
heart_df = heart_df.reset_index()

heart_df["Stratification2"].unique()

In [None]:
# Draw a Seaborn violin plot of Data_Value (horizontal axis) for each
# Stratification2 group (vertical axis).

import seaborn as sns
sns.violinplot(data=heart_df, y='Stratification2', x='Data_Value')

In [None]:
import folium
import geopandas as gpd
import pandas as pd
import json
import requests

# Interactive county-level choropleth of heart disease mortality, built with
# Folium from the CDC dataset and a FIPS-keyed US counties GeoJSON file.

# Load the CDC dataset. LocationID is read as text so the codes never pass
# through a numeric dtype before being zero-padded.
url = 'https://data.cdc.gov/api/views/jiwm-ppbh/rows.csv?accessType=DOWNLOAD'
heart_df = pd.read_csv(url, dtype={"LocationID": str})

# Rename 'LocationID' to 'FIPS' (no inplace mutation, so the cell can be re-run).
heart_df = heart_df.rename(columns={'LocationID': 'FIPS'})

# The file holds several rows per location (one per stratum; see the
# Stratification2 column explored earlier). Folium keeps only one value per
# key, so without this filter each county would be coloured by whichever
# stratum happened to come last. Keep the all-groups rows only.
# NOTE(review): assumes the all-groups label is "Overall" and that a
# Stratification1 column exists alongside Stratification2 -- confirm against
# heart_df.Stratification2.unique(). Columns that are absent are skipped.
for strat_col in ("Stratification1", "Stratification2"):
    if strat_col in heart_df.columns:
        heart_df = heart_df[heart_df[strat_col] == "Overall"]

# Keep only relevant columns and drop rows without a code or a rate.
heart_df = heart_df[['FIPS', 'Data_Value']].dropna()

# Format FIPS codes (ensure 5-digit, zero-padded strings to match the GeoJSON ids).
heart_df["FIPS"] = heart_df["FIPS"].astype(str).str.zfill(5)

# Load US Counties GeoJSON. Fail loudly on an HTTP error or a hung connection
# instead of handing an error page to .json().
geo_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
geo_response = requests.get(geo_url, timeout=30)
geo_response.raise_for_status()
geojson_data = geo_response.json()

# Create a Folium map centered in the US
m = folium.Map(location=[37.8, -96], zoom_start=5, tiles="cartoDBpositron")

# Add Choropleth layer: each GeoJSON feature's `id` is matched to the FIPS column.
folium.Choropleth(
    geo_data=geojson_data,
    name="Heart Disease Mortality Rate",
    data=heart_df,
    columns=["FIPS", "Data_Value"],
    key_on="feature.id",
    fill_color="YlOrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Heart Disease Mortality Rate (per 100,000)"
).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Save map to HTML and display
m.save("heart_disease_map.html")
m


In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

# Static county-level map of heart disease mortality, drawn with
# GeoPandas/Matplotlib from the CDC dataset and a FIPS-keyed counties GeoJSON.

# Load the CDC dataset. LocationID is read as text so the codes never pass
# through a numeric dtype before being zero-padded.
url = 'https://data.cdc.gov/api/views/jiwm-ppbh/rows.csv?accessType=DOWNLOAD'
heart_df = pd.read_csv(url, dtype={"LocationID": str})

# Rename 'LocationID' to 'FIPS' for clarity (no inplace mutation, so the cell
# can be re-run).
heart_df = heart_df.rename(columns={'LocationID': 'FIPS'})

# The file holds several rows per location (one per stratum; see the
# Stratification2 column explored earlier). Left-merging that onto the county
# shapes would repeat every county geometry once per stratum and draw them on
# top of each other, so keep the all-groups rows only.
# NOTE(review): assumes the all-groups label is "Overall" and that a
# Stratification1 column exists alongside Stratification2 -- confirm against
# heart_df.Stratification2.unique(). Columns that are absent are skipped.
for strat_col in ("Stratification1", "Stratification2"):
    if strat_col in heart_df.columns:
        heart_df = heart_df[heart_df[strat_col] == "Overall"]

# Keep only necessary columns (FIPS = county code, Data_Value = mortality rate)
heart_df = heart_df[['FIPS', 'Data_Value']].dropna()

# Ensure FIPS codes are strings and properly formatted (5 digits)
heart_df["FIPS"] = heart_df["FIPS"].astype(str).str.zfill(5)

# Load US Counties GeoJSON (FIPS-based)
geo_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
us_counties = gpd.read_file(geo_url)

# Merge the heart disease data with the geospatial data. The left join keeps
# every county shape; counties without a matching rate get NaN in Data_Value.
merged = us_counties.merge(heart_df, left_on="id", right_on="FIPS", how="left")

# Plot the map
fig, ax = plt.subplots(figsize=(12, 8))
merged.plot(column='Data_Value', cmap='Reds', linewidth=0.5, edgecolor='black',
            legend=True, legend_kwds={'label': "Heart Disease Mortality Rate (per 100,000)"}, ax=ax)

plt.title("Heart Disease Mortality Rate by County (2018-2020)")
plt.axis("off")
plt.show()
