In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import contextily
import mapclassify
import folium
import aiohttp
import fsspec

# County boundary geometries, published as a zip archive on GitHub.
c = "https://github.com/babdelfa/gis/blob/main/counties_geometry.zip?raw=true"

# Fetch the archive through fsspec, parse it with GeoPandas and keep only the
# two columns used later: the FIPS_BEA key column and the geometry.
with fsspec.open(c) as file:
    county_shapes = gpd.read_file(file)[['FIPS_BEA', 'geometry']].copy()

# Johns Hopkins University COVID-19 time series (US confirmed cases and
# deaths), read straight from the project's GitHub repository.
url_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
url_deaths = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"

# Clean-up shared by both tables:
#   - rename Admin2 -> county, Province_State -> state,
#     Combined_Key -> county_state
#   - drop the columns that are never used afterwards (FIPS is kept because
#     it is one of the id columns below)
column_renames = {"Admin2": "county", "Province_State": "state", "Combined_Key": "county_state"}
unused_columns = ["UID", "iso2", "iso3", "code3", "Country_Region", "Lat", "Long_"]

df_cases = pd.read_csv(url_cases).rename(columns=column_renames).drop(columns=unused_columns)
df_deaths = pd.read_csv(url_deaths).rename(columns=column_renames).drop(columns=unused_columns)

# Reshape from wide (one column per date) to long (one row per location and
# date). The deaths table carries an extra Population id column.
id_columns = ["FIPS", "county", "state", "county_state"]
df_cases_melted = df_cases.melt(id_vars=id_columns, var_name="dates", value_name="cases")
df_deaths_melted = df_deaths.melt(id_vars=id_columns + ["Population"], var_name="dates", value_name="deaths")

# The former column headers are strings such as "12/31/22"; parse them into
# real datetimes.
for melted in (df_cases_melted, df_deaths_melted):
    melted["dates"] = pd.to_datetime(melted["dates"], format='%m/%d/%y')

# Ask which county to report on. Surrounding whitespace is stripped so that an
# accidental leading/trailing space does not prevent a match.
input_county = input("Enter County: ").strip()

# The same county name can exist in several states, so collect every state
# that has a county with this name. df_deaths is the pre-melt table, so it
# holds each county/state pair once instead of once per date.
unique_states = df_deaths.loc[df_deaths['county'] == input_county, 'state'].unique()

if len(unique_states) == 0:
    # Without this guard the lookup below would fail with a bare IndexError
    # that gives the user no hint about what went wrong.
    raise ValueError(f"County {input_county!r} was not found in the COVID-19 data.")

if len(unique_states) > 1:
    print(f"The county name {input_county} is not unique. It exists in these states: {unique_states}")
    input_state = input("Please enter the state for the county: ").strip()
    # Reject a state that has no county of this name; otherwise every later
    # subset would be empty or silently wrong.
    if input_state not in unique_states:
        raise ValueError(
            f"{input_state!r} is not one of the states containing a county named {input_county!r}."
        )
    state_of_county = input_state
else:
    state_of_county = unique_states[0]

# Subset the case data for the requested county. The state must be part of the
# filter: county names repeat across states, and matching on the name alone
# would interleave several counties and corrupt every statistic below.
county_mask = (df_cases_melted['county'] == input_county) & (df_cases_melted['state'] == state_of_county)
county_data = df_cases_melted[county_mask].sort_values('dates').copy()
if county_data.empty:
    raise ValueError(f"No COVID-19 case data found for {input_county}, {state_of_county}.")

# Extract relevant information for the report (population is looked up for the
# same county/state pair, not just the first county with a matching name).
population = df_deaths.loc[
    (df_deaths['county'] == input_county) & (df_deaths['state'] == state_of_county),
    'Population',
].iloc[0]
first_outbreak_date = county_data[county_data['cases'] > 0]['dates'].min()

# Daily new cases are the day-over-day change of the cumulative count. The
# first day has no predecessor, so its own cumulative value is used; that way
# the daily values add up to the cumulative total.
county_data['daily_new_cases'] = county_data['cases'].diff().fillna(county_data['cases'])

# Summary statistics per calendar year. Totals are converted to int because
# diff() yields floats and the report should show whole case counts.
report_years = county_data['dates'].dt.year
average_new_cases_2020 = county_data.loc[report_years == 2020, 'daily_new_cases'].mean()
average_new_cases_2021 = county_data.loc[report_years == 2021, 'daily_new_cases'].mean()
average_new_cases_2022 = county_data.loc[report_years == 2022, 'daily_new_cases'].mean()
total_new_cases_2020 = int(county_data.loc[report_years == 2020, 'daily_new_cases'].sum())
total_new_cases_2021 = int(county_data.loc[report_years == 2021, 'daily_new_cases'].sum())
total_new_cases_2022 = int(county_data.loc[report_years == 2022, 'daily_new_cases'].sum())

# Cumulative count as of December 31, 2022. The series may continue past that
# date, so the last row of the whole series is not the right value; take the
# last row on or before the cutoff instead.
report_cutoff = pd.Timestamp(2022, 12, 31)
cumulative_total_cases_dec31_2022 = county_data.loc[county_data['dates'] <= report_cutoff, 'cases'].iloc[-1]

# Assemble the COVID-19 report line by line, then print it. The first entry
# starts with a newline to leave a blank line before the report header.
report_lines = [
    "\n*** MIS 433 COVID19 Report ***",
    f"Enter County: {input_county}",
    f"Population of {input_county}, {state_of_county}: {population:,}",
    f"First Reported Outbreak in {input_county}: {first_outbreak_date:%B %d, %Y}",
    f"{input_county} County COVID19 Summary Statistics:",
    # Yearly averages (two decimals) followed by yearly totals.
    f" - Average number of new cases in 2020: {average_new_cases_2020:.2f}",
    f" - Average number of new cases in 2021: {average_new_cases_2021:.2f}",
    f" - Average number of new cases in 2022: {average_new_cases_2022:.2f}",
    f" - Total number of new cases in 2020: {total_new_cases_2020:,}",
    f" - Total number of new cases in 2021: {total_new_cases_2021:,}",
    f" - Total number of new cases in 2022: {total_new_cases_2022:,}",
    f" - Cumulative total number of cases: {cumulative_total_cases_dec31_2022:,} (December 31, 2022)",
]
for report_line in report_lines:
    print(report_line)

# Subset the data for the line chart: the requested county in the requested
# state, up to the end of 2022. Filtering on the county name alone would draw
# same-named counties from different states as a single zig-zagging line.
plot_mask = (
    (df_cases_melted['county'] == input_county)
    & (df_cases_melted['state'] == state_of_county)
    & (df_cases_melted['dates'].dt.year <= 2022)
)
df_plotting = df_cases_melted[plot_mask]

# Plot cumulative cases over time
plt.plot(df_plotting['dates'], df_plotting['cases'])
plt.xlabel('Date')
plt.ylabel('Total Number of Cases')
plt.title(f'Total COVID19 Cases for {input_county} County')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()


# Per-county cumulative case counts taken from the "12/31/22" column
# (December 31, 2022), with that column renamed to "cases".
df = (
    df_cases[['FIPS', 'county', 'state', '12/31/22']]
    .copy()
    .rename(columns={"12/31/22": "cases"})
)

# Attach the counts to the county geometries; FIPS_BEA (shapes) and FIPS
# (cases) are the join keys.
gdf = pd.merge(county_shapes, df, left_on='FIPS_BEA', right_on='FIPS')

# Keep only the counties of the state that contains the requested county and
# give the count column its display name.
in_state = gdf['state'] == state_of_county
state_gdf = gdf[in_state].rename(columns={'cases': 'Total Cases'})

# Only the county name, the case count and the geometry are shown on the map.
state_gdf = state_gdf[['county', 'Total Cases', 'geometry']].copy()

# Interactive choropleth of Total Cases for every county in that state,
# classified with natural breaks.
state_gdf.explore(column="Total Cases", cmap="Set2", legend=True, scheme='NaturalBreaks')