In [1]:
def PopDensityDeathsScatter():
    """Scatter-plot per-state deaths against average population density.

    Reads ``../../Resources/AllByState-Daily.csv`` (resolved against the
    current working directory), aggregates it to one row per "State Name"
    (mean of "Population Density", sum of "Deaths Diff"), fits a
    least-squares regression line through those points, and draws the
    scatter plot with the fitted line, its equation and the r-squared value.

    Side effects:
        Writes ``Pop Density vs. Deaths - Scatter.png`` to the current
        working directory and displays the figure via ``plt.show()``.

    Returns:
        None.

    Raises:
        FileNotFoundError: if the CSV is not at the expected relative path.
        KeyError: if one of the required columns is missing from the CSV.
    """
    # Imports stay function-local, as in the original notebook cell, so the
    # cell remains self-contained. Only the modules actually used are
    # imported: the function no longer fails when unrelated packages
    # (gmaps, us, requests) or a local ``config`` module are unavailable.
    import pandas as pd
    import matplotlib.pyplot as plt
    from scipy.stats import linregress

    # Locate and load the daily per-state data.
    resource_path = "../../Resources"
    all_by_state_daily_path = f"{resource_path}/AllByState-Daily.csv"
    all_by_state_daily_df = pd.read_csv(all_by_state_daily_path)

    # Collapse the rows to one row per state. Neither aggregate depends on
    # row order, so no date sort is needed beforehand.
    # NOTE(review): "Deaths Diff" is presumably the per-day change in deaths,
    # which would make its sum the state's total - confirm against the data.
    per_state_df = (
        all_by_state_daily_df
        .groupby("State Name")
        .agg({"Population Density": "mean", "Deaths Diff": "sum"})
        # A state with no density values has a NaN mean; a single NaN would
        # turn the whole regression into NaN, so drop such rows first.
        .dropna()
    )

    x_values = per_state_df["Population Density"]
    y_values = per_state_df["Deaths Diff"]

    # Least-squares fit of deaths on population density.
    fit = linregress(x_values, y_values)
    regress_values = x_values * fit.slope + fit.intercept

    # Build the equation label; choose the sign explicitly so a negative
    # intercept reads "x -12.3" instead of "x +-12.3".
    rounded_intercept = round(fit.intercept, 2)
    sign = "-" if rounded_intercept < 0 else "+"
    line_eq = (
        "y = " + str(round(fit.slope, 2)) + "x " + sign
        + str(abs(rounded_intercept))
    )
    rsq = "The r-squared value is: " + str(round(fit.rvalue ** 2, 2))

    # Draw on a fresh figure so a leftover current figure is never overdrawn.
    fig, ax = plt.subplots()
    ax.scatter(x_values, y_values, marker="o", facecolors="red",
               edgecolors="black")
    ax.plot(x_values, regress_values, "r-")
    # Annotation anchors are in data coordinates, hand-picked for this
    # dataset; they may fall outside the axes if the data range changes.
    ax.annotate(line_eq, (200, 15000), fontsize=15, color="red")
    ax.annotate(rsq, (200, 12000), fontsize=13, color="blue")
    ax.grid()
    ax.set_title("Deaths vs. Avg. Population Density", fontsize=14,
                 fontweight="bold")
    ax.set_xlabel("Avg. Population Density")
    ax.set_ylabel("Deaths")

    # Save before showing: some backends clear the figure once it is shown.
    fig.savefig("Pop Density vs. Deaths - Scatter.png")
    plt.show()