<h1 style="color: green">Covid Tracker</h1>
<h2 style="color: green">Mortality Data</h2>
<hr style="border: 1px solid darkgreen"/>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Local Spark session used by every cell in this notebook.
spark = (
    SparkSession.builder
    .master("local")
    .appName("Mortality Tracker")
    .getOrCreate()
)

# JHU CSSE US deaths time series (one column per date).
deaths_csv_path = '../datasets/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'

# Only the identifying columns and the 1/21/22 snapshot are kept.
kept_columns = ["UID", "Admin2", "Province_State", "1/21/22"]

df = (
    spark.read.format("csv")
    .options(header="true", inferSchema="true")
    .load(deaths_csv_path)
    .select(*[col(name) for name in kept_columns])
    # The snapshot column is referred to as "1_21_22" by the cells below.
    .withColumnRenamed("1/21/22", "1_21_22")
)

<h3 style="color: darkgreen">Total Deaths</h3>

In [None]:
# Aggregate inside Spark instead of collecting every row to the driver and
# summing in a Python loop; the result is a single-row, single-column frame.
us_total_df = df.agg({"1_21_22": "sum"})

# Spark's sum skips null values and yields null (None) when there is nothing
# to add up, so fall back to 0 rather than failing in the format call below.
total = us_total_df.first()[0] or 0

print("US Total Deaths:")
print("{:,}".format(total))

<h3 style="color: darkgreen">Deaths By State</h3>

In [None]:
# Total deaths per Province_State as of the 1/21/22 snapshot, excluding rows
# whose Province_State contains "Princess".
deaths_df = (
    df.select(col("Province_State"), col("1_21_22"))
    .where(~col("Province_State").like("%Princess%"))
    .groupBy(col("Province_State"))
    .agg({"1_21_22": "sum"})
    .withColumnRenamed("sum(1_21_22)", "State_Deaths")
    # Sort on the renamed column: "sum(1_21_22)" is no longer part of the
    # output schema after the rename above. Ascending order places the
    # largest total at the top of the horizontal bar chart.
    .orderBy("State_Deaths", ascending=True)
    .toPandas()
)

state_names = deaths_df["Province_State"]
state_deaths = deaths_df["State_Deaths"]

# Horizontal bars: states along the vertical axis, death totals along the
# horizontal axis.
fig, ax = plt.subplots(figsize=(17, 14))
ax.set_title("Deaths By State\n")
ax.set_xlabel("Deaths")
ax.set_ylabel("States")
ax.barh(state_names, state_deaths, color="green")

plt.show()

<h3 style="color: darkgreen">Top 10 States By Total Deaths</h3>

In [None]:
# Ten Province_State values with the highest summed 1/21/22 death counts,
# largest first.
percent_of_total_df = (
    df.select(col("1_21_22"), col("Province_State"))
    .groupBy(col("Province_State"))
    .agg({"1_21_22": "sum"})
    .withColumnRenamed("sum(1_21_22)", "State_Deaths")
    .orderBy("State_Deaths", ascending=False)
    .limit(10)
    .toPandas()
)

top_death_counts = percent_of_total_df["State_Deaths"]
top_state_names = percent_of_total_df["Province_State"]

# Each wedge is labelled with its death count; the legend maps wedges to
# state names in the same order.
fig, ax = plt.subplots(figsize=(11, 11))
ax.set_title("Top Deaths By State\n", color="green", fontsize=20)
ax.pie(
    top_death_counts,
    labels=top_death_counts,
    textprops={"color": "green"},
    wedgeprops={"edgecolor": "white"},
)
ax.legend(
    top_state_names,
    loc="center left",
    title="States",
    bbox_to_anchor=(1.0, 0, 0.5, 1.0),
)

plt.show()