<h1 style="color: green">Covid Tracker</h1>
<h2 style="color: green">Infections Data</h2>
<hr style="border: 1px solid darkgreen"/>

<h3 style="color: green">Setup Imports</h3>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Start (or reuse) a local-mode Spark session for this notebook.
session_builder = SparkSession.builder.master("local").appName("Infections Tracker")
spark = session_builder.getOrCreate()

# Location of the confirmed-US time-series CSV, relative to the notebook.
confirmed_us_csv = '../datasets/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'

# Read the CSV with a header row and inferred column types, keeping only the
# identifying columns plus the 1/21/22 snapshot. The snapshot column is exposed
# as "1_21_22" so later cells can reference it without slashes in the name.
df = (
    spark.read.format("csv")
    .options(header="true", inferSchema="true")
    .load(confirmed_us_csv)
    .select(
        "UID",
        "Admin2",
        "Province_State",
        col("1/21/22").alias("1_21_22"),
    )
)

<h3 style="color: green">Infections Total In USA</h3>

In [None]:
# Sum the snapshot column inside Spark instead of collecting every row to the
# driver and adding them up in a Python loop. Spark's sum ignores null cells
# (the Python loop would raise TypeError on one) and only a single row comes back.
us_total_row = df.agg({"1_21_22": "sum"}).first()

# The aggregate is null when there are no non-null values; report that as 0.
# `total` is reused by the percentage cell further down.
total = us_total_row[0] or 0

print("US Total Infections:")
print("{:,}".format(total))

<h3 style="color: green">Highest Infection Counts By State</h3>

In [None]:
# Summed infections per Province_State, largest first. Rows whose
# Province_State contains "Princess" are filtered out before aggregating.
infections_df = (
    df.select(col("Province_State"), col("1_21_22"))
    .where(~col("Province_State").like("%Princess%"))
    .groupBy(col("Province_State"))
    .agg({"1_21_22": "sum"})
    .withColumnRenamed("sum(1_21_22)", "State_Infections")
    .orderBy("State_Infections", ascending=False)
    .toPandas()
)

x = infections_df["Province_State"]
y = infections_df["State_Infections"]

# Explicit figure/axes so the labels below are attached to this chart only.
fig, ax = plt.subplots(figsize=(20, 7))
ax.set_title('Infections By State\n')
ax.bar(x, y, color='green')
ax.set_xlabel("State")
ax.set_ylabel("Infections")
# One tick label per state: draw them vertically so neighbours do not overlap.
ax.tick_params(axis="x", labelrotation=90)

plt.show()

In [None]:
# Top 10 states by summed infections, with each state's share of the national
# `total` (computed in the "Infections Total In USA" cell) as Percent_Total.
percent_of_total_df = (
    df.select(col("1_21_22"), col("Province_State"))
    .groupBy(col("Province_State"))
    .agg({"1_21_22": "sum"})
    .withColumnRenamed("sum(1_21_22)", "State_Infections")
    .withColumn("Percent_Total", col("State_Infections") / total * 100)
    .orderBy("State_Infections", ascending=False)
    .limit(10)
    .toPandas()
)

aggs = percent_of_total_df["State_Infections"]
labels_list = percent_of_total_df["Province_State"]

# A pie normalises its wedges to 100%, so plotting only the top-10 counts would
# print each state's share of the top-10 sum, not of the national total. Plot
# the Percent_Total values and add a remainder wedge so the wedges sum to 100
# and the autopct labels are true shares of `total`.
pie_values = list(percent_of_total_df["Percent_Total"])
pie_labels = list(labels_list)
remainder = 100 - sum(pie_values)
if remainder > 1e-9:  # skip the extra wedge when the top 10 already cover everything
    pie_values.append(remainder)
    pie_labels.append("All other states")

pie_fig, pie_ax = plt.subplots()
pie_ax.set_title("Percentage of Infections By State\n", color="green", fontsize=23)
wedges = pie_ax.pie(
    pie_values,
    autopct="%1.1f%%",
    wedgeprops={"edgecolor": "black", "width": 0.6},
)[0]
pie_ax.legend(
    wedges,
    pie_labels,
    loc="center left",
    title="States",
    bbox_to_anchor=(1.0, 0, 0.5, 1.0),
)

plt.show()

<h3 style="color: green">Clean Up / Close</h3>

In [None]:
# Shut down the Spark session. None of the cells shown in this notebook
# register a temp view, so there is nothing to drop beforehand; the earlier
# dropTempView("RecoveryInfo") call referred to a view that is never created here.
spark.stop()