In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before code is executed so edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
from pathlib import Path

import helper
import postgres

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd


In [3]:
def age_data_stats(file):
    """Load the per-age-group totals from an age-distribution JSON file.

    The file must contain a top-level ``"total_age_groups"`` entry whose
    values are mappings of age group -> count.

    Parameters
    ----------
    file : str or path-like
        Location of the JSON file; it is passed straight to
        ``pandas.read_json``.

    Returns
    -------
    pandas.DataFrame
        One row per age group and three columns named ``"cases"``,
        ``"critical"`` and ``"deaths"``.
    """
    columns = ["cases", "critical", "deaths"]

    # read_json opens the path itself, so no separate open() call is needed.
    totals = pd.read_json(file)["total_age_groups"]

    # Each entry is a mapping of age group -> count. Building a frame from the
    # list gives one row per entry; transposing puts the age groups on the index.
    age_groups = pd.DataFrame(totals.tolist()).T

    # The rename is positional: it assumes the entries appear in the file in
    # the order cases, critical, deaths.
    return age_groups.rename(columns=dict(enumerate(columns)))


In [9]:
root = "../data/covid19-data-greece/data/greece"
file = Path(root, "general/timeseries_greece.json")

# Read the raw time series and release the file handle before any database
# work starts. JSON is UTF-8, so do not rely on the platform default encoding.
with open(file, "r", encoding="utf-8") as f:
    data = json.load(f)

postgres.create_tables_json()
df = pd.json_normalize(data, record_path="Greece")

if "date" in df.columns:
    df["date"] = pd.to_datetime(df["date"])

    # Incremental load: only rows newer than the latest stored date are sent.
    # NOTE(review): assumes helper.last_entry returns a DataFrame-like object
    # with a "date" column that is empty when the table has no rows - confirm.
    res = helper.last_entry("covid_data_greece")
    last_dates = res["date"].values
    if len(last_dates) > 0:
        df = df[df["date"] > str(last_dates[0])]
    postgres.import_data(df, "covid_data_greece")

Creating connection to the PostgreSQL database...
Table covid_data_greece already exists! Skipping...
Creating connection to the PostgreSQL database...
Creating connection to the PostgreSQL database...


In [10]:
# Pull the stored time series back out of the database.
cases = helper.fetch_data_from_database(
    date_column="date",
    table="covid_data_greece",
)

# Summary statistics, transposed so each described column becomes a row.
cases.describe().transpose()


Creating connection to the PostgreSQL database...
Opening connection...
Data Fetched


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
confirmed,809.0,507833.687268,739628.678983,0.0,5942.0,192270.0,630784.0,3164995.0
recovered,809.0,30480.990111,41965.750968,0.0,0.0,1374.0,93764.0,93764.0
deaths,809.0,8732.912237,8600.213557,0.0,214.0,6534.0,14466.0,28076.0


In [6]:
# Show the dtype of every column in the fetched frame.
cases.dtypes


date         datetime64[ns]
confirmed           float64
recovered           float64
deaths              float64
dtype: object

In [7]:
# Let pandas pick the best nullable (pd.NA-aware) dtype for each column.
cases = cases.convert_dtypes()


In [8]:
# Replace every column after the first with its row-to-row difference
# (presumably turning cumulative totals into per-day counts).
# NOTE: this overwrites `cases`, so re-running the cell differences the data again.
cases.iloc[:, 1:] = cases.iloc[:, 1:].diff().astype(pd.Int64Dtype())

# First confirmed case was on 2020-02-26. Select by that date instead of the
# positional label 35 so the cut does not depend on how the rows are indexed,
# and copy so later column assignments work on an independent frame.
FIRST_CASE_DATE = pd.Timestamp("2020-02-26")
cases = cases.loc[cases["date"] >= FIRST_CASE_DATE].copy()


In [9]:
# Derive calendar fields from `date`; `year_month` ("YYYY-MM") is the grouping
# key used for the monthly aggregates.
cases = cases.assign(
    month=lambda frame: frame["date"].dt.month,
    year=lambda frame: frame["date"].dt.year,
    year_month=lambda frame: frame["date"].dt.strftime("%Y-%m"),
)


In [10]:
file = Path(root, "age_distribution/age_data_history.json")

# Only the read needs the file handle; JSON is UTF-8, so say so explicitly.
with open(file, "r", encoding="utf-8") as f:
    data = json.load(f)

ages = pd.json_normalize(data=data, sep="_")
ages["date"] = pd.to_datetime(ages["date"])

# Replace rows 1+ of every non-date column with the row-to-row difference.
# The differences are sliced to rows 1+ as well, so both sides of the
# assignment have the same shape and nothing depends on implicit index
# alignment. Row 0 has no predecessor and keeps its original value.
ages.iloc[1:, 1:] = ages.iloc[:, 1:].diff().iloc[1:].astype(pd.Int64Dtype())

# Attach the first five columns of `ages` (assumed to be `date` plus the four
# case-count age groups - TODO confirm) to the daily series.
# NOTE(review): with how="outer", any date present only in `ages` gets 0 in
# every other column, including month / year / year_month.
cases = cases.merge(right=ages[ages.columns[:5]], how="outer", on="date").fillna(0)


In [11]:
# Line chart of the `confirmed` column over time. A title is set so the
# figure stands on its own, matching the other charts in this notebook.
px.line(
    data_frame=cases,
    x="date",
    y="confirmed",
    title="Confirmed COVID-19 Cases per Day",
    width=1000,
    height=500,
)


In [12]:
# Monthly totals. numeric_only=True restricts the aggregation to numeric
# columns explicitly, so the datetime `date` column (which cannot be summed)
# is left out on every pandas version instead of depending on the default.
grouped_cases = (
    cases.groupby(by="year_month", as_index=False)
    .sum(numeric_only=True)
    .drop(columns="confirmed")
)

# Strip the literal "cases_" prefix so the age-group columns are named by
# their range only (e.g. "0-17").
grouped_cases.columns = grouped_cases.columns.str.replace("cases_", "", regex=False)


In [13]:
# Position of the first age-group column. The age-group columns are assumed
# to be the trailing columns of the frame, starting at "0-17".
srt_idx = grouped_cases.columns.get_loc("0-17")

# Long format: one row per (month, age group) with its count, ready for a
# grouped bar chart. The slice uses the computed position rather than a
# hard-coded column count.
melted = grouped_cases.melt(
    id_vars="year_month",
    value_vars=grouped_cases.columns[srt_idx:],
    var_name="cases",
    value_name="confirmed",
    ignore_index=True,
)


In [14]:
# Bar chart of the monthly `deaths` totals, each bar labelled with its value.
fig = (
    px.bar(
        data_frame=grouped_cases,
        x="year_month",
        y="deaths",
        text=grouped_cases["deaths"],
        title="Death Count per Month",
        width=1200,
        height=600,
    )
    # Value labels sit above the bars, formatted to three significant digits.
    .update_traces(textposition="outside", texttemplate="%{text:.3s}")
    # Monthly ticks, slanted; labels that cannot be drawn at size 8 are hidden.
    .update_layout(
        uniformtext_mode="hide",
        uniformtext_minsize=8,
        xaxis={"title": "", "tickangle": -45, "tickformat": "%b-%Y", "dtick": "M1"},
    )
)

fig.show()


In [15]:
# Bar chart of the melted monthly counts, one colour per value of `cases`,
# with the bars of each month placed side by side.
fig = px.bar(
    melted,
    x="year_month",
    y="confirmed",
    color="cases",
    title="Confirmed COVID-19 Cases per Month",
).update_layout(
    width=1200,
    height=600,
    barmode="group",
    xaxis={"title": "", "tickangle": -45, "tickformat": "%b-%Y", "dtick": "M1"},
)

fig.show()


In [16]:
file = f"{root}/age_distribution/age_data.json"
age_groups = age_data_stats(file)

# One title per column of `age_groups`, in column order.
titles = [
    "Age Distribution of COVID-19 Confirmed Cases",
    "Critical Cases per Age Group",
    "Death count",
]

# A single row of three pie-type subplot cells.
pie_specs = [[{"type": "pie"} for _ in range(3)]]
fig = go.Figure().set_subplots(rows=1, cols=3, specs=pie_specs)

# Subplot columns are 1-based, hence the enumeration starting at 1.
for position, column in enumerate(age_groups.columns, start=1):
    pie = go.Pie(
        labels=age_groups.index,
        values=age_groups[column],
        title=titles[position - 1],
    )
    fig.add_trace(pie, row=1, col=position)

fig.update_layout(width=1200, height=500)
fig.show()
