In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to imported .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import helper
import postgres
import json

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

  from pandas import Int64Index as NumericIndex


In [5]:
def age_data_stats(file):
    """Load the ``total_age_groups`` section of a JSON file as a DataFrame.

    The JSON is read with pandas, the ``total_age_groups`` column is expanded
    (one row per entry of that column) and then transposed, so the entries
    become columns. The first three resulting columns (positions 0, 1, 2) are
    renamed to ``cases``, ``critical`` and ``deaths``.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        Transposed frame with columns ``cases``, ``critical`` and ``deaths``.
        The index is whatever the entries of ``total_age_groups`` expand to
        (presumably age-group labels -- confirm against the data file).

    Raises
    ------
    FileNotFoundError
        If ``file`` does not exist.
    KeyError
        If the parsed JSON has no ``total_age_groups`` column.
    """
    columns = ["cases", "critical", "deaths"]

    # Pass the open handle to pandas instead of the path: the file is opened
    # once, and UTF-8 matches what pandas uses when it opens a path itself.
    with open(file, "r", encoding="utf-8") as f:
        raw = pd.read_json(f)

    per_metric = raw["total_age_groups"].tolist()
    age_groups = pd.DataFrame(per_metric).T

    # Positional labels 0, 1, 2 -> "cases", "critical", "deaths".
    return age_groups.rename(columns=dict(enumerate(columns)))


def driver_code(json_path, record_path=None):
    """Read a JSON file into a flat DataFrame and make sure the DB tables exist.

    Parameters
    ----------
    json_path : str or path-like
        Path of the JSON file to load.
    record_path : str or list of str, optional
        Forwarded to ``pandas.json_normalize`` to select the list of records
        inside the parsed JSON. ``None`` normalises the top-level object.

    Returns
    -------
    pandas.DataFrame
        The normalised records. If a ``date`` column is present it is
        converted with ``pandas.to_datetime``.

    Notes
    -----
    ``postgres.create_tables_json()`` is called on every invocation. It is a
    project helper not shown here; presumably it connects to PostgreSQL and
    creates missing tables -- confirm in the ``postgres`` module.
    """
    # json.load parses straight from the handle; the file is closed before the
    # database call and the normalisation run.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    postgres.create_tables_json()
    df = pd.json_normalize(data, record_path=record_path)

    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])

    # Loading the frame into the database is currently disabled:
    # postgres.import_data(df, "covid_data_greece")
    return df

In [8]:
# Load the national time series; "Greece" is passed as the record path that
# driver_code forwards to json_normalize.
cases = driver_code(
    json_path='../../Data/covid19-data-greece/data/greece/general/timeseries_greece.json',
    record_path='Greece',
)

Creating connection to the PostgreSQL database...
Table covid_data_greece already exists! Skipping...


In [9]:
# Sanity-check the load: column names, dtypes and non-null counts.
cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 792 entries, 0 to 791
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       792 non-null    datetime64[ns]
 1   confirmed  792 non-null    int64         
 2   recovered  792 non-null    int64         
 3   deaths     792 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 24.9 KB


In [11]:
# Derive calendar month and year from the parsed `date` column.
cases = cases.assign(
    month=cases['date'].dt.month,
    year=cases['date'].dt.year,
)

In [14]:
# Show every row that falls in April (month number 4), across all years.
cases.loc[cases['month'].eq(4)]

Unnamed: 0,date,confirmed,recovered,deaths,month,year
70,2020-04-01,1156,52,38,4,2020
71,2020-04-02,1156,61,38,4,2020
72,2020-04-03,1613,78,59,4,2020
73,2020-04-04,1673,78,68,4,2020
74,2020-04-05,1735,78,73,4,2020
75,2020-04-06,1755,269,79,4,2020
76,2020-04-07,1832,269,81,4,2020
77,2020-04-08,1884,269,83,4,2020
78,2020-04-09,1955,269,86,4,2020
79,2020-04-10,2009,269,90,4,2020


In [None]:
file = "../../Data/covid19-data-greece/data/greece/age_distribution/age_data_history.json"

# Only the read needs the handle; json.load parses directly from it.
with open(file, "r", encoding="utf-8") as f:
    data = json.load(f)

ages = pd.json_normalize(data=data, sep="_")
ages["date"] = pd.to_datetime(ages["date"])

# Same selection as before (the first five columns of `ages`, which include the
# `date` merge key), but the remaining columns are remembered by NAME. The old
# positional slices (`columns[4:8]`, `columns[-4:]`) pointed at the wrong
# columns as soon as `cases` carried extras such as `month` / `year`.
age_subset = ages[ages.columns[:5]]
age_cols = [col for col in age_subset.columns if col != "date"]

# Dropping age columns left over from an earlier run keeps this cell
# re-runnable: merging twice would otherwise create `_x` / `_y` duplicates.
cases = (
    cases.drop(columns=age_cols, errors="ignore")
    .merge(right=age_subset, how="outer", on="date")
    .fillna(0)
)
# The outer merge introduces NaN (filled with 0 above), which turns the age
# columns into floats; cast them back to integers.
cases[age_cols] = cases[age_cols].astype("int64")

cases["year_month"] = cases["date"].dt.strftime("%Y-%m")

# Sum only the count columns. A bare `.sum()` would also add up helper columns
# (`month`, `year`) and, on pandas >= 2.0, raise on the datetime `date` column.
# NOTE(review): the In [14] output shows `confirmed` never decreasing from day
# to day, which suggests a running total; if so, these monthly sums add up
# running totals rather than new cases -- confirm the intended aggregation.
count_cols = ["confirmed", "recovered", "deaths", *age_cols]
grouped_cases = cases.groupby(by="year_month", as_index=False)[count_cols].sum()

# Confirmed cases that are not attributed to any age group.
grouped_cases["cases_unknown"] = grouped_cases["confirmed"] - grouped_cases[age_cols].sum(axis=1)
grouped_cases = grouped_cases.drop(columns="confirmed")

# Strip the "cases_" prefix so the column names can serve directly as labels.
grouped_cases.columns = grouped_cases.columns.str.replace("cases_", "", regex=False)

In [None]:
# Reshape to long format: one row per (year_month, category) pair.
# The value columns are chosen by name rather than by position (`columns[3:]`),
# because helper columns such as `month` / `year` shift the positions and would
# otherwise show up as bogus categories in the chart.
non_age_cols = {"year_month", "recovered", "deaths", "month", "year"}
melted = grouped_cases.melt(
    id_vars='year_month',
    value_vars=[col for col in grouped_cases.columns if col not in non_age_cols],
    var_name='cases',
)

In [None]:
# Bar chart of the `deaths` column per `year_month`, with each bar's value
# printed above it.
fig = (
    px.bar(
        data_frame=grouped_cases,
        x="year_month",
        y="deaths",
        text=grouped_cases["deaths"],
        title="Death Count per Month",
        width=1200,
        height=600,
    )
    .update_traces(textposition="outside", texttemplate="%{text:.3s}")
    .update_layout(
        uniformtext_mode="hide",
        uniformtext_minsize=8,
        # Monthly ticks, e.g. "Apr-2020", slanted so they do not overlap.
        xaxis={"dtick": "M1", "tickformat": "%b-%Y", "tickangle": -45, "title": ""},
    )
)

fig.show()


In [None]:
# Grouped bar chart of the melted values per `year_month`, one bar colour per
# entry of the `cases` column.
fig = px.bar(
    melted,
    x="year_month",
    y="value",
    color="cases",
    title="Confirmed COVID-19 Cases per Month",
).update_layout(
    width=1200,
    height=600,
    barmode="group",
    # Monthly ticks, e.g. "Apr-2020", slanted so they do not overlap.
    xaxis={"dtick": "M1", "tickformat": "%b-%Y", "tickangle": -45, "title": ""},
)

fig.show()


In [None]:
file = "../../Data/covid19-data-greece/data/greece/age_distribution/age_data.json"
age_groups = age_data_stats(file)

# One title per column of `age_groups`, in column order.
titles = [
    "Age Distribution of COVID-19 Confirmed Cases",
    "Critical Cases per Age Group",
    "Death count",
]

# A single row of three pie-typed subplot cells.
fig = go.Figure().set_subplots(
    rows=1,
    cols=3,
    specs=[[{"type": "pie"} for _ in range(3)]],
)

# Subplot columns are 1-based, hence start=1; titles are looked up 0-based.
for position, column in enumerate(age_groups.columns, start=1):
    pie = go.Pie(
        labels=age_groups.index,
        values=age_groups[column],
        title=titles[position - 1],
    )
    fig.add_trace(pie, row=1, col=position)

fig.update_layout(width=1200, height=500)

fig.show()