In [1]:
import psycopg2  # Connect to PostgreSQL

import numpy as np  # Data Manipulation
import pandas as pd  # Data Manipulation

import os
import json
from dotenv import load_dotenv  # Load .env file with DB Credentials
from io import StringIO

# Data Visualization
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


In [10]:
def make_connection():
    """
    Open a connection to the PostgreSQL server configured through `.env`.

    The `.env` file is loaded first, then DB_NAME, DB_USER, DB_PASSWORD,
    HOST and PORT are read from the environment and handed to
    `psycopg2.connect`.

    Returns
    -------
    The psycopg2 connection on success, or the integer -1 if anything
    raised while connecting (the error is printed, not re-raised).
    """

    load_dotenv()

    # psycopg2.connect() keyword -> name of the environment variable feeding it.
    env_by_keyword = {
        "host": "HOST",
        "database": "DB_NAME",
        "user": "DB_USER",
        "password": "DB_PASSWORD",
        "port": "PORT",
    }
    connect_kwargs = {
        keyword: os.getenv(env_name) for keyword, env_name in env_by_keyword.items()
    }

    try:
        print("Connecting to the PostgreSQL database...")
        connection = psycopg2.connect(**connect_kwargs)
        print("Connection successful\n")
        return connection

    except (Exception, psycopg2.DatabaseError) as error:
        # Callers must check for this sentinel before using the result.
        print(f"Error while connecting to PostgreSQL:\n{error}")
        return -1


def create_tables(conn):
    """
    Create the `covid_data_greece` table if it does not exist yet.

    Parameters
    ----------
    conn : connection-like object
        Must expose `cursor()`, `commit()` and `rollback()` (a psycopg2
        connection in this notebook).

    Returns
    -------
    The cursor that executed the DDL, so the caller can keep using it, or
    None when no cursor could be opened. Errors are printed and the
    transaction is rolled back; they are not re-raised.
    """

    # The trailing comma is essential: without it the parentheses are mere
    # grouping, `queries` is a single str, and the loop below would try to
    # execute the statement one character at a time.
    queries = (
        """ 
        CREATE TABLE IF NOT EXISTS covid_data_greece (
            date date PRIMARY KEY,
            confirmed numeric(7, 0),
            recovered numeric(5, 0),
            deaths numeric(5, 0)
        )
        """,
    )

    # Bound up front so the final `return` is safe even if cursor() raises.
    cursor = None

    try:
        cursor = conn.cursor()
        for query in queries:
            cursor.execute(query)
        conn.commit()

        print(
            "Table `covid_data_greece` created successfully!\n-----------------------------------------------"
        )

    # psycopg2.DatabaseError derives from Exception, so one clause covers both.
    except Exception as error:
        print("Error while creating tables!\nRolling back changes...\n", error)
        conn.rollback()

    return cursor


def import_data(conn, cursor, table_name, df):
    """
    Replace the contents of `table_name` with the rows of `df` via COPY.

    Parameters
    ----------
    conn : connection-like object exposing `commit()` and `rollback()`.
    cursor : cursor-like object exposing `execute()` and `copy_expert()`.
    table_name : str
        Interpolated directly into the SQL text, so it must be a trusted
        identifier and never user input.
    df : pandas.DataFrame
        Rows to load; column order must match the table's column order
        because the CSV is written without a header.

    Returns
    -------
    `df` itself on success, or -1 if the truncate/copy failed (the error is
    printed and the transaction rolled back).
    """

    # Serialise without header/index. Missing values are written as empty
    # fields, which COPY ... CSV reads back as NULL, so no NaN -> None
    # conversion of the frame is needed beforehand.
    buffer = StringIO()
    df.to_csv(buffer, header=False, index=False)
    buffer.seek(0)

    try:
        # TRUNCATE and COPY share one transaction: if COPY fails, the
        # rollback below also restores the truncated rows.
        cursor.execute(f"TRUNCATE {table_name} CASCADE;")
        print(f"\nTruncated table: {table_name}")

        cursor.copy_expert(f"COPY {table_name} from STDIN CSV QUOTE '\"'", buffer)
        conn.commit()
        print("Done!\n---------------------------")
        return df

    # psycopg2.DatabaseError derives from Exception, so one clause covers both.
    except Exception as error:
        print(f"Error:\n{error}")
        conn.rollback()
        return -1


def age_data_stats(file):
    """
    Load the per-age-group totals from a JSON file into a DataFrame.

    Reads the "total_age_groups" entry of the JSON document at `file`,
    expands its items into a frame and transposes it, so each item becomes
    one column.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON document to read.

    Returns
    -------
    pandas.DataFrame
        The first three columns are labelled "cases", "critical" and
        "deaths" by position.
        NOTE(review): this assumes the entries under "total_age_groups"
        appear in exactly that order; confirm against the data file.
    """

    columns = ["cases", "critical", "deaths"]

    # Pass the already-open handle to pandas instead of letting it reopen the
    # path; UTF-8 matches the encoding pandas itself defaults to for JSON.
    with open(file, "r", encoding="utf-8") as f:
        totals = pd.read_json(f)["total_age_groups"]

    age_groups = pd.DataFrame(totals.tolist()).T

    # Positional labels 0, 1, 2 -> "cases", "critical", "deaths".
    return age_groups.rename(columns=dict(enumerate(columns)))


def driver_code(json_path, record_path=None):
    """
    Load a JSON time series into a DataFrame and mirror it into PostgreSQL.

    Parameters
    ----------
    json_path : str or path-like
        JSON file to read.
    record_path : str or list, optional
        Forwarded to `pandas.json_normalize` to select the list of records.

    Returns
    -------
    pandas.DataFrame
        The normalized frame (with `date` parsed to datetime when present).
        It is returned even if the database import reported an error,
        because `import_data` prints failures instead of raising.

    Raises
    ------
    ConnectionError
        If `make_connection` signals failure with its -1 sentinel.
    """

    # Parse the file up front so its handle is released before any database
    # work starts and a bad path fails before a connection is opened.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    df = pd.json_normalize(data, record_path=record_path)

    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])

    conn = make_connection()
    if conn == -1:
        # make_connection already printed the underlying error.
        raise ConnectionError("Could not connect to the PostgreSQL database.")

    try:
        # `with conn` only commits/rolls back the transaction on exit; it
        # does not close the connection, hence the explicit close() below.
        with conn:
            cursor = create_tables(conn)
            try:
                import_data(conn, cursor, "covid_data_greece", df)
            finally:
                if cursor is not None:
                    cursor.close()
    finally:
        conn.close()

    return df


In [11]:
# Load the national time series (records under the "Greece" key), mirror it
# into PostgreSQL, and keep the resulting frame for the analysis below.
cases = driver_code(
    json_path="../Data/covid19-data-greece/data/greece/general/timeseries_greece.json",
    record_path="Greece",
)
cases.info()

Connecting to the PostgreSQL database...
Connection successful

Table `covid_data_greece` created successfully!
-----------------------------------------------

Truncated table: covid_data_greece
Done!
---------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 691 entries, 0 to 690
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       691 non-null    datetime64[ns]
 1   confirmed  691 non-null    int64         
 2   recovered  691 non-null    int64         
 3   deaths     691 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 21.7 KB


In [12]:
file = "../Data/covid19-data-greece/data/greece/age_distribution/age_data_history.json"

# Flatten the nested JSON into columns; nested keys are joined with "_".
with open(file, "r", encoding="utf-8") as f:
    data = json.load(f)

ages = pd.json_normalize(data=data, sep="_")
ages["date"] = pd.to_datetime(ages["date"])

# NOTE: this rebinds `cases`, so the cell is not idempotent. Re-run the cell
# that creates `cases` before re-running this one, otherwise the merge is
# applied to an already-merged frame.
# Only the first five columns of `ages` are merged in (presumably `date` plus
# four per-age-group case columns; TODO confirm against the data file).
cases = cases.merge(right=ages[ages.columns[:5]], how="outer", on="date").fillna(0)

# The outer merge + fillna can leave the merged columns as floats; restore ints.
for col in cases.columns[4:8]:
    cases[col] = cases[col].astype("int64")

cases["year_month"] = cases["date"].dt.strftime("%Y-%m")

# numeric_only=True skips the datetime `date` column, which cannot be summed
# (pandas >= 2.0 raises a TypeError instead of silently dropping it).
# NOTE(review): if the source series are cumulative totals, summing the daily
# rows per month overstates the monthly figures; confirm against the data.
grouped_cases = cases.groupby(by="year_month", as_index=False).sum(numeric_only=True)

# Whatever part of `confirmed` is not covered by the last four columns.
grouped_cases["cases_unknown"] = grouped_cases["confirmed"] - grouped_cases[
    grouped_cases.columns[-4:]
].sum(axis=1)
grouped_cases = grouped_cases.drop(columns="confirmed")

# Strip the literal "cases_" prefix so the column names read as plain labels.
grouped_cases.columns = grouped_cases.columns.str.replace("cases_", "", regex=False)

In [13]:
# Long format for plotting: one row per (month, column) pair, taking every
# column from position 3 onward as a value column.
melted = grouped_cases.melt(
    id_vars="year_month",
    value_vars=grouped_cases.columns[3:],
    var_name="cases",
)

In [18]:
def single_barchart(column, title=None):
    """
    Show a monthly bar chart for one column of the `grouped_cases` frame.

    Parameters
    ----------
    column : str
        Name of the `grouped_cases` column plotted on the y axis.
    title : str, optional
        Chart title. Defaults to "<Column> COVID-19 Cases per Month", so the
        title always matches the plotted column.

    Returns
    -------
    None. The figure is displayed with `fig.show()`.
    """

    if title is None:
        # Derived from `column` so the title no longer reads "Recovered ..."
        # when a different column is plotted.
        title = f"{column.title()} COVID-19 Cases per Month"

    fig = px.bar(
        grouped_cases,
        x="year_month",
        y=column,
        title=title,
        text=grouped_cases[column],
    )
    # Abbreviated value labels (two significant digits) above each bar.
    fig.update_traces(texttemplate="%{text:.2s}", textposition="outside")

    fig.update_layout(
        xaxis=dict(dtick="M1", tickformat="%b-%Y", tickangle=-45, title=""),
        yaxis=dict(title=column.title()),
        uniformtext_minsize=8,
        uniformtext_mode="hide",
        height=600,
    )

    fig.show()


In [19]:
# Monthly bar chart of the `recovered` column.
single_barchart(column="recovered")

In [22]:
# One bar per value of the `cases` column for every month, coloured by it.
fig = px.bar(
    melted,
    x="year_month",
    y="value",
    color="cases",
    title="Confirmed COVID-19 Cases per Month",
)

# Side-by-side bars, monthly ticks, and a wide canvas so the groups stay legible.
fig.update_layout(
    barmode="group",
    xaxis={"dtick": "M1", "tickformat": "%b-%Y", "tickangle": -45, "title": ""},
    width=1500,
    height=800,
)

fig.show()


In [17]:
file = "../Data/covid19-data-greece/data/greece/age_distribution/age_data.json"
age_groups = age_data_stats(file)

# A single row of three pie-type subplots, one per column of `age_groups`.
fig = make_subplots(
    rows=1,
    cols=3,
    specs=[[{"type": "pie"} for _ in range(3)]],
)
titles = [
    "Age Distribution of COVID-19 Confirmed Cases",
    "Critical Cases per Age Group",
    "Death count",
]

# Column i of `age_groups` goes into subplot column i + 1 with the i-th title.
for idx, col in enumerate(age_groups.columns):
    pie = go.Pie(labels=age_groups.index, values=age_groups[col], title=titles[idx])
    fig.add_trace(pie, row=1, col=idx + 1)

fig.show()
