# Covid-19 Dashboard Project
## Python, SQL and Tableau: Data Extraction, Key Metrics, Dashboard
- Data Source: Our World In Data 
- Dataset: Covid-19 
- Export Option: SQL database
- Data Transformations: format data types (string, datetime, float)
- Future changes: Fill in NA values in agg. columns with cumulative values

In [1]:
# imports
import pandas as pd
import os
from datetime import date, timedelta


In [2]:
# Setup
# Working directory for the notebook. The original path stays the default, but it can be
# redirected through the COVID_WORK_DIR environment variable when run on another machine.
os.chdir(os.environ.get('COVID_WORK_DIR', '/home/jovyan/work'))

# Location of the dataset CSV that refresh_data downloads.
uri = 'https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv?raw=true'

In [3]:
# check if the cached file was last modified more than 24 hours ago
def is24hr_old(filename):
    """Return True when ``data/<filename>.csv`` was modified over 24 hours ago.

    The age is measured in elapsed seconds since the file's modification time.
    Comparing calendar dates instead (mtime date + 1 day < today) only reports
    a file as old once its date is at least two days back, which lets a cache
    go almost 48 hours without a refresh.

    Args:
        filename: Base name (no extension) of the CSV inside the ``data`` folder.

    Returns:
        bool: True if the file's modification time is more than one day in the past.

    Raises:
        OSError: Propagated from ``os.path.getmtime`` when the file is missing
            or cannot be accessed.
    """
    import time  # local import: only this helper needs the wall clock

    modified_at = os.path.getmtime(f'data/{filename}.csv')
    age_seconds = time.time() - modified_at
    return age_seconds > timedelta(days=1).total_seconds()

# Cache dataset to local csv and refresh if older than 1 day
def refresh_data(filename):
    """Load the dataset, downloading it again when the local cache is missing or stale.

    When ``data/<filename>.csv`` does not exist, or ``is24hr_old`` reports it as
    stale, the CSV is read from the module-level ``uri`` and written to the cache.
    Otherwise the cached CSV is read.

    Args:
        filename: Base name (no extension) of the cache file inside ``data``.

    Returns:
        pandas.DataFrame: The freshly downloaded or cached dataset.
    """
    cache_path = f'data/{filename}.csv'

    if not os.path.isfile(cache_path) or is24hr_old(filename):
        df = pd.read_csv(uri)
        # Create the cache folder on first run instead of failing on a missing directory.
        os.makedirs('data', exist_ok=True)
        # index=False keeps the row index out of the file; otherwise a cached read
        # comes back with an extra 'Unnamed: 0' column that a fresh download lacks.
        df.to_csv(cache_path, index=False)
    else:
        df = pd.read_csv(cache_path)

    return df

In [4]:
# Load the dataset through the caching helper, then list its columns
df = refresh_data(filename='owid-covid')
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [6]:
# transformation
# Every missing value is zero-filled first. The text columns are then rebuilt from the
# unfilled frame, so their gaps are kept as <NA> instead of turning into 0.
text_columns = ['iso_code', 'continent', 'location', 'tests_units']

dft = df.fillna(0)
for column in text_columns:
    dft[column] = df[column].astype('string')
dft['date'] = pd.to_datetime(dft['date'])

dft.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 216578 entries, 0 to 216577
Data columns (total 67 columns):
 #   Column                                      Non-Null Count   Dtype         
---  ------                                      --------------   -----         
 0   iso_code                                    216578 non-null  string        
 1   continent                                   204113 non-null  string        
 2   location                                    216578 non-null  string        
 3   date                                        216578 non-null  datetime64[ns]
 4   total_cases                                 216578 non-null  float64       
 5   new_cases                                   216578 non-null  float64       
 6   new_cases_smoothed                          216578 non-null  float64       
 7   total_deaths                                216578 non-null  float64       
 8   new_deaths                                  216578 non-null  float64      

In [11]:
from sqlalchemy import create_engine

# The connection URL (which includes the password) is read from the environment so that
# credentials are never stored in the notebook. Expected form:
#   postgresql://postgres:<password>@192.168.0.151:5432/mydb
db_url = os.environ.get('COVID_DB_URL')
if not db_url:
    raise RuntimeError(
        'Set the COVID_DB_URL environment variable to the database connection URL '
        'before running this cell'
    )
engine = create_engine(db_url)

In [13]:
def update_server(table_name, data=None, con=None):
    """Write a DataFrame to a SQL table, replacing the table if it already exists.

    The upload is best-effort: failures are reported by printing rather than
    raised, and the printed message includes the underlying error.

    Args:
        table_name: Name of the destination table.
        data: DataFrame to upload. Defaults to the notebook-level ``dft``.
        con: Connection handed to ``DataFrame.to_sql``. Defaults to the
            notebook-level ``engine``.

    Returns:
        None. The outcome is reported on standard output.
    """
    try:
        # Globals are resolved inside the try so a missing `dft`/`engine` is
        # reported the same way as any other upload failure.
        frame = dft if data is None else data
        target = engine if con is None else con
        frame.to_sql(table_name, con=target, index=False, if_exists='replace')
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        print(f'Got an error while updating server: {exc!r}')
    else:
        print('Successfully updated server')

In [14]:
# Upload the transformed frame to the `owid_covid` table
update_server(table_name='owid_covid')