In [1]:
#Imports
import os
import warnings

import mysql.connector
import pandas as pd
import pymysql
import requests
from pandas import json_normalize
from pandas.core.common import SettingWithCopyWarning
from sqlalchemy import create_engine
# Silence pandas' SettingWithCopyWarning for every cell in the notebook.
# NOTE(review): a global filter also hides genuine chained-assignment mistakes;
# consider fixing the offending assignments rather than muting the warning.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

## COVID-19 Daily Summary Statistics by Country

In [2]:
# Fetch the daily COVID-19 summary from the API
api = r'https://api.covid19api.com/summary'
# Bound the wait so an unresponsive server cannot hang the notebook indefinitely
r = requests.get(api, timeout=30)
# Stop here on an HTTP error status instead of failing later while parsing the body
r.raise_for_status()

In [3]:
# Flatten the records under the 'Countries' key of the JSON payload into a DataFrame
daily_df = pd.json_normalize(r.json(), record_path='Countries')

## Country Data

In [4]:
# Give every country an integer surrogate key: the number of its group when
# the frame is grouped by country name.
daily_df['country_id'] = daily_df.groupby('Country').ngroup()

In [5]:
# Country lookup table: just the name and its surrogate key
country_data = daily_df.loc[:, ['Country', 'country_id']]

In [6]:
# Local lookup file; the cells below use its 'Country' and 'Continent' columns
region_data = pd.read_csv(filepath_or_buffer='countries.csv')

In [7]:
# Left-join the region lookup onto the countries; the '_merge' indicator column
# marks rows that found no match ('left_only') so they can be fixed up next.
country_data = country_data.merge(region_data, on='Country', how='left', indicator=True)

In [8]:
# Country names that found no match in region_data during the merge above
unmatched_countries = country_data.loc[country_data['_merge'] == 'left_only', 'Country'].tolist()

# Replacement names, paired with the unmatched names strictly by position
replacement_names = ['Congo', 'Congo, Democratic Republic of', 'Ivory Coast', 'Vatican City',
                     'Laos', 'Syria', 'Venezuela', 'Vietnam']

# zip() truncates silently, so a changed set of unmatched names would pair the
# wrong countries together without any error -- fail loudly instead.
if len(unmatched_countries) != len(replacement_names):
    raise ValueError(
        'Expected {} unmatched countries but found {}: {}'.format(
            len(replacement_names), len(unmatched_countries), unmatched_countries))

# Apply all renames in one pass
country_data['Country'] = country_data['Country'].replace(
    dict(zip(unmatched_countries, replacement_names)))

In [9]:
# Discard the 'Continent' values from the first merge and look them up again,
# now that the country names have been corrected.
country_data = (
    country_data
    .drop(columns=['Continent'])
    .merge(region_data, on='Country', how='left')
)

## COVID-19 Vaccinations By Country

In [10]:
# Vaccination CSV hosted in the owid/covid-19-data GitHub repository
vaccines_url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
vaccinations = pd.read_csv(filepath_or_buffer=vaccines_url)

In [11]:
# Discard aggregate rows (world, continents, European Union) together with a
# fixed list of other locations that are excluded from the analysis.
vaccinations = vaccinations.loc[
    ~vaccinations['location'].isin([
        'World', 'Asia', 'Europe', 'European Union', 'Africa', 'North America', 'South America',
        'Oceania', 'Cayman Islands', 'Gibraltar', 'Bermuda', 'Isle of Man', 'Guernsey',
        'Faeroe Islands', 'Jersey', 'Saint Helena', 'Anguilla', 'Macao', 'Greenland', 'Montserrat',
        'Turks and Caicos Islands', 'Hong Kong', 'Falkland Islands', 'Northern Cyprus',
    ])
]

In [12]:
# Rename locations whose spelling differs, so they can be matched against
# country_data['Country'] in the merge that follows.
location_renames = {
    'Russia': 'Russian Federation',
    'United States': 'United States of America',
    'Czechia': 'Czech Republic',
    'South Korea': 'Korea (South)',
    "Cote d'Ivoire": 'Ivory Coast',
    'Iran': 'Iran, Islamic Republic of',
}
vaccinations['location'] = vaccinations['location'].replace(location_renames)

In [14]:
# Attach country_id to every vaccination row. No `how` is given, so pandas'
# default inner join applies and locations without a matching country are dropped.
vaccinations = vaccinations.merge(
    country_data[['Country', 'country_id']],
    left_on='location',
    right_on='Country',
    indicator=True,
)

In [15]:
# File rows reported for the UK's constituent nations under one country id.
# A single .loc[rows, column] assignment writes into `vaccinations` itself;
# the chained form (df['col'].loc[...] = value) may write to a temporary copy.
# NOTE(review): 180 is presumably the country_id of the United Kingdom -- confirm;
# ids come from groupby().ngroup() and shift whenever the country list changes.
# NOTE(review): the merge in the previous cell is an inner join, so check that
# these four locations are still present at this point.
uk_nation_rows = vaccinations['location'].isin(['England', 'Wales', 'Scotland', 'Northern Ireland'])
vaccinations.loc[uk_nation_rows, 'country_id'] = 180

## Data Cleaning/Wrangling

In [16]:
# Remove the identifier, name and recovery columns that are not stored
daily_df = daily_df.drop(
    columns=['ID', 'Country', 'Slug', 'NewRecovered', 'TotalRecovered', 'CountryCode']
)
# Positional rename: relies on the remaining columns arriving in exactly this order
daily_df.columns = ['new_confirmed', 'total_confirmed', 'new_deaths', 'total_deaths', 'date', 'country_id']
# Keep only the first ten characters of each date string
# (presumably the YYYY-MM-DD part of a timestamp -- TODO confirm)
daily_df['date'] = daily_df['date'].map(lambda stamp: stamp[:10])

In [17]:
# The merge indicator has served its purpose; drop it before storing
country_data = country_data.drop(columns=['_merge'])
# Positional rename to the column names of the `countries` table
country_data.columns = ['country', 'country_id', 'region']

In [18]:
# Drop the columns that have no counterpart in the `vaccinations` table,
# including the helper columns added by the merge ('Country', '_merge').
vaccinations = vaccinations.drop(
    columns=['iso_code', 'daily_vaccinations_raw', 'location', 'Country', '_merge']
)

## Storing the data in a MySQL database

In [19]:
# Database connection settings. Each value can be supplied through an
# environment variable so real credentials never need to be written into the
# notebook; when a variable is unset, the literal default below is used.
hostname = os.environ.get("COVID_DB_HOST", "localhost")
uname = os.environ.get("COVID_DB_USER", "***")
pwd = os.environ.get("COVID_DB_PASSWORD", "***")
dbname = os.environ.get("COVID_DB_NAME", "covid_data")

In [20]:
# Make sure the target database exists before binding an engine to it:
# connecting through a URL that names a missing database fails, so the
# CREATE DATABASE is issued over a server-level connection (no database in the URL).
server_engine = create_engine("mysql+pymysql://{user}:{pw}@{host}"
                .format(host=hostname, user=uname, pw=pwd))
with server_engine.connect() as server_connection:
    # Identifiers cannot be bound as parameters, so the name is backtick-quoted
    server_connection.execute("CREATE DATABASE IF NOT EXISTS `{db}`"
                .format(db=dbname.replace("`", "``")))
server_engine.dispose()

# Engine used by the rest of the notebook, bound to the target database
engine = create_engine("mysql+pymysql://{user}:{pw}@{host}/{db}"
                .format(host=hostname, db=dbname, user=uname, pw=pwd))

In [21]:
# Open one connection from the engine; the DDL cells below execute on it and
# the final cell closes it.
connection = engine.connect()

In [22]:
# NOTE(review): this connection comes from an engine whose URL already selects
# the database, so by the time this statement runs the database must exist and
# the statement is effectively a no-op. The name is also hard-coded here rather
# than taken from `dbname`.
connection.execute("CREATE DATABASE IF NOT EXISTS covid_data")

<sqlalchemy.engine.result.ResultProxy at 0x1f44bb3cb00>

In [23]:
# Create the table for the per-country daily case/death figures if it is missing.
# No key is declared here; a later cell adds an `id` column with ALTER TABLE.
connection.execute('''
CREATE TABLE IF NOT EXISTS daily_data (
    country_id INTEGER NOT NULL,
    date DATETIME NOT NULL,
    new_confirmed INTEGER,
    total_confirmed INTEGER NOT NULL,
    new_deaths INTEGER,
    total_deaths INTEGER NOT NULL
)
''')

<sqlalchemy.engine.result.ResultProxy at 0x1f44bf31ef0>

In [24]:
# Create the country lookup table (id, name, region) if it is missing;
# country_id is the primary key.
connection.execute('''
CREATE TABLE IF NOT EXISTS countries (
    country_id INTEGER NOT NULL,
    country VARCHAR(50) NOT NULL,
    region VARCHAR(50) NOT NULL,
    PRIMARY KEY (country_id)
)
''')

<sqlalchemy.engine.result.ResultProxy at 0x1f44be70f28>

In [25]:
# Create the table for the per-country vaccination time series if it is missing.
# total_vaccinations is BIGINT: it is a cumulative dose count, and a signed
# 32-bit INTEGER tops out at 2,147,483,647, which a large country can exceed.
# An `id` key column is added by a later cell with ALTER TABLE.
connection.execute('''
CREATE TABLE IF NOT EXISTS vaccinations (
    country_id INTEGER NOT NULL,
    date DATETIME NOT NULL,
    total_vaccinations BIGINT,
    people_vaccinated INTEGER,
    people_fully_vaccinated INTEGER,
    daily_vaccinations INTEGER,
    total_vaccinations_per_hundred DECIMAL(5,2),
    people_vaccinated_per_hundred DECIMAL(5,2),
    people_fully_vaccinated_per_hundred DECIMAL(5,2),
    daily_vaccinations_per_million DECIMAL(9,2)
)
''')

<sqlalchemy.engine.result.ResultProxy at 0x1f44be70e10>

In [26]:
# Append today's figures to daily_data; the DataFrame index is not written
daily_df.to_sql(name='daily_data', con=engine, if_exists='append', index=False)

In [27]:
# Add the surrogate key only if it is not there yet. The table survives between
# runs, and re-issuing ADD for an existing column fails with a duplicate-column
# error, which would break every run after the first.
daily_data_id_column = connection.execute("SHOW COLUMNS FROM daily_data LIKE 'id'").first()
if daily_data_id_column is None:
    connection.execute('ALTER TABLE daily_data ADD id SERIAL PRIMARY KEY')
    # Move the new column to the first position
    connection.execute('ALTER TABLE daily_data CHANGE id id SERIAL NOT NULL FIRST')

<sqlalchemy.engine.result.ResultProxy at 0x1f44bf6ba90>

In [28]:
# Write the country lookup table, replacing whatever is stored.
# NOTE(review): if_exists='replace' drops the existing table and recreates it
# from the DataFrame, so the PRIMARY KEY and NOT NULL constraints declared in
# the CREATE TABLE cell for `countries` do not survive this call.
country_data.to_sql(name='countries', con=engine, if_exists='replace', index=False)

In [29]:
# Append the vaccination rows to the vaccinations table; the index is not written
vaccinations.to_sql(name='vaccinations', con=engine, if_exists='append', index=False)

In [30]:
# Add the surrogate key only if it is not there yet. The table survives between
# runs, and re-issuing ADD for an existing column fails with a duplicate-column
# error, which would break every run after the first.
vaccinations_id_column = connection.execute("SHOW COLUMNS FROM vaccinations LIKE 'id'").first()
if vaccinations_id_column is None:
    connection.execute('ALTER TABLE vaccinations ADD id SERIAL PRIMARY KEY')
    # Move the new column to the first position
    connection.execute('ALTER TABLE vaccinations CHANGE id id SERIAL NOT NULL FIRST')

<sqlalchemy.engine.result.ResultProxy at 0x1f44bfa00b8>

In [31]:
# Close the connection that was opened for the DDL statements
connection.close()