In [118]:
import sqlite3
import csv
import pandas as pd

In [119]:
# Read data file
df = pd.read_csv('COVID-19 Dataset.csv')

# Convert Date from DD-MM-YYYY to YYYY-MM-DD
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y').dt.strftime('%Y-%m-%d')

# Create an SQLite database
conn = sqlite3.connect('covid.db')

# Load data file to SQLite
df.to_sql('covid_pandemic', conn, if_exists='replace')

78386

In [120]:
# Display rows containing null values
query1 = """
SELECT * FROM covid_pandemic 
WHERE "Country/Region" IS NULL 
OR Province IS NULL 
OR Latitude IS NULL 
OR Longitude IS NULL 
OR Date IS NULL 
OR Confirmed IS NULL 
OR Deaths IS NULL 
OR Recovered IS NULL
"""
results1 = pd.read_sql_query(query1, conn)
print(results1)

Empty DataFrame
Columns: [index, Province, Country/Region, Latitude, Longitude, Date, Confirmed, Deaths, Recovered]
Index: []


In [121]:
# Replace null values' rows with 0
query2 = """
UPDATE covid_pandemic
SET "Country/Region" = '0',
    Province = '0',
    Latitude = 0,
    Longitude = 0,
    Date = '0',
    Confirmed = 0,
    Deaths = 0,
    Recovered = 0
WHERE "Country/Region" IS NULL
   OR Province IS NULL
   OR Latitude IS NULL
   OR Longitude IS NULL
   OR Date IS NULL
   OR Confirmed IS NULL
   OR Deaths IS NULL
   OR Recovered IS NULL
"""
conn.execute(query2)
conn.commit()

In [122]:
# Check total number of rows
query3 = """
SELECT COUNT(*) as Total_number_of_rows FROM covid_pandemic
"""
count = pd.read_sql_query(query3, conn)
print(count)

   Total_number_of_rows
0                 78386


In [123]:
# start_date and end_date
query4 = """
SELECT MIN(Date) AS start_date, MAX(Date) AS end_date FROM covid_pandemic
"""
Dates = pd.read_sql_query(query4, conn)
print(Dates)

   start_date    end_date
0  2020-01-22  2021-06-13


In [124]:
# Number of months present in the dataset
query5 = """
SELECT COUNT(DISTINCT strftime('%Y-%m', Date)) AS number_of_months FROM covid_pandemic
"""
number_of_months = pd.read_sql_query(query5, conn)
print(number_of_months)

   number_of_months
0                18


In [125]:
# Monthly average for confirmed, deaths, and recovered
query5 = """
SELECT strftime('%Y-%m', Date) AS MonthYear,
AVG(Confirmed) AS average_confirmed,
AVG(Deaths) AS average_deaths,
AVG(Recovered) AS average_recovered
FROM covid_pandemic
GROUP BY MonthYear
ORDER BY MonthYear;
"""

monthly_average = pd.read_sql_query(query5, conn)
print(monthly_average)

   MonthYear  average_confirmed  average_deaths  average_recovered
0    2020-01           4.145455        0.123377           0.092857
1    2020-02          15.296014        0.593596           7.032020
2    2020-03         161.130289        8.660662          27.873900
3    2020-04         505.800433       41.522294         171.642208
4    2020-05         574.849811       30.280897         318.296397
5    2020-06         859.228139       29.817532         548.791558
6    2020-07        1432.361123       35.109552         983.058232
7    2020-08        1611.842899       37.536657        1299.294721
8    2020-09        1784.587446       34.777273        1438.906710
9    2020-10        2412.199623       36.758274        1420.643067
10   2020-11        3592.194372       56.763420        1985.344589
11   2020-12        4050.439673       71.218266        2497.885002
12   2021-01        3911.228530       84.183703        1919.636992
13   2021-02        2433.363636       69.164889        1558.39

In [126]:
# Find most frequent value for confirmed, deaths, recovered each month
query6 = """
WITH MonthlyDeathFrequency AS (
    SELECT
        strftime('%Y-%m', Date) AS Month,
        Deaths,
        COUNT(Deaths) AS FREQ,
        ROW_NUMBER() OVER (PARTITION BY strftime('%Y-%m', Date) ORDER BY COUNT(Deaths) DESC) AS rn
    FROM covid_pandemic
    GROUP BY Month, Deaths
),
MonthlyConfirmedFrequency AS (
    SELECT
        strftime('%Y-%m', Date) AS Month,
        Confirmed,
        COUNT(Confirmed) AS FREQ,
        ROW_NUMBER() OVER (PARTITION BY strftime('%Y-%m', Date) ORDER BY COUNT(Confirmed) DESC) AS rn
    FROM covid_pandemic
    GROUP BY Month, Confirmed
),
MonthlyRecoveredFrequency AS (
    SELECT
        strftime('%Y-%m', Date) AS Month,
        Recovered,
        COUNT(Recovered) AS FREQ,
        ROW_NUMBER() OVER (PARTITION BY strftime('%Y-%m', Date) ORDER BY COUNT(Recovered) DESC) AS rn
    FROM covid_pandemic
    GROUP BY Month, Recovered
)
SELECT 
    df.Month, 
    df.Deaths, 
    df.FREQ AS DeathsFreq, 
    cf.Confirmed, 
    cf.FREQ AS ConfirmedFreq, 
    rf.Recovered, 
    rf.FREQ AS RecoveredFreq
FROM MonthlyDeathFrequency df
JOIN MonthlyConfirmedFrequency cf ON df.Month = cf.Month AND cf.rn = 1
JOIN MonthlyRecoveredFrequency rf ON df.Month = rf.Month AND rf.rn = 1
WHERE df.rn = 1;

"""

frequentpermonth = pd.read_sql_query(query6, conn)
print(frequentpermonth)

      Month  Deaths  DeathsFreq  Confirmed  ConfirmedFreq  Recovered  \
0   2020-01       0        1530          0           1373          0   
1   2020-02       0        4374          0           3926          0   
2   2020-03       0        3815          0           2148          0   
3   2020-04       0        2512          0           1409          0   
4   2020-05       0        2779          0           1607          0   
5   2020-06       0        2745          0           1453          0   
6   2020-07       0        2714          0           1388          0   
7   2020-08       0        2535          0           1260          0   
8   2020-09       0        2349          0           1282          0   
9   2020-10       0        2355          0           1258          0   
10  2020-11       0        2170          0           1190          0   
11  2020-12       0        2180          0           1220          0   
12  2021-01       0        2120          0           1200       

In [127]:
# The minimum values for confirmed, deaths, recovered per year
query7 = """
SELECT strftime('%Y', Date) AS Year, MIN(confirmed) AS min_confirmations, MIN(deaths) AS min_deaths, MIN(recovered) AS min_recoveries
FROM covid_pandemic
GROUP BY Year
"""

min_per_year = pd.read_sql_query(query7, conn)
print(min_per_year)

   Year  min_confirmations  min_deaths  min_recoveries
0  2020                  0           0               0
1  2021                  0           0               0


In [128]:
# The maximum values of confirmed, deaths, recovered per year
query8 = """
SELECT strftime('%Y', Date) AS Year, MAX(confirmed) AS max_confirmations, MAX(deaths) AS max_deaths, MAX(recovered) AS max_recoveries
FROM covid_pandemic
GROUP BY Year
"""

max_per_year = pd.read_sql_query(query8, conn)
print(max_per_year)

   Year  max_confirmations  max_deaths  max_recoveries
0  2020             823225        3752         1123456
1  2021             414188        7374          422436


In [129]:
# The total number of case of confirmed, deaths, recovered each month
query9 = """
SELECT strftime('%Y-%m', Date) AS MonthYear, SUM(Confirmed) AS total_confirmed, SUM(Deaths) AS total_deaths, SUM(Recovered) AS total_recovered
FROM covid_pandemic
GROUP BY MonthYear
"""

number_per_month = pd.read_sql_query(query9, conn)
print(number_per_month)

   MonthYear  total_confirmed  total_deaths  total_recovered
0    2020-01             6384           190              143
1    2020-02            68312          2651            31405
2    2020-03           769236         41346           133070
3    2020-04          2336798        191833           792987
4    2020-05          2744333        144561          1519547
5    2020-06          3969634        137757          2535417
6    2020-07          6838092        167613          4693120
7    2020-08          7694938        179200          6202833
8    2020-09          8244794        160671          6647749
9    2020-10         11515841        175484          6782150
10   2020-11         16595938        262247          9172292
11   2020-12         19336799        339996         11924903
12   2021-01         18672205        401893          9164347
13   2021-02         10492664        298239          6719785
14   2021-03         13924790        282620          7888013
15   2021-04         217