### Notebook to combine .csv datasets into one SQLite database for Project 2

In [1]:
# Dependencies and Setup
import os
import sqlite3

import numpy as np
import pandas as pd
# Raise pandas' display limits so wide or long frames are not truncated.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Each input CSV is given as a path relative to the notebook's working
# directory (edit the path if a file moves) and is read straight into its own
# DataFrame.

# Ebola dataset.
ebola_data_to_load = "Data/ebola_data.csv"
ebola_data = pd.read_csv(ebola_data_to_load)

# Swine flu dataset.
swineflu_data_to_load = "Data/swineflu_data-W.csv"
swineflu_data = pd.read_csv(swineflu_data_to_load)

# COVID-19 dataset.
covid19_data_to_load = "Data/merged_covid_19_data.csv"
covid19_data = pd.read_csv(covid19_data_to_load)

# Country lookup table; this file is decoded as ISO-8859-1 instead of the
# default UTF-8.
lookup_countries_to_load = "Data/country_lookup_table.csv"
lookup_data = pd.read_csv(lookup_countries_to_load, encoding="ISO-8859-1")

# Country centroid dataset.
centroid_data_to_load = "Data/clean_country_centroids_az8.csv"
centroid_data = pd.read_csv(centroid_data_to_load)

In [2]:
# Preview the first five rows of the Ebola frame.
# NOTE(review): in the saved preview the population values for the same country
# differ by several orders of magnitude between years (e.g. Guinea) — looks
# like inconsistent unit scaling in the source CSV; confirm before relying on
# this column.
ebola_data.head(n=5)

Unnamed: 0,Pandemic,Country,Year,Cases,Deaths,Lon,Lat,population
0,Ebola,Guinea,2014,2397.0,1433.0,-10.940666,10.436216,11150970000000.0
1,Ebola,Guinea,2015,3351.0,2083.0,-10.940666,10.436216,11432100.0
2,Ebola,Guinea,2016,3351.0,2083.0,-10.940666,10.436216,11738.43
3,Ebola,Italy,2015,1.0,0.0,12.070013,42.796626,60578490.0
4,Ebola,Italy,2016,1.0,0.0,12.070013,42.796626,60663.07


In [3]:
# Reduce the COVID-19 frame to the columns that are written to the database.
# The explicit .copy() makes covid19_data1 an independent frame, so later edits
# to it cannot trigger SettingWithCopyWarning or touch covid19_data.
covid19_columns = ['Pandemic', 'Country', 'Year', 'Cases', 'Deaths', 'Lon', 'Lat', 'population']
covid19_data1 = covid19_data[covid19_columns].copy()

# Show a short preview only: display.max_rows is set to 500 above, so ending
# the cell on the bare frame would render the whole table.
covid19_data1.head(10)

Unnamed: 0,Pandemic,Country,Year,Cases,Deaths,Lon,Lat,population
0,covid19,Afghanistan,2020,363,3,66.004734,33.835231,35383030.0
1,covid19,Albania,2020,851,26,20.049834,41.14245,2886427.0
2,covid19,Algeria,2020,1485,113,2.617323,28.158938,40551400.0
3,covid19,Andorra,2020,720,3,1.560544,42.542291,77295.0
4,covid19,Angola,2020,11,0,17.537368,-12.293361,28842480.0
5,covid19,Antigua and Barbuda,2020,16,0,,,94520.0
6,covid19,Argentina,2020,1707,41,-65.179807,-35.381349,43508460.0
7,covid19,Armenia,2020,1370,0,44.929933,40.289526,2936147.0
8,covid19,Aruba,2020,19,0,-69.982677,12.52088,104865.0
9,covid19,Australia,2020,11286,93,134.491,-25.732887,24262710.0


In [8]:
# Write the three pandemic frames into one SQLite table.
# `conn` and `c` are left open on purpose: the following cell queries the
# table through `c`.
conn = sqlite3.connect('Data/Pandemic_data.db')
c = conn.cursor()

# The first frame replaces any existing PANDEMICS table, so re-running this
# cell does not duplicate rows; the remaining frames are appended to it.
pandemic_frames = [covid19_data1, ebola_data, swineflu_data]
for position, frame in enumerate(pandemic_frames):
    frame.to_sql(
        'PANDEMICS',
        conn,
        if_exists='replace' if position == 0 else 'append',
        index=False,
    )

In [9]:
# Read the PANDEMICS table back and print every row as a sanity check.
try:
    for row in c.execute('SELECT * FROM PANDEMICS;'):
        print(row)
finally:
    # Release both the cursor and the connection, even if the query fails;
    # closing only the cursor would leave the database connection open.
    c.close()
    conn.close()

('covid19', 'Afghanistan', 2020, 363, 3, 66.00473366, 33.83523073, 35383028.0)
('covid19', 'Albania', 2020, 851, 26, 20.04983396, 41.14244989, 2886427.0)
('covid19', 'Algeria', 2020, 1485, 113, 2.61732301, 28.15893849, 40551398.0)
('covid19', 'Andorra', 2020, 720, 3, 1.56054378, 42.54229102, 77295.0)
('covid19', 'Angola', 2020, 11, 0, 17.53736768, -12.29336054, 28842482.000000004)
('covid19', 'Antigua and Barbuda', 2020, 16, 0, None, None, 94520.0)
('covid19', 'Argentina', 2020, 1707, 41, -65.17980692, -35.3813488, 43508459.0)
('covid19', 'Armenia', 2020, 1370, 0, 44.92993276, 40.28952569, 2936147.0)
('covid19', 'Aruba', 2020, 19, 0, -69.98267711, 12.52088038, 104865.0)
('covid19', 'Australia', 2020, 11286, 93, 134.49100008, -25.73288704, 24262710.0)
('covid19', 'Austria', 2020, 27808, 99, 14.1264761, 47.58549439, 8747306.0)
('covid19', 'Azerbaijan', 2020, 574, 12, 47.54599879, 40.28827235, 9736041.0)
('covid19', 'Bahamas', 2020, 26, 0, -76.62843038, 24.29036702, 377923.0)
('covid19', 