### Notebook to combine the CSV datasets into one SQLite database for Project 2

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import sqlite3
import os
import json

# Raise pandas' display limits (up to 500 rows/columns, 1000 characters wide)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

# Each input CSV is declared next to the DataFrame it is loaded into.
# Paths are relative to the notebook; update them if the files move.

# Ebola dataset
ebola_data_to_load = "Data/ebola_data.csv"
ebola_data = pd.read_csv(ebola_data_to_load)

# Swine flu dataset
swineflu_data_to_load = "Data/swineflu_data-W.csv"
swineflu_data = pd.read_csv(swineflu_data_to_load)

# COVID-19 dataset
covid19_data_to_load = "Data/merged_covid_19_data.csv"
covid19_data = pd.read_csv(covid19_data_to_load)

# Country lookup table (this file is read as ISO-8859-1 rather than the default encoding)
lookup_countries_to_load = "Data/country_lookup_table.csv"
lookup_data = pd.read_csv(lookup_countries_to_load, encoding="ISO-8859-1")

# Country centroid table
centroid_data_to_load = "Data/clean_country_centroids_az8.csv"
centroid_data = pd.read_csv(centroid_data_to_load)

In [2]:
# Columns kept from the COVID-19 frame before it is combined with the other datasets
# (presumably the same layout as the Ebola and swine flu frames - TODO confirm)
covid19_columns = ['Pandemic', 'Country', 'Year', 'Cases', 'Deaths', 'Lon', 'Lat', 'population']
covid19_data1 = covid19_data[covid19_columns]
covid19_data1

Unnamed: 0,Pandemic,Country,Year,Cases,Deaths,Lon,Lat,population
0,Covid19,Afghanistan,2020,1051,25,66.004734,33.835231,35383030.0
1,Covid19,Albania,2020,1989,76,20.049834,41.14245,2886427.0
2,Covid19,Algeria,2020,4112,280,2.617323,28.158938,40551400.0
3,Covid19,Andorra,2020,2411,27,1.560544,42.542291,77295.0
4,Covid19,Angola,2020,41,4,17.537368,-12.293361,28842480.0
5,Covid19,Antigua and Barb.,2020,54,0,-61.794693,17.2775,1000.0
6,Covid19,Argentina,2020,5440,131,-65.179807,-35.381349,43508460.0
7,Covid19,Armenia,2020,3567,9,44.929933,40.289526,2936147.0
8,Covid19,Aruba,2020,19,0,-69.982677,12.52088,104865.0
9,Covid19,Australia,2020,31588,174,134.491,-25.732887,24262710.0


In [3]:
# Stack the three pandemic frames into a single table.
# ignore_index=True discards the per-frame indexes and numbers the result 0..n-1.
frames = [covid19_data1, ebola_data, swineflu_data]
combined_data = pd.concat(frames, ignore_index=True)
combined_data

Unnamed: 0,Pandemic,Country,Year,Cases,Deaths,Lon,Lat,population
0,Covid19,Afghanistan,2020,1051,25,66.004734,33.835231,35383030.0
1,Covid19,Albania,2020,1989,76,20.049834,41.14245,2886427.0
2,Covid19,Algeria,2020,4112,280,2.617323,28.158938,40551400.0
3,Covid19,Andorra,2020,2411,27,1.560544,42.542291,77295.0
4,Covid19,Angola,2020,41,4,17.537368,-12.293361,28842480.0
5,Covid19,Antigua and Barb.,2020,54,0,-61.794693,17.2775,1000.0
6,Covid19,Argentina,2020,5440,131,-65.179807,-35.381349,43508460.0
7,Covid19,Armenia,2020,3567,9,44.929933,40.289526,2936147.0
8,Covid19,Aruba,2020,19,0,-69.982677,12.52088,104865.0
9,Covid19,Australia,2020,31588,174,134.491,-25.732887,24262710.0


In [4]:

# Open the SQLite database file and get a cursor for the read-back query
conn = sqlite3.connect('Data/pandemic_data.sqlite')
c = conn.cursor()

# Write the combined frame into the 'pandemics' table without the DataFrame index.
# if_exists='append' adds to whatever the table already holds, so re-running this
# cell inserts the same rows again.
# NOTE(review): the rows read back from this table carry a leading integer column
# that is not in combined_data, so the table appears to pre-exist with its own key
# column - confirm before switching to if_exists='replace'.
combined_data.to_sql(name='pandemics', con=conn, if_exists='append', index=False)

In [5]:
# Read every row of the 'pandemics' table back and print it as a JSON array
# (one inner list per row) to check what was written.
c.execute('SELECT * FROM pandemics;')
data = c.fetchall()
json_data = json.dumps(data)
print(json_data)

# Be sure to close the cursor AND the connection.
# Closing only the cursor leaves the database connection open; sqlite3 does not
# commit on close, so commit first (a no-op when nothing is pending).
c.close()
conn.commit()
conn.close()

[[1, "Covid19", "Afghanistan", 2020, 1051, 25, 66.00473366, 33.83523073, 35383028], [2, "Covid19", "Albania", 2020, 1989, 76, 20.04983396, 41.14244989, 2886427], [3, "Covid19", "Algeria", 2020, 4112, 280, 2.61732301, 28.15893849, 40551398], [4, "Covid19", "Andorra", 2020, 2411, 27, 1.56054378, 42.54229102, 77295], [5, "Covid19", "Angola", 2020, 41, 4, 17.53736768, -12.29336054, 28842482.000000004], [6, "Covid19", "Antigua and Barb.", 2020, 54, 0, -61.79469343, 17.277499600000002, 1000], [7, "Covid19", "Argentina", 2020, 5440, 131, -65.17980692, -35.3813488, 43508459], [8, "Covid19", "Armenia", 2020, 3567, 9, 44.92993276, 40.28952569, 2936147], [9, "Covid19", "Aruba", 2020, 19, 0, -69.98267711, 12.52088038, 104865], [10, "Covid19", "Australia", 2020, 31588, 174, 134.49100008, -25.73288704, 24262710], [11, "Covid19", "Austria", 2020, 74639, 498, 14.1264761, 47.58549439, 8747306], [12, "Covid19", "Azerbaijan", 2020, 1618, 32, 47.54599879, 40.28827235, 9736041], [13, "Covid19", "Bahamas", 