In [1]:
import pandas as pd
from sqlalchemy import create_engine
import sqlite3
import json
import requests

## Part 1: Scrape Coronavirus Mainland China Data from Wikipedia

In [11]:
# Wikipedia timeline article that holds the case-statistics tables. "#Case_statistics" is a
# URL fragment (an in-page anchor), so the whole article is still what gets downloaded.
url = 'https://en.wikipedia.org/wiki/Timeline_of_the_2019%E2%80%9320_coronavirus_outbreak#Case_statistics'

In [12]:
# Parse every HTML <table> found at `url`; `tables` is the resulting list of DataFrames.
# NOTE(review): an earlier note here called the new-confirmed table "the 4th", yet the next code
# cell reads tables[38]. Positions in this list evidently move when the article is edited, so
# re-check every hard-coded index after a fresh scrape.
tables = pd.read_html(url)

### 1. New Confirmed Cases of Coronavirus in Mainland China by Provincial Divisions

In [25]:
# tables[38] (zero-based, i.e. the 39th parsed table) is used as the "new confirmed cases by
# provincial division" table.
# NOTE(review): the index reflects the page layout at scrape time; confirm that the columns
# printed below still match before relying on the downstream cells.
df_China_new_confirmed_original = tables[38]
# Inspect column names, non-null counts and dtypes, then preview the first rows.
df_China_new_confirmed_original.info()
df_China_new_confirmed_original.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 41 columns):
Date (CST)                           43 non-null object
Hubei                                39 non-null object
Hubei(clinical)                      11 non-null object
Guangdong                            35 non-null object
Beijing                              34 non-null object
Shanghai                             34 non-null object
Zhejiang                             33 non-null object
Tianjin                              33 non-null object
Chongqing                            33 non-null object
Jiangxi                              33 non-null object
Shandong                             33 non-null object
Henan                                33 non-null object
Hunan                                33 non-null object
Sichuan                              33 non-null object
Yunnan                               33 non-null object
Shanxi                               32 non-null object
F

Unnamed: 0,Date (CST),Hubei,Hubei(clinical),Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,...,Qinghai,Tibet,National(confirmed),National(inclusive),ExcludingHubei[50],"Wuhan,Hubei","Wuhan,Hubei(clinical)",Hubei:outside Wuhan[51],Hubei:outside Wuhan(clinical)[51],Sources
0,2020-01-11,41.0,,,,,,,,,...,,,41.0,,,41.0,,,,[52][53]
1,2020-01-12,,,,,,,,,,...,,,,,,,,,,[52][54]
2,2020-01-13,,,,,,,,,,...,,,,,,,,,,[52][55]
3,2020-01-14,,,,,,,,,,...,,,,,,,,,,[52][56]
4,2020-01-15,,,,,,,,,,...,,,,,,,,,,[52][57]


In [26]:
# Drop the two right-most columns ('Hubei:outside Wuhan(clinical)[51]' and 'Sources').
df_China_new_confirmed_c = df_China_new_confirmed_original.iloc[:,:-2]
# Drop the last three rows (presumably non-daily footer rows; they are not shown in the saved
# output, so verify against the page).
df_China_new_confirmed_r = df_China_new_confirmed_c[:-3]
# Check the tail to confirm that the rows and columns were removed.
df_China_new_confirmed_r.tail()

Unnamed: 0,Date (CST),Hubei,Hubei(clinical),Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,...,Gansu,InnerMongolia,Qinghai,Tibet,National(confirmed),National(inclusive),ExcludingHubei[50],"Wuhan,Hubei","Wuhan,Hubei(clinical)",Hubei:outside Wuhan[51]
35,2020-02-15,955,888,22,5,2,5,2,7,12,...,0,2,0,0,1121,2009,166,1548,1548,295
36,2020-02-16,1933,1933,6,1,3,4,2,7,5,...,0,2,0,0,2048,2048,115,1690,1690,243
37,2020-02-17,1807,1807,6,6,2,1,1,2,3,...,1,1,0,0,1886,1886,79,1600,1600,207
38,2020-02-18,1693,1693,3,6,0,1,3,2,1,...,0,2,0,0,1749,1749,56,1660,1660,33
39,2020-02-19,349,349,1,2,0,2,2,5,1,...,0,0,0,0,673,673,45,615,615,13


In [27]:
# Give the date column the short name "Date" that every later cell refers to.
df_China_new_confirmed_r = df_China_new_confirmed_r.rename(columns={"Date (CST)": "Date"})

# Build an independent frame whose "Date" strings (YYYY-MM-DD) are parsed into datetimes.
# `assign` returns a new DataFrame, so df_China_new_confirmed_r keeps its text dates.
df_China_new_confirmed = df_China_new_confirmed_r.assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y-%m-%d")
)

In [28]:
# Move "Date" into the index so that the integer cast and cumulative sum applied later only
# see the numeric columns.
# The column is selected by name rather than by position: an accidental second run of this
# cell then raises KeyError instead of silently promoting the next column ('Hubei') to the index.
df_China_new_confirmed = df_China_new_confirmed.set_index("Date")

In [29]:
# Replace every missing value with 0 so the integer cast in the next cell cannot hit NaN;
# a blank cell is thereby counted as "no new cases" in the running totals computed later.
df_China_new_confirmed=df_China_new_confirmed.fillna(0)

In [30]:
# Cast every column to integer counts.
# NOTE(review): the saved .info() output lists these columns as dtype object; astype(int) raises
# ValueError if any cell holds non-numeric text (e.g. a footnote marker) - confirm on a fresh scrape.
df_China_new_confirmed_final = df_China_new_confirmed.astype(int)

* __The China Accumulated Confirmed Cases Table__ (Database)

In [32]:
# Running total of the daily new confirmed cases, column by column. "Date" sits in the index
# while summing (so it is not part of the sum) and is turned back into a regular column
# afterwards, ready to be written to SQLite as a field.
df_China_confirmed_final = df_China_new_confirmed_final.cumsum().reset_index()
df_China_confirmed_final

Unnamed: 0,Date,Hubei,Hubei(clinical),Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,...,Gansu,InnerMongolia,Qinghai,Tibet,National(confirmed),National(inclusive),ExcludingHubei[50],"Wuhan,Hubei","Wuhan,Hubei(clinical)",Hubei:outside Wuhan[51]
0,2020-01-11,41,0,0,0,0,0,0,0,0,...,0,0,0,0,41,0,0,41,0,0
1,2020-01-12,41,0,0,0,0,0,0,0,0,...,0,0,0,0,41,0,0,41,0,0
2,2020-01-13,41,0,0,0,0,0,0,0,0,...,0,0,0,0,41,0,0,41,0,0
3,2020-01-14,41,0,0,0,0,0,0,0,0,...,0,0,0,0,41,0,0,41,0,0
4,2020-01-15,41,0,0,0,0,0,0,0,0,...,0,0,0,0,41,0,0,41,0,0
5,2020-01-16,45,0,0,0,0,0,0,0,0,...,0,0,0,0,45,0,0,45,0,0
6,2020-01-17,62,0,0,0,0,0,0,0,0,...,0,0,0,0,62,0,0,62,0,0
7,2020-01-18,121,0,0,0,0,0,0,0,0,...,0,0,0,0,121,0,0,121,0,0
8,2020-01-19,198,0,1,0,0,0,0,0,0,...,0,0,0,0,199,0,1,198,0,0
9,2020-01-20,270,0,14,5,2,0,0,0,0,...,0,0,0,0,291,0,21,258,0,12


* __The China Accumulated Confirmed Cases Transform Table__ (Database)

* __The China New Confirmed Cases Table__ (Database)

In [11]:
# Turn the "Date" index back into a column (the frame is later written with index=False).
# NOTE(review): this rebinds the same name, so the cell is not idempotent - a second run adds a
# stray "index" column - and the cumulative-sum cell has to run before this one because it
# needs "Date" to still be the index.
df_China_new_confirmed_final=df_China_new_confirmed_final.reset_index()
df_China_new_confirmed_final.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01,1921,69,27,8,62,8,15,47,19,...,8,34,6,15,15,2,5,4,2,0
22,2020-02-02,2103,121,29,26,63,11,50,58,34,...,9,35,8,26,8,4,11,7,2,0
23,2020-02-03,2345,114,16,5,105,4,25,85,11,...,13,37,11,34,14,5,4,1,2,0
24,2020-02-04,3156,73,25,25,66,9,29,72,28,...,9,33,13,35,23,3,2,7,2,0
25,2020-02-05,2987,74,21,21,59,2,23,52,45,...,22,32,5,37,8,4,5,0,1,0


### 2. New deaths from coronavirus in mainland China by provincial divisions
##### Repeat the same steps as above

In [14]:
# New-deaths table, taken from position 4 of the scraped list.
# NOTE(review): the confirmed-cases cell reads tables[38], so list positions appear to have
# shifted since this index was chosen - verify that tables[4] is still the new-deaths table.
df_China_new_deathes_original = tables[4]
df_China_new_deathes_original.tail()

Unnamed: 0,Date (CST),Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet,National
22,2020-02-02,56.0,,,,,,1.0,,,...,,,,,,,,,,57[55]
23,2020-02-03,64.0,,,,,,,,,...,,,,,,,,,,64[56]
24,2020-02-04,65.0,,,,,,,,,...,,,,,,,,,,65[57]
25,2020-02-05,71.0,,,,,1.0,,,,...,,,1.0,,,,,,,73[51]
26,Net,549.0,,1.0,1.0,,1.0,1.0,1.0,,...,,,3.0,,,,,,,563


In [15]:
# Drop the right-most column ('National'), whose cells carry footnote markers such as '57[55]'.
df_China_new_deathes_c = df_China_new_deathes_original.iloc[:,:-1]
# Drop the last row only - the 'Net' summary row - so that just the dated rows remain.
df_China_new_deathes_r = df_China_new_deathes_c[:-1]

In [16]:
# Give the date column the short name "Date" that every later cell refers to.
df_China_new_deathes_r = df_China_new_deathes_r.rename(columns={"Date (CST)": "Date"})

# Build an independent frame whose "Date" strings (YYYY-MM-DD) are parsed into datetimes.
# `assign` returns a new DataFrame, so df_China_new_deathes_r keeps its text dates.
df_China_new_deathes = df_China_new_deathes_r.assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y-%m-%d")
)

In [17]:
# Move "Date" into the index so that the integer cast and cumulative sum applied later only
# see the numeric columns.
# The column is selected by name rather than by position: an accidental second run of this
# cell then raises KeyError instead of silently promoting the next column ('Hubei') to the index.
df_China_new_deathes = df_China_new_deathes.set_index("Date")

In [18]:
# Replace every missing value with 0 so the integer cast in the next cell cannot hit NaN;
# a blank cell is thereby counted as "no new deaths".
df_China_new_deathes=df_China_new_deathes.fillna(0)

In [19]:
# Cast every column to integer counts (the scraped values were displayed as floats such as 56.0).
df_China_new_deathes_final = df_China_new_deathes.astype(int)

* __The China Accumulated Deaths Table__ (Database)

In [20]:
# Running total of the daily new deaths, column by column. "Date" sits in the index while
# summing (so it is not part of the sum) and is turned back into a regular column afterwards.
df_China_deathes_final = df_China_new_deathes_final.cumsum().reset_index()
df_China_deathes_final.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01,294,0,1,1,0,0,0,1,0,...,1,0,0,2,0,0,0,0,0,0
22,2020-02-02,350,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
23,2020-02-03,414,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
24,2020-02-04,479,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
25,2020-02-05,550,0,1,1,0,1,1,1,0,...,1,0,0,3,0,0,0,0,0,0


* __The China New Deaths Table__ (Database)

In [21]:
# Turn the "Date" index back into a column (the frame is later written with index=False).
# NOTE(review): rebinding the same name makes this cell non-idempotent - a second run adds a
# stray "index" column - and the cumulative-sum cell must have run before it.
df_China_new_deathes_final=df_China_new_deathes_final.reset_index()
df_China_new_deathes_final.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01,45,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,2020-02-02,56,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
23,2020-02-03,64,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,2020-02-04,65,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,2020-02-05,71,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0


### 3. New recovered cases of coronavirus in mainland China by provincial divisions
##### Repeat the same steps as above

In [22]:
# New-recovered table, taken from position 5 of the scraped list.
# NOTE(review): the confirmed-cases cell reads tables[38], so list positions appear to have
# shifted since this index was chosen - verify that tables[5] is still the new-recovered table.
df_China_new_recovered_original = tables[5]
df_China_new_recovered_original.tail()

Unnamed: 0,Date (CST),Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet,National
22,2020-02-02,80.0,2.0,3.0,,13.0,1.0,4.0,8.0,1.0,...,1.0,,,,,3.0,,,,147[55]
23,2020-02-03,101.0,6.0,11.0,,12.0,,2.0,1.0,1.0,...,1.0,,,1.0,,3.0,,,,157[58]
24,2020-02-04,125.0,12.0,1.0,2.0,15.0,1.0,5.0,8.0,6.0,...,5.0,,2.0,1.0,,1.0,,,,262[57]
25,2020-02-05,113.0,,,,,,,,,...,,,,,,,,,,261[51]
26,Net,633.0,32.0,23.0,12.0,63.0,2.0,14.0,27.0,13.0,...,13.0,1.0,4.0,2.0,,4.0,1.0,,,1153


In [23]:
# Drop the right-most column ('National'), whose cells carry footnote markers such as '147[55]'.
df_China_new_recovered_c = df_China_new_recovered_original.iloc[:,:-1]
# Drop the last row - the 'Net' summary row - so that just the dated rows remain.
df_China_new_recovered_r = df_China_new_recovered_c[:-1]

In [25]:
# Give the date column the short name "Date" that every later cell refers to.
df_China_new_recovered_r = df_China_new_recovered_r.rename(columns={"Date (CST)": "Date"})

# Build an independent frame whose "Date" strings (YYYY-MM-DD) are parsed into datetimes.
# `assign` returns a new DataFrame, so df_China_new_recovered_r keeps its text dates.
df_China_new_recovered = df_China_new_recovered_r.assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y-%m-%d")
)

In [26]:
# Move "Date" into the index so that the integer cast and cumulative sum applied later only
# see the numeric columns.
# The column is selected by name rather than by position: an accidental second run of this
# cell then raises KeyError instead of silently promoting the next column ('Hubei') to the index.
df_China_new_recovered = df_China_new_recovered.set_index("Date")

In [27]:
# Replace every missing value with 0 so the integer cast in the next cell cannot hit NaN;
# a blank cell is thereby counted as "no new recoveries".
df_China_new_recovered=df_China_new_recovered.fillna(0)

In [28]:
# Cast every column to integer counts (the scraped values were displayed as floats such as 80.0).
df_China_new_recovered_final = df_China_new_recovered.astype(int)

* __The China Accumulated Recovered Cases Table__ (Database)

In [29]:
# Running total of the daily new recoveries, column by column. "Date" sits in the index while
# summing (so it is not part of the sum) and is turned back into a regular column afterwards.
df_China_recovered_final = df_China_new_recovered_final.cumsum().reset_index()
df_China_recovered_final.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01,215,12,8,11,23,0,3,10,5,...,0,6,1,2,0,0,0,1,0,0
22,2020-02-02,295,14,11,11,36,1,7,18,6,...,3,7,1,2,0,0,3,1,0,0
23,2020-02-03,396,20,22,11,48,1,9,19,7,...,3,8,1,2,1,0,6,1,0,0
24,2020-02-04,521,32,23,13,63,2,14,27,13,...,4,13,1,4,2,0,7,1,0,0
25,2020-02-05,634,32,23,13,63,2,14,27,13,...,4,13,1,4,2,0,7,1,0,0


* __The China New Recovered Cases Table__ (Database)

In [30]:
# Turn the "Date" index back into a column (the frame is later written with index=False).
# NOTE(review): rebinding the same name makes this cell non-idempotent - a second run adds a
# stray "index" column - and the cumulative-sum cell must have run before it.
df_China_new_recovered_final=df_China_new_recovered_final.reset_index()
df_China_new_recovered_final.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01,49,2,4,1,8,0,2,1,2,...,0,1,0,2,0,0,0,1,0,0
22,2020-02-02,80,2,3,0,13,1,4,8,1,...,3,1,0,0,0,0,3,0,0,0
23,2020-02-03,101,6,11,0,12,0,2,1,1,...,0,1,0,0,1,0,3,0,0,0
24,2020-02-04,125,12,1,2,15,1,5,8,6,...,1,5,0,2,1,0,1,0,0,0
25,2020-02-05,113,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Part 2: Import Coronavirus Mainland China Data Tables to SQLite

* __Table 1: The China Accumulated Confirmed Cases Table__ (Database)

### __Note: the primary key will be added to the SQLite tables afterwards using "DB Browser for SQLite"__

In [31]:
# List the columns that are about to be written to SQLite.
# NOTE(review): the saved output below shows province columns only, whereas the preview in the
# accumulated-confirmed cell also has columns such as 'Hubei(clinical)' and 'National(confirmed)'.
# The saved outputs look like they come from different runs - re-run top to bottom to confirm
# what is actually stored.
df_China_confirmed_final.columns

Index(['Date', 'Hubei', 'Guangdong', 'Beijing', 'Shanghai', 'Zhejiang',
       'Tianjin', 'Chongqing', 'Jiangxi', 'Shandong', 'Henan', 'Hunan',
       'Sichuan', 'Yunnan', 'Shanxi', 'Fujian', 'Liaoning', 'Hainan', 'Anhui',
       'Guizhou', 'Guangxi', 'Ningxia', 'Hebei', 'Jiangsu', 'Jilin',
       'Heilongjiang', 'Shaanxi', 'Xinjiang', 'Gansu', 'InnerMongolia',
       'Qinghai', 'Tibet'],
      dtype='object')

In [32]:
# Preview the first rows of the accumulated-confirmed frame before it is written to SQLite.
df_China_confirmed_final.head()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
0,2020-01-11,41,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-12,41,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-13,41,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-14,41,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-15,41,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# conn = sqlite3.connect('wuhan_pneumonia.sqlite')
# c = conn.cursor()
# c.execute("DROP TABLE IF EXISTS China_confirmed;")

In [None]:
# c.execute('''
#           CREATE TABLE China_confirmed
#           ([Date] TEXT PRIMARY KEY, 
#            [Hubei] INTEGER NOT NULL,
#            [Guangdong] INTEGER NOT NULL,
#            [Beijing] INTEGER NOT NULL,
#            [Shanghai] INTEGER NOT NULL, 
#            [Zhejiang] INTEGER NOT NULL,
#            [Tianjin] INTEGER NOT NULL, 
#            [Chongqing] INTEGER NOT NULL, 
#            [Jiangxi] INTEGER NOT NULL, 
#            [Shandong] INTEGER NOT NULL, 
#            [Henan] INTEGER NOT NULL, 
#            [Hunan] INTEGER NOT NULL,
#            [Sichuan] INTEGER NOT NULL, 
#            [Yunnan] INTEGER NOT NULL, 
#            [Shanxi] INTEGER NOT NULL, 
#            [Fujian] INTEGER NOT NULL, 
#            [Liaoning] INTEGER NOT NULL, 
#            [Hainan] INTEGER NOT NULL, 
#            [Anhui] INTEGER NOT NULL,
#            [Guizhou] INTEGER NOT NULL, 
#            [Guangxi] INTEGER NOT NULL, 
#            [Ningxia] INTEGER NOT NULL, 
#            [Hebei] INTEGER NOT NULL, 
#            [Jiangsu] INTEGER NOT NULL, 
#            [Jilin] INTEGER NOT NULL,
#            [Heilongjiang] INTEGER NOT NULL, 
#            [Shaanxi] INTEGER NOT NULL, 
#            [Xinjiang] INTEGER NOT NULL, 
#            [Gansu] INTEGER NOT NULL, 
#            [InnerMongolia] INTEGER NOT NULL,
#            [Qinghai] INTEGER NOT NULL, 
#            [Tibet] INTEGER NOT NULL
#            )
#           ''')
# conn.commit()
# conn.close()

In [33]:
# SQLAlchemy engine for the SQLite file wuhan_pneumonia.sqlite (path relative to the working directory).
engine = create_engine('sqlite:///wuhan_pneumonia.sqlite')
# Write the accumulated-confirmed frame to table China_confirmed without the DataFrame index.
# if_exists='replace' drops any existing table first, so a primary key added by hand in
# DB Browser has to be re-applied after every run of this cell.
# The datetime "Date" column reads back as text like '2020-02-01 00:00:00.000000' (see the next cell).
df_China_confirmed_final.to_sql(name='China_confirmed', con=engine, index=False , if_exists='replace')

In [34]:
# Read the table back from SQLite to check the round trip and show its last rows.
# This frame is also the input of the wide-to-long reshaping under "Table 7".
df_confirmed_China = pd.read_sql_query('SELECT * FROM China_confirmed',engine)
df_confirmed_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,9074,604,183,177,661,45,262,333,225,...,104,236,23,95,116,20,40,27,11,1
22,2020-02-02 00:00:00.000000,11177,725,212,203,724,56,312,391,259,...,113,271,31,121,124,24,51,34,13,1
23,2020-02-03 00:00:00.000000,13522,839,228,208,829,60,337,476,270,...,126,308,42,155,138,29,55,35,15,1
24,2020-02-04 00:00:00.000000,16678,912,253,233,895,69,366,548,298,...,135,341,55,190,161,32,57,42,17,1
25,2020-02-05 00:00:00.000000,19665,986,274,254,954,71,389,600,343,...,157,373,60,227,169,36,62,42,18,1


* __Table 2: The China New Confirmed Cases Table__ (Database)

In [35]:
# Write the daily new-confirmed frame to table China_new_confirmed, replacing any existing
# table of that name; the DataFrame index is not stored.
df_China_new_confirmed_final.to_sql('China_new_confirmed', engine, index=False,if_exists='replace')

In [36]:
# Read the table back from SQLite to check the round trip and show its last rows.
df_new_confirmed_China = pd.read_sql_query('SELECT * FROM China_new_confirmed',engine)
df_new_confirmed_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,1921,69,27,8,62,8,15,47,19,...,8,34,6,15,15,2,5,4,2,0
22,2020-02-02 00:00:00.000000,2103,121,29,26,63,11,50,58,34,...,9,35,8,26,8,4,11,7,2,0
23,2020-02-03 00:00:00.000000,2345,114,16,5,105,4,25,85,11,...,13,37,11,34,14,5,4,1,2,0
24,2020-02-04 00:00:00.000000,3156,73,25,25,66,9,29,72,28,...,9,33,13,35,23,3,2,7,2,0
25,2020-02-05 00:00:00.000000,2987,74,21,21,59,2,23,52,45,...,22,32,5,37,8,4,5,0,1,0


* __Table 3: The China Accumulated Deaths Table__ (Database)

In [37]:
# Write the accumulated-deaths frame to table China_deathes, replacing any existing table of
# that name; the DataFrame index is not stored.
df_China_deathes_final.to_sql('China_deathes', engine, index=False,if_exists='replace')

In [38]:
# Read the table back from SQLite to check the round trip and show its last rows.
df_deathes_China = pd.read_sql_query('SELECT * FROM China_deathes',engine)
df_deathes_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,294,0,1,1,0,0,0,1,0,...,1,0,0,2,0,0,0,0,0,0
22,2020-02-02 00:00:00.000000,350,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
23,2020-02-03 00:00:00.000000,414,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
24,2020-02-04 00:00:00.000000,479,0,1,1,0,0,1,1,0,...,1,0,0,2,0,0,0,0,0,0
25,2020-02-05 00:00:00.000000,550,0,1,1,0,1,1,1,0,...,1,0,0,3,0,0,0,0,0,0


* __Table 4: The China New Deaths Table__ (Database)

In [39]:
# Write the daily new-deaths frame to table China_new_deathes, replacing any existing table of
# that name; the DataFrame index is not stored.
df_China_new_deathes_final.to_sql('China_new_deathes', engine, index=False,if_exists='replace')

In [40]:
# Read the table back from SQLite to check the round trip and show its last rows.
df_new_deathes_China = pd.read_sql_query('SELECT * FROM China_new_deathes',engine)
df_new_deathes_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,45,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,2020-02-02 00:00:00.000000,56,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
23,2020-02-03 00:00:00.000000,64,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24,2020-02-04 00:00:00.000000,65,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,2020-02-05 00:00:00.000000,71,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0


* __Table 5: The China Accumulated Recovered Cases Table__ (Database)

In [41]:
# Write the accumulated-recovered frame to table China_recovered, replacing any existing table
# of that name; the DataFrame index is not stored.
df_China_recovered_final.to_sql('China_recovered', engine, index=False,if_exists='replace')

In [42]:
# Read the table back from SQLite to check the round trip and show its last rows.
df_recovered_China = pd.read_sql_query('SELECT * FROM China_recovered',engine)
df_recovered_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,215,12,8,11,23,0,3,10,5,...,0,6,1,2,0,0,0,1,0,0
22,2020-02-02 00:00:00.000000,295,14,11,11,36,1,7,18,6,...,3,7,1,2,0,0,3,1,0,0
23,2020-02-03 00:00:00.000000,396,20,22,11,48,1,9,19,7,...,3,8,1,2,1,0,6,1,0,0
24,2020-02-04 00:00:00.000000,521,32,23,13,63,2,14,27,13,...,4,13,1,4,2,0,7,1,0,0
25,2020-02-05 00:00:00.000000,634,32,23,13,63,2,14,27,13,...,4,13,1,4,2,0,7,1,0,0


* __Table 6: The China New Recovered Cases Table__ (Database)

In [43]:
# Write the daily new-recovered frame to table China_new_recovered, replacing any existing
# table of that name; the DataFrame index is not stored.
df_China_new_recovered_final.to_sql('China_new_recovered', engine, index=False,if_exists='replace')

In [44]:
# Read the table back from SQLite to check the round trip and show its last rows.
df_new_recovered_China = pd.read_sql_query('SELECT * FROM China_new_recovered',engine)
df_new_recovered_China.tail()

Unnamed: 0,Date,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
21,2020-02-01 00:00:00.000000,49,2,4,1,8,0,2,1,2,...,0,1,0,2,0,0,0,1,0,0
22,2020-02-02 00:00:00.000000,80,2,3,0,13,1,4,8,1,...,3,1,0,0,0,0,3,0,0,0
23,2020-02-03 00:00:00.000000,101,6,11,0,12,0,2,1,1,...,0,1,0,0,1,0,3,0,0,0
24,2020-02-04 00:00:00.000000,125,12,1,2,15,1,5,8,6,...,1,5,0,2,1,0,1,0,0,0
25,2020-02-05 00:00:00.000000,113,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


* __Table 7: The China Transformed Table__ (Database)

In [45]:
# The stored dates read back as text like '2020-02-01 00:00:00.000000': keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_confirmed_China = (
    df_confirmed_China
    .assign(Date=df_confirmed_China['Date'].str[0:10])
    .set_index("Date")
)
df_confirmed_China.tail()

Unnamed: 0_level_0,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,Henan,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,9074,604,183,177,661,45,262,333,225,493,...,104,236,23,95,116,20,40,27,11,1
2020-02-02,11177,725,212,203,724,56,312,391,259,566,...,113,271,31,121,124,24,51,34,13,1
2020-02-03,13522,839,228,208,829,60,337,476,270,675,...,126,308,42,155,138,29,55,35,15,1
2020-02-04,16678,912,253,233,895,69,366,548,298,764,...,135,341,55,190,161,32,57,42,17,1
2020-02-05,19665,986,274,254,954,71,389,600,343,851,...,157,373,60,227,169,36,62,42,18,1


In [46]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_confirmed_China_stacked = df_confirmed_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_confirmed_China_stacked_new = df_confirmed_China_stacked.to_frame()
df_confirmed_China_stacked_new.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-05,Xinjiang,36
2020-02-05,Gansu,62
2020-02-05,InnerMongolia,42
2020-02-05,Qinghai,18
2020-02-05,Tibet,1


In [47]:
# Bring both index levels (date and province) back as ordinary columns.
df_confirmed_China_stacked_final = df_confirmed_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_confirmed_China_stacked_final.columns = ['Date', 'Province', 'Confirmed']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] later.
df_confirmed_China_final = df_confirmed_China_stacked_final.set_index('Province')
df_confirmed_China_final.tail()

Unnamed: 0_level_0,Date,Confirmed
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Xinjiang,2020-02-05,36
Gansu,2020-02-05,62
InnerMongolia,2020-02-05,42
Qinghai,2020-02-05,18
Tibet,2020-02-05,1


In [48]:
# Reload the table from SQLite, rebinding the name to a fresh frame that has "Date" as a column.
df_new_confirmed_China = pd.read_sql_query('SELECT * FROM China_new_confirmed',engine)

In [49]:
# The stored dates read back as text like '2020-02-01 00:00:00.000000': keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_new_confirmed_China = (
    df_new_confirmed_China
    .assign(Date=df_new_confirmed_China['Date'].str[0:10])
    .set_index("Date")
)
df_new_confirmed_China.tail()

Unnamed: 0_level_0,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,Henan,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,1921,69,27,8,62,8,15,47,19,71,...,8,34,6,15,15,2,5,4,2,0
2020-02-02,2103,121,29,26,63,11,50,58,34,73,...,9,35,8,26,8,4,11,7,2,0
2020-02-03,2345,114,16,5,105,4,25,85,11,109,...,13,37,11,34,14,5,4,1,2,0
2020-02-04,3156,73,25,25,66,9,29,72,28,89,...,9,33,13,35,23,3,2,7,2,0
2020-02-05,2987,74,21,21,59,2,23,52,45,87,...,22,32,5,37,8,4,5,0,1,0


In [50]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_new_confirmed_China_stacked = df_new_confirmed_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_new_confirmed_China_stacked_new = df_new_confirmed_China_stacked.to_frame()
df_new_confirmed_China_stacked_new.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-05,Xinjiang,4
2020-02-05,Gansu,5
2020-02-05,InnerMongolia,0
2020-02-05,Qinghai,1
2020-02-05,Tibet,0


In [51]:
# Bring both index levels (date and province) back as ordinary columns.
df_new_confirmed_China_stacked_final = df_new_confirmed_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_new_confirmed_China_stacked_final.columns = ['Date', 'Province', 'New Confirmed']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] later.
df_new_confirmed_China_final = df_new_confirmed_China_stacked_final.set_index('Province')
df_new_confirmed_China_final.tail()

Unnamed: 0_level_0,Date,New Confirmed
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Xinjiang,2020-02-05,4
Gansu,2020-02-05,5
InnerMongolia,2020-02-05,0
Qinghai,2020-02-05,1
Tibet,2020-02-05,0


In [52]:
# Reload the table from SQLite, rebinding the name to a fresh frame that has "Date" as a column.
df_deathes_China = pd.read_sql_query('SELECT * FROM China_deathes',engine)

In [53]:
# The stored dates read back as text like '2020-02-01 00:00:00.000000': keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_deathes_China = (
    df_deathes_China
    .assign(Date=df_deathes_China['Date'].str[0:10])
    .set_index("Date")
)
df_deathes_China.tail()

Unnamed: 0_level_0,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,Henan,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,294,0,1,1,0,0,0,1,0,2,...,1,0,0,2,0,0,0,0,0,0
2020-02-02,350,0,1,1,0,0,1,1,0,2,...,1,0,0,2,0,0,0,0,0,0
2020-02-03,414,0,1,1,0,0,1,1,0,2,...,1,0,0,2,0,0,0,0,0,0
2020-02-04,479,0,1,1,0,0,1,1,0,2,...,1,0,0,2,0,0,0,0,0,0
2020-02-05,550,0,1,1,0,1,1,1,0,2,...,1,0,0,3,0,0,0,0,0,0


In [54]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_deathes_China_stacked = df_deathes_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_deathes_China_stacked_new = df_deathes_China_stacked.to_frame()
df_deathes_China_stacked_new.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-05,Xinjiang,0
2020-02-05,Gansu,0
2020-02-05,InnerMongolia,0
2020-02-05,Qinghai,0
2020-02-05,Tibet,0


In [55]:
# Bring both index levels (date and province) back as ordinary columns.
df_deathes_China_stacked_final =df_deathes_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_deathes_China_stacked_final.columns = ['Date', 'Province', 'Death']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] later.
df_deathes_China_final = df_deathes_China_stacked_final.set_index('Province')
df_deathes_China_final.tail()

Unnamed: 0_level_0,Date,Death
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Xinjiang,2020-02-05,0
Gansu,2020-02-05,0
InnerMongolia,2020-02-05,0
Qinghai,2020-02-05,0
Tibet,2020-02-05,0


In [56]:
# Reload the table from SQLite, rebinding the name to a fresh frame that has "Date" as a column.
df_new_deathes_China = pd.read_sql_query('SELECT * FROM China_new_deathes',engine)

In [57]:
# The stored dates read back as text like '2020-02-01 00:00:00.000000': keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_new_deathes_China = (
    df_new_deathes_China
    .assign(Date=df_new_deathes_China['Date'].str[0:10])
    .set_index("Date")
)
df_new_deathes_China.tail()

Unnamed: 0_level_0,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,Henan,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,45,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-02-02,56,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-02-03,64,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-02-04,65,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-02-05,71,0,0,0,0,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [58]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_new_deathes_China_stacked = df_new_deathes_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_new_deathes_China_stacked_new = df_new_deathes_China_stacked.to_frame()
df_new_deathes_China_stacked_new.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-05,Xinjiang,0
2020-02-05,Gansu,0
2020-02-05,InnerMongolia,0
2020-02-05,Qinghai,0
2020-02-05,Tibet,0


In [59]:
# Bring both index levels (date and province) back as ordinary columns.
df_new_deathes_China_stacked_final =df_new_deathes_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_new_deathes_China_stacked_final.columns = ['Date', 'Province', 'New Death']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] later.
df_new_deathes_China_final = df_new_deathes_China_stacked_final.set_index('Province')
df_new_deathes_China_final.tail()

Unnamed: 0_level_0,Date,New Death
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Xinjiang,2020-02-05,0
Gansu,2020-02-05,0
InnerMongolia,2020-02-05,0
Qinghai,2020-02-05,0
Tibet,2020-02-05,0


In [60]:
# Reload the table from SQLite, rebinding the name to a fresh frame that has "Date" as a column.
df_recovered_China = pd.read_sql_query('SELECT * FROM China_recovered',engine)

In [61]:
# The stored dates read back as text like '2020-02-01 00:00:00.000000': keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_recovered_China = (
    df_recovered_China
    .assign(Date=df_recovered_China['Date'].str[0:10])
    .set_index("Date")
)
df_recovered_China.tail()

Unnamed: 0_level_0,Hubei,Guangdong,Beijing,Shanghai,Zhejiang,Tianjin,Chongqing,Jiangxi,Shandong,Henan,...,Hebei,Jiangsu,Jilin,Heilongjiang,Shaanxi,Xinjiang,Gansu,InnerMongolia,Qinghai,Tibet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,215,12,8,11,23,0,3,10,5,3,...,0,6,1,2,0,0,0,1,0,0
2020-02-02,295,14,11,11,36,1,7,18,6,13,...,3,7,1,2,0,0,3,1,0,0
2020-02-03,396,20,22,11,48,1,9,19,7,19,...,3,8,1,2,1,0,6,1,0,0
2020-02-04,521,32,23,13,63,2,14,27,13,40,...,4,13,1,4,2,0,7,1,0,0
2020-02-05,634,32,23,13,63,2,14,27,13,40,...,4,13,1,4,2,0,7,1,0,0


In [62]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_recovered_China_stacked = df_recovered_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_recovered_China_stacked_new = df_recovered_China_stacked.to_frame()
df_recovered_China_stacked_new.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-05,Xinjiang,0
2020-02-05,Gansu,7
2020-02-05,InnerMongolia,1
2020-02-05,Qinghai,0
2020-02-05,Tibet,0


In [63]:
# Bring both index levels (date and province) back as ordinary columns.
df_recovered_China_stacked_final =df_recovered_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_recovered_China_stacked_final.columns = ['Date', 'Province', 'Recovered']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] later.
df_recovered_China_final = df_recovered_China_stacked_final.set_index('Province')
df_recovered_China_final.head()

Unnamed: 0_level_0,Date,Recovered
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Hubei,2020-01-11,2
Guangdong,2020-01-11,0
Beijing,2020-01-11,0
Shanghai,2020-01-11,0
Zhejiang,2020-01-11,0


In [64]:
# Reload the table from SQLite, rebinding the name to a fresh frame that has "Date" as a column.
df_new_recovered_China = pd.read_sql_query('SELECT * FROM China_new_recovered',engine)

In [65]:
# The stored dates read back as text with a time component: keep only the leading
# 'YYYY-MM-DD' part and use it as the row index.
df_new_recovered_China = (
    df_new_recovered_China
    .assign(Date=df_new_recovered_China['Date'].str[0:10])
    .set_index("Date")
)

In [66]:
# Wide -> long: stack() folds the column labels into a second index level, giving one value
# per (Date, column) pair.
df_new_recovered_China_stacked = df_new_recovered_China.stack()
# Turn the Series into a single-column frame (column label 0), still indexed by (Date, column).
df_new_recovered_China_stacked_new = df_new_recovered_China_stacked.to_frame()

In [67]:
# Bring both index levels (date and province) back as ordinary columns.
df_new_recovered_China_stacked_final =df_new_recovered_China_stacked_new.reset_index(level=[0,1])
# Label the three columns positionally: date, province, value.
df_new_recovered_China_stacked_final.columns = ['Date', 'Province', 'New Recovered']
# Index by province; the frame is merged with the other metrics on ['Province', 'Date'] in the next cell.
df_new_recovered_China_final = df_new_recovered_China_stacked_final.set_index('Province')
df_new_recovered_China_final.head()

Unnamed: 0_level_0,Date,New Recovered
Province,Unnamed: 1_level_1,Unnamed: 2_level_1
Hubei,2020-01-11,2
Guangdong,2020-01-11,0
Beijing,2020-01-11,0
Shanghai,2020-01-11,0
Zhejiang,2020-01-11,0


In [68]:
# Combine the six long tables into one frame keyed by (Province, Date).
# Every merge is an inner join (the pandas default), so a (Province, Date) pair survives only
# if it is present in all six inputs.

# accumulated + new, per metric
df_merge1 = df_confirmed_China_final.merge(df_new_confirmed_China_final, on=['Province', 'Date'])
df_merge2 = df_deathes_China_final.merge(df_new_deathes_China_final, on=['Province', 'Date'])
df_merge3 = df_recovered_China_final.merge(df_new_recovered_China_final, on=['Province', 'Date'])

# confirmed + deaths, then + recovered
df_merge4 = df_merge1.merge(df_merge2, on=['Province', 'Date'])
df_merge5 = df_merge4.merge(df_merge3, on=['Province', 'Date'])

In [69]:
# Turn the index back into a column and tag every row with a constant Country.
df_merge5 = df_merge5.reset_index().assign(Country='China')
# Select the columns of the final table in their presentation order.
df_china_current_table = df_merge5.loc[
    :,
    ['Date','Province','Country','Confirmed','New Confirmed','Death','New Death','Recovered','New Recovered'],
]
df_china_current_table

Unnamed: 0,Date,Province,Country,Confirmed,New Confirmed,Death,New Death,Recovered,New Recovered
0,2020-01-11,Hubei,China,41,41,1,1,2,2
1,2020-01-11,Guangdong,China,0,0,0,0,0,0
2,2020-01-11,Beijing,China,0,0,0,0,0,0
3,2020-01-11,Shanghai,China,0,0,0,0,0,0
4,2020-01-11,Zhejiang,China,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
801,2020-02-05,Xinjiang,China,36,4,0,0,0,0
802,2020-02-05,Gansu,China,62,5,0,0,7,0
803,2020-02-05,InnerMongolia,China,42,0,0,0,1,0
804,2020-02-05,Qinghai,China,18,1,0,0,0,0


In [70]:
# Remove the spaces from the three "New ..." column labels.
# NOTE(review): "NewRecorvered" looks like a misspelling of "NewRecovered". It is
# reproduced unchanged because the label is written to the database as a column
# name — confirm nothing reads it before correcting the spelling.
df_china_current_table = df_china_current_table.rename(
    columns={
        "New Confirmed": "NewConfirmed",
        "New Death": "NewDeath",
        "New Recovered": "NewRecorvered",
    }
)

In [71]:
# Persist the combined table as China_current_final, replacing any existing
# table of that name and writing the DataFrame index as an extra column.
df_china_current_table.to_sql(
    name='China_current_final',
    con=engine,
    if_exists='replace',
    index=True,
)

In [72]:
# Read the stored table back to check what was written, and show its last rows.
df_China_transformed = pd.read_sql_query(
    sql='SELECT * FROM China_current_final',
    con=engine,
)
df_China_transformed.tail()

Unnamed: 0,index,Date,Province,Country,Confirmed,NewConfirmed,Death,NewDeath,Recovered,NewRecorvered
801,801,2020-02-05,Xinjiang,China,36,4,0,0,0,0
802,802,2020-02-05,Gansu,China,62,5,0,0,7,0
803,803,2020-02-05,InnerMongolia,China,42,0,0,0,1,0
804,804,2020-02-05,Qinghai,China,18,1,0,0,0,0
805,805,2020-02-05,Tibet,China,1,0,0,0,0,0


### Part 4: Wuhan Deaths by Sex and Age Group

In [73]:
# Load the Wuhan case file; the following cells read its Sex and Age columns.
df_wuhan_sex_death = pd.read_csv(filepath_or_buffer="data/wuhan.csv")

In [74]:
# Age bin edges. pd.cut's default right-closed intervals give (1, 10], (10, 20],
# ..., (90, 100]; ages outside (1, 100] fall in no bin.
bins = [1,10,20,30,40,50,60,70,80,90,100]
# Group by sex and age bin. observed=True keeps only the (Sex, bin) pairs that
# actually occur. Without it the result depends on the pandas version: releases
# that apply observed=False also emit zero-count rows for empty bins, which
# would break the positional head(4)/tail(6) split and the six hard-coded age
# labels used by the cells that follow.
sex_groups = df_wuhan_sex_death.groupby(
    ['Sex', pd.cut(df_wuhan_sex_death['Age'], bins)],
    observed=True,
)
# Number of rows in each (Sex, age-bin) group.
df_sex_death = sex_groups.size()

In [75]:
# Promote the group-size Series to a frame and move its two index levels
# (Sex and the Age bin) into ordinary columns; the counts stay in column 0.
df_sex_death = pd.DataFrame(df_sex_death).reset_index()

In [76]:
# Split the counts by position: the first four rows and the last six rows.
# NOTE(review): this assumes the first 4 rows are exactly the Female groups and
# the last 6 exactly the Male groups — the split is positional and depends on
# the data; confirm, or filter on the Sex column instead.
new_df_Female_death = df_sex_death.iloc[:4]
new_df_Male_death = df_sex_death.iloc[-6:]
# Outer-join on the age bin so bins present on only one side are kept;
# overlapping columns receive the _x (left/Male) and _y (right/Female) suffixes.
new_df_final_sex_death = new_df_Male_death.merge(new_df_Female_death, on='Age', how="outer")

In [77]:
# Age bins with no row on the right-hand (female) side of the outer merge come
# back as NaN in Sex_y and 0_y; set the label on every row and zero the counts.
new_df_final_sex_death['Sex_y'] = "Female"
# Assign the filled column back rather than calling fillna(inplace=True) on the
# column selection: under pandas Copy-on-Write the selection is a temporary
# object, so the in-place form would leave the NaNs in the frame and make the
# totals below NaN as well.
new_df_final_sex_death['0_y'] = new_df_final_sex_death['0_y'].fillna(0)
# Total per age bin = left-side (male) count + right-side (female) count.
new_df_final_sex_death['Total'] = new_df_final_sex_death['0_x']+new_df_final_sex_death['0_y']
# Replace the merge-suffixed labels with descriptive column names.
new_df_final_sex_death = new_df_final_sex_death.rename(columns={"Sex_x": "Male", 
                                                                "0_x": "Male_death", 
                                                                "Sex_y": "Female",
                                                                "0_y": "Female_death",
                                                                "Total": "Total_death"})

In [78]:
# Order the columns: age bin, female label and count, male label and count, total.
new_df_final_sex_death = new_df_final_sex_death.loc[
    :, ['Age','Female','Female_death','Male',"Male_death","Total_death"]
]
# Display floats with thousands separators and no decimals. This is a global
# pandas display option, so it affects every later output in the session.
pd.set_option('display.float_format', '{:,.0f}'.format)

In [79]:
# Label the rows with readable age ranges.
# NOTE(review): the labels are hard-coded and assume exactly six rows in this
# order — confirm they match the Age bins actually present in the frame.
new_df_final_sex_death.index = ['30-40','40-50','50-60','60-70','70-80','80-90']
# Drop the original Age column, then turn the labelled index into the new Age column.
new_df_final_sex_death = (
    new_df_final_sex_death[['Female','Female_death','Male',"Male_death","Total_death"]]
    .reset_index()
    .rename(columns={"index": "Age"})
)
new_df_final_sex_death

Unnamed: 0,Age,Female,Female_death,Male,Male_death,Total_death
0,30-40,Female,0,Male,2,2
1,40-50,Female,1,Male,1,2
2,50-60,Female,0,Male,4,4
3,60-70,Female,6,Male,9,15
4,70-80,Female,5,Male,8,13
5,80-90,Female,3,Male,9,12


In [80]:
# Export the sex/age summary to SQLite.
# Note: this rebinds the notebook-wide `engine` name to the wuhan_pneumonia.sqlite file.
engine = create_engine('sqlite:///wuhan_pneumonia.sqlite')
# Replace any existing Sex_Death table; the DataFrame index is not written.
new_df_final_sex_death.to_sql(
    name='Sex_Death',
    con=engine,
    if_exists='replace',
    index=False,
)

In [81]:
# Read the Sex_Death table back to check what was written.
new_df_final_sex_death = pd.read_sql_query(
    sql='SELECT * FROM Sex_Death',
    con=engine,
)
new_df_final_sex_death

Unnamed: 0,Age,Female,Female_death,Male,Male_death,Total_death
0,30-40,Female,0,Male,2,2
1,40-50,Female,1,Male,1,2
2,50-60,Female,0,Male,4,4
3,60-70,Female,6,Male,9,15
4,70-80,Female,5,Male,8,13
5,80-90,Female,3,Male,9,12
