In [2]:
import os
import pandas as pd
import re
from datetime import date, datetime, timedelta

In [3]:
currentPath = os.getcwd()
currentPath 

'/Users/pipika/Documents/FIT5147'

# Daily reports: 

## check the column names and missing columns
Following steps will only read the files ending with `-2020.csv` and will generate all the column names used by those files. 

With the column names generated, in case some files don't have required column, check if any files need any actions. 

In [4]:
# Step 1
dailyReports = []       # Store all the file names of sourced daily reports ** WILL USE IT ALL THE TIME **
columnNames = []        # Store all the column names of every daily reports

# With the column names in step 1
# Step 2
country = []            # Store all the file names that don't have a 'country/region' column
confirmed = []          # Store all the file names that don't have a 'confirmed' column
deaths = []             # Store all the file names that don't have a 'deaths' column
recovered = []          # Store all the file names that don't have a 'recovered' column
active = []             # Store all the file names that don't have a 'active' column


# Input folder
folderName = 'Datasets/csse_covid_19_daily_reports'
folderPath = os.path.join(currentPath, folderName)

# Generate a list with all the file names under the directory
for curDir, dirList, nameList in os.walk(folderPath, topdown=True):
    
    # fileName in MM-DD-YYYY.csv
    for fileName in nameList:  
        
        # ----------------------------------------------------------------------------------------------
        # Step 1: Saving file names & checking column names
        # ----------------------------------------------------------------------------------------------
        
        # Only use 2020.csv data
        if fileName.endswith('-2020.csv') is True: 
            
            # Store file names in the list
            dailyReports.append(fileName)
             
            # Read daily report one by one
            df = pd.read_csv(os.path.join(curDir, fileName))

            # Add unique column names into list
            for col in df.columns:
                if col not in columnNames:
                    columnNames.append(col)  
            
            # ----------------------------------------------------------------------------------------------
            # Step 2: Check missing columns
            # ----------------------------------------------------------------------------------------------

            if ('Country/Region' not in df.columns) and ('Country_Region' not in df.columns):
                country.append(fileName)

            if 'Confirmed' not in df.columns:
                confirmed.append(fileName)

            if 'Deaths' not in df.columns:
                deaths.append(fileName)

            if 'Recovered' not in df.columns:
                recovered.append(fileName)

            if 'Active' not in df.columns:
                active.append(fileName)

columnNames, len(country), len(confirmed), len(deaths), len(recovered), len(active)

(['Province/State',
  'Country/Region',
  'Last Update',
  'Confirmed',
  'Deaths',
  'Recovered',
  'FIPS',
  'Admin2',
  'Province_State',
  'Country_Region',
  'Last_Update',
  'Lat',
  'Long_',
  'Active',
  'Combined_Key',
  'Incidence_Rate',
  'Case-Fatality_Ratio',
  'Incident_Rate',
  'Case_Fatality_Ratio',
  'Latitude',
  'Longitude'],
 0,
 0,
 0,
 0,
 60)

Data of different state/provinces group by its country will be add up. This project will be looking at country based data. Thus, longitude and latitude cannot be used, it will be combined with other dataset.

Case Fatality Ratio not needed for answering Q1, although this concept is needed in Q2, its better to have an ongoing calculation. Will be solved later.

Thus, the columns will be used are: 
- Active
- Confirmed
- Deaths
- Recovered
- **Country/Region** / Country_Region

Some files missing `active` columns. Active cases is a dependent value, can be calculated and add in the value directly in the next step. 

# Output 1: `/Modified_Datasets/1_cleanedDailyReports`
In this step, wrangle the downloaded dataset to a new folder called ***`1_cleanedDailyReports`***. The actions are:
1. Only keep files in 2020
2. Drop duplicate rows if there is any
3. Unified column names: Country/Region / Country_Region to `Country/Region`
4. Fill in na value with default value
    - 'Confirmed','Recovered','Death'   -> 0
    - 'Country/Region'                  -> None
5. Add `Active` column if there isn't any
    - 'Active' = 'Confirmed'-'Recovered'-'Deaths'
6. Drop unrelated columns, only keep these columns:
    - Country/Region
    - Confirmed
    - Deaths
    - Recovered
    - Active
7. Group by 'Country/Region' column and sum other values, save to new dataframe 'group'
8. Add `Date` column from its own filename in form 'YYYY-MM-DD'
9. Write to csv files with its original file name

In [5]:
# Recognize date value, read date from file name, change form and fill in columns
from datetime import date, datetime, timedelta

# Make an output directory
output1Path = os.path.join(currentPath,'Modified_Datasets/1_cleanedDailyReports')
if not os.path.exists(output1Path):
    os.makedirs(output1Path)

# Input folder
folderName = 'Datasets/csse_covid_19_daily_reports'
folderPath = os.path.join(currentPath, folderName)
    
# Daily reports in 2020
for fileName in dailyReports: 

    # Read daily report one by one
    df = pd.read_csv(os.path.join(folderPath, fileName))
    
    # Drop duplicate rows 
    df = df.drop_duplicates()
        
    # Unified different column names
    df = df.rename(columns = {'Country_Region': 'Country/Region'})
    
    # Add 0 for numeric columns and None for str columns
    df[['Confirmed','Recovered','Deaths']] = df[['Confirmed','Recovered','Deaths']].fillna(0)
    df[['Country/Region']] = df[['Country/Region']].fillna('None')

    # Add 'Active' column if original file don't have one
    if 'Active' not in df.columns:
        df['Active'] = df['Confirmed'] - df['Recovered'] - df['Deaths'] 
        
    # Select the column wanted
    df = df.loc[:, df.columns.isin(['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active'])]
    
    # Add up all the total cases of each country
    group = df.groupby('Country/Region').sum()
      
    # Add 'Date' column 
    # Recognize date in MM-DD-YYYY form and shift to 'YYYY-MM-DD' form
    reportDate = datetime.strptime(os.path.splitext(fileName)[0],'%m-%d-%Y').date().strftime('%Y-%m-%d')
    group['Date'] = reportDate
      
    # Generate the file path
    filePath = os.path.join(output1Path, fileName)
    
    # Write the file back, keep the index as 'country' recognized as index
    group.to_csv(filePath, index = True)

# Combine with geographical coordinate dataset

## Check for country names in all the cleaned files in `1_cleanedDailyReports`
Generate a unique country name list.

In [6]:
# Input folder
folderName = 'Modified_Datasets/1_cleanedDailyReports'
folderPath = os.path.join(currentPath, folderName)

countryName = []        # Store all the file names of sourced daily reports 

for fileName in dailyReports:

    # Read daily report one by one
    df = pd.read_csv(os.path.join(folderPath, fileName))
    
    # Add country names into list
    for country in df['Country/Region']:
        if country not in countryName:
            countryName.append(country) 
countryName.sort()
len(countryName)

241

## Read `countries.csv` and check for unmatching naming
A `geo` list was initialized which will be act like a data frame structure, will be storing all the rows that will be put in the final `geoCoor` data frame. 

With unmatching country naming, will be resolve in two different ways:
- With the different calling of country names in the downloaded dataset, change and unified them within the file (read, change, write back to `1_cleanedDailyReports` folder.
    - Check matching country names directly `==` -> locate df with condition, `.tolist()` save rows to list -> append rows in `geo` (column:'id', 'name', 'latitude', 'longitude', 'native')
    - Check if match the native writing in rows `in` `geo[i][(native)4]` -> pair of name will be unify in the next step
    - Split the country names in to key words, excluding the connecting words ('and','of'), check for matching words `in` `geo[i][1(name)]` -> obvious matching pairs will unify in next step


In [7]:
# Input folder & Output folder
folderName = 'Modified_Datasets/1_cleanedDailyReports'
folderPath = os.path.join(currentPath, folderName)

# A set of matching country names
change = ['Cabo Verde','Timor-Leste',' Azerbaijan','Mainland China','Iran (Islamic Republic of)','Viet Nam',
          'Republic of Ireland','Republic of Moldova','Korea, South','Saint Martin','US','UK','Taiwan*',
          'Taipei and environs','Bahamas, The','Bahamas','Gambia, The','Hong Kong SAR','Macao SAR','Macao',
          "Cote d'Ivoire",'Holy See','Gambia','Republic of the Congo','West Bank and Gaza','Russian Federation','Czechia']
to = ['Cape Verde','East Timor','Azerbaijan','China','Iran','Vietnam',
      'Ireland','Moldova','South Korea','St. Martin','United States','United Kingdom','Taiwan',
      'Taiwan','The Bahamas','The Bahamas','The Gambia','Hong Kong','Macau','Macau',
      'Ivory Coast','Vatican City','The Gambia','Congo (Brazzaville)','Palestine','Russia','Czech Republic']

countryName = []        # Store all the file names of sourced daily reports
for fileName in dailyReports:

    # Read daily report one by one
    df = pd.read_csv(os.path.join(folderPath, fileName))
    
    # Replace the different country names with only one form
    df['Country/Region'] = df['Country/Region'].replace(change, to)
    
    # Write the file back
    df.to_csv(os.path.join(folderPath, fileName), index = False)
    
    # Check that again
    for country in df['Country/Region']:
        if country not in countryName:
            countryName.append(country) 
            
countryName.sort()
len(countryName)

216

In [8]:
# Create 'countries' data frame storing long and lat values

# Input file
fileName = 'countries.csv'
filePath = os.path.join(currentPath, 'Datasets', fileName)
df = pd.read_csv(filePath)

from re import search

geo = []                # A list storing all the row lists, will transform into data frame structure
unmatch = []            # Store all the country names (from daily reports) that not in (matching) countries.csv 

# Save matching country to geo list first
for cName in countryName:
    if cName in df.name.values:
        row = df.loc[df.name.values == cName, ['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
        geo.append(row[0])
    else:
        unmatch.append(cName)

## Find out the naming difference within the countryName list, if found, change in files

# Check for native country name writing, add to 'change' and 'to' in the last step
for cName in unmatch:
    for i in range(len(geo)):
        if cName in geo[i][4]:
            print('native writing: ',[cName,geo[i][1]])
            unmatch.remove(cName)

# Split names in to single words, rough finding, add obvious matching to 'change' and 'to' in the last step
for cName in unmatch:
    split = cName.split()
    for words in split:
        for i in range(len(geo)):
            if words in geo[i][1] and words not in ['and', 'of']:
                print('search words: ',[cName,geo[i][1]])



search words:  ['Channel Islands', 'Cayman Islands']
search words:  ['Channel Islands', 'Faroe Islands']
search words:  ['Channel Islands', 'Marshall Islands']
search words:  ['Channel Islands', 'Solomon Islands']
search words:  ['North Ireland', 'Ireland']
search words:  ['Papua New Guinea', 'New Zealand']
search words:  ['Papua New Guinea', 'Equatorial Guinea']
search words:  ['Papua New Guinea', 'Guinea']
search words:  ['Papua New Guinea', 'Guinea-Bissau']
search words:  ['Republic of Korea', 'Central African Republic']
search words:  ['Republic of Korea', 'Czech Republic']
search words:  ['Republic of Korea', 'Dominican Republic']
search words:  ['Saint Barthelemy', 'Saint Lucia']
search words:  ['Saint Kitts and Nevis', 'Saint Lucia']
search words:  ['Saint Vincent and the Grenadines', 'Saint Lucia']
search words:  ['South Korea', 'South Africa']
search words:  ['South Korea', 'South Sudan']
search words:  ['St. Martin', 'Martinique']


With matching names found, unified them within its own files.

- With the different calling between two datasets (Daily reports and countries.csv), change the country name in the data frame read from countries.csv to match the naming in daily reports.
    - Search ignoring cases, add row in `geo` with the country name from daily reports at second position of row
    - Search with regular expression using `.contains()`, ignoring cases 
        -Some country names need to go back to the last step and change within files
    - Split names in to words, excluding ['the','of','islands','and','republic','west'], search with `.contains()`, but only print if the output has only 1 result
    - With some other naming of the country names, googled and unified mannually

Finally, transform list into data frame. Add 'North Ireland' as not included in countries.csv. Ignore the cruise ship related data.

In [9]:
## Rough search in df, if found, change in df

# Ignore cases

for cName in unmatch:
    if cName.lower() in df.name.str.lower().values:
        row = df.loc[df.name.str.lower().values == cName.lower(), ['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
        print('Ignore cases: ',[cName, row])
        row[0][1] = cName
        geo.append(row[0])
        unmatch.remove(cName)

# Use .contains() to search with re, ignore case, match the naming in map to the WHO report

for cName in unmatch:
    row = df.loc[df.name.str.contains(cName, flags=re.I, regex=True),['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
    print('contains: ',[cName,row])
    # only write in the row with matching output
    if len(row) != 0:
        row[0][1] = cName
        geo.append(row[0])
        unmatch.remove(cName)

# Found that 'UK' & 'US' are missleading the .contains function, change to long form in last step
# Also found some countries with two/three different names, unified them in last step
    
    
# Split names in to single words, rough finding

for cName in unmatch:
    split = cName.lower().split()
    for words in split:
        if words not in ['the','of','islands','and','republic','west']:
            row = df.loc[df.name.str.contains(words, flags=re.I, regex=True),['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
            if len(row) == 1:
                print('word=',words,'split, contains: ',[cName,row])
                
                # only write in the row with matching output
                row[0][1] = cName
                print('------',[cName,row[0]])
                geo.append(row[0])
                if cName in unmatch:
                    unmatch.remove(cName)

# Match name manully

tuples = [('Czech','Czechia'),('Myanmar','Burma'),('Swaziland','Eswatini'),('Korea South','South Korea'),
          ('Korea North','Republic of Korea'),('Gambia','The Gambia'),
          ('Palestin','Palestine'),('Martin','St. Martin'),('Cura','Curacao'),('Ivory','Ivory Coast')]

for t in tuples:
    row = df.loc[df.name.str.contains(t[0], flags=re.I, regex=True),['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
    # only write in the row with matching output
    if len(row) == 1:
        row[0][1] = t[1]
        geo.append(row[0])
        if t[1] in unmatch:
            unmatch.remove(t[1])   

df.name = df.name.replace(['Congo The Democratic Republic Of The','Congo'],['Congo (Kinshasa)','Congo (Brazzaville)'])
row = df.loc[df.name.values == 'Congo (Kinshasa)', ['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
geo.append(row[0])
unmatch.remove('Congo (Kinshasa)')
row = df.loc[df.name.values == 'Congo (Brazzaville)', ['id', 'name', 'latitude', 'longitude', 'native']].values.tolist()
geo.append(row[0])
unmatch.remove('Congo (Brazzaville)')

## Find repeating data

for i in range(len(geo)):
    for j in range(len(geo)):
        if i != j and geo[i][0] == geo[j][0]:
            print(geo[i],geo[j])
geo.sort()  

geoCoor = pd.DataFrame(geo, columns=['id','Country/Region','Latitude', 'Longitude','native'])
geoCoor = geoCoor.drop_duplicates()
geoCoor = geoCoor.loc[:, geoCoor.columns.isin(['Country/Region','Latitude', 'Longitude'])]


nIreland = {'Country/Region':'North Ireland','Latitude':54.7877, 'Longitude':-6.4923}
geoCoor = geoCoor.append(nIreland, ignore_index=True)
geoCoor

# # Only left cruise ship related:  ['Cruise Ship', 'Diamond Princess', 'Channel Islands','Others','MS Zaandam']


Ignore cases:  ['Antigua and Barbuda', [[10, 'Antigua And Barbuda', 17.05, -61.8, 'Antigua and Barbuda']]]
Ignore cases:  ['Papua New Guinea', [[171, 'Papua new Guinea', -6.0, 147.0, 'Papua Niugini']]]
Ignore cases:  ['Saint Kitts and Nevis', [[185, 'Saint Kitts And Nevis', 17.33333333, -62.75, 'Saint Kitts and Nevis']]]
Ignore cases:  ['Trinidad and Tobago', [[223, 'Trinidad And Tobago', 11.0, -61.0, 'Trinidad and Tobago']]]
contains:  ['Burma', []]
contains:  ['Channel Islands', []]
contains:  ['Congo (Brazzaville)', []]
contains:  ['Congo (Kinshasa)', []]
contains:  ['Croatia', [[55, 'Croatia (Hrvatska)', 45.16666666, 15.5, 'Hrvatska']]]
contains:  ['Curacao', []]
contains:  ['Diamond Princess', []]
contains:  ['Eswatini', []]
contains:  ['Fiji', [[73, 'Fiji Islands', -18.0, 175.0, 'Fiji']]]
contains:  ['Hong Kong', [[98, 'Hong Kong S.A.R.', 22.25, 114.16666666, '香港']]]
contains:  ['MS Zaandam', []]
contains:  ['Macau', [[128, 'Macau S.A.R.', 22.16666666, 113.55, '澳門']]]
contains:  

  return func(self, *args, **kwargs)


Unnamed: 0,Country/Region,Latitude,Longitude
0,Afghanistan,33.000000,65.000000
1,Albania,41.000000,20.000000
2,Algeria,28.000000,3.000000
3,Andorra,42.500000,1.500000
4,Angola,-12.500000,18.500000
...,...,...,...
206,Zambia,-15.000000,30.000000
207,Zimbabwe,-20.000000,30.000000
208,Kosovo,42.561291,20.340304
209,Curacao,12.116667,-68.933333


# Output 2: `Modified_Datasets/2_combineLatLong`

Combine the files in `Output 1` with longitude and latitude columns.

In [10]:
# from datetime import date, datetime, timedelta

# # List of datetime.date type of data in YYYY-MM-DD form
# yyyymmdd = []

# # fileName in MM-DD-YYYY
# for fileName in dailyReports:  

#     # Recgnize date, now in form YYYY-MM-DD, type:datetime.date
#     # datetime.strptime(strdate, 'format') -> yyyy mm dd hh min ss
#     # .date() to only keep 'yyyy mm dd'
#     ymd = datetime.strptime(os.path.splitext(fileName)[0],'%m-%d-%Y').date()
#     yyyymmdd.append(ymd)


# # A list of filename for every 5 days since 2020-01-22
# every5day = []

# # From the earlies file, get the date for every 5 days
# start_date = min(yyyymmdd)
# while start_date <= max(yyyymmdd):
    
#     # Save back to the filename form (datetime->str + .csv)
#     # datatime.strftime('format'): datetime->str
#     mdy = start_date.strftime('%m-%d-%Y')
#     fileName = mdy + '.csv'
#     every5day.append(fileName)

#     # Add 5 days to the date for next loop
#     start_date += timedelta(5)

In [11]:
# Make an output directory
output2Path = os.path.join(currentPath,'Modified_Datasets/2_combineLatLong')
if not os.path.exists(output2Path):
    os.makedirs(output2Path)

# Input folder
folderName = 'Modified_Datasets/1_cleanedDailyReports'
folderPath = os.path.join(currentPath, folderName)

for fileName in dailyReports: 

    # Generate the file path
    filePath = os.path.join(folderPath, fileName)

    # Read daily report one by one
    df = pd.read_csv(filePath)
    
    # left merge: keep all other columns in df, only add lat long in
    df = pd.merge(df, geoCoor, how='left', on=['Country/Region'])
    
    # Generate the file path
    filePath = os.path.join(output2Path, fileName)
    
    # Write the file back
    df.to_csv(filePath, index = False)

# Lock down dates `countryLockdowndatesJHUMatch.csv`

Read into df and only keep the 'Country/Region','Date'. For the countries that do not have a lock down date, only keep the rows that 'Date' column is `.notna()`. 

Check if there are unmatching country names. `.replace()` column values with matching names.

Save df into `lockdown` dataframe, add two more column showing the date 14 an 28 days after lockdown date by changing 'LockdownDate' column to a datetime value using `pd.to_datetime('column',format='%Y-%m-%d')+timedelta(14/28)`

In [12]:
fileName = 'countryLockdowndatesJHUMatch.csv'
filePath = os.path.join(currentPath, 'Datasets', fileName)
df = pd.read_csv(filePath)
df = df.loc[:,['Country/Region','Date']]
df = df.drop_duplicates()
df = df[df['Date'].notna()]
df
countryName
# # unmatching country names
# for name in df['Country/Region'].values:
#     if name not in countryName:
#         print(name)

df['Country/Region'] = df['Country/Region'].replace(['Mainland China','US','UK','Republic of Ireland'],
                                                    ['China','United States','United Kingdom','Ireland'])
# unmatching country names
for name in df['Country/Region'].values:
    if name not in countryName:
        print(name)
        
lockdown = pd.DataFrame()
lockdown[['Country/Region','LockdownDate']] = df[['Country/Region','Date']]
lockdown['LockdownDate'] = pd.to_datetime(lockdown['LockdownDate'], format = '%Y-%m-%d')
lockdown['14 Days'] = pd.to_datetime(lockdown['LockdownDate'], format = '%Y-%m-%d') + timedelta(14)
lockdown['28 Days'] = pd.to_datetime(lockdown['LockdownDate'], format = '%Y-%m-%d') + timedelta(28)
lockdown

Unnamed: 0,Country/Region,LockdownDate,14 Days,28 Days
0,China,2020-01-23,2020-02-06,2020-02-20
1,Hong Kong,2020-03-23,2020-04-06,2020-04-20
2,Macau,2020-01-26,2020-02-09,2020-02-23
3,Taiwan,2020-02-02,2020-02-16,2020-03-01
4,United States,2020-03-23,2020-04-06,2020-04-20
...,...,...,...,...
168,Montenegro,2020-03-24,2020-04-07,2020-04-21
170,Kyrgyzstan,2020-03-24,2020-04-07,2020-04-21
171,Mauritius,2020-03-17,2020-03-31,2020-04-14
187,Uganda,2020-03-18,2020-04-01,2020-04-15


# Combine all files from `'Modified_Datasets/2_combineLatLong'`
Use `pd.concat()` to combine all the df read from input folder. In order to sort the column by 'Country/Region','Date' columns, change the 'Date' column to a `datetime` value in order to sort in ascending order. The `concat_df` will sort by country first and then date of the recorded cases

In [13]:
# Input folder    
folderName = 'Modified_Datasets/2_combineLatLong'
all_df = (pd.read_csv(os.path.join(currentPath, folderName, fileName)) for fileName in dailyReports)
concat_df = pd.concat(all_df, ignore_index=True)

concat_df['Date'] = pd.to_datetime(concat_df['Date'], format='%Y-%m-%d')
concat_df = concat_df.sort_values(['Country/Region','Date'], ascending = [1,1])
concat_df = concat_df.drop_duplicates()

# Final output: `Modified_Datasets/3_combinedAllDates`
Merge the combined dataset and lockdown together on 'country' column, keeping all the values in combined dataset (left merge). Added three boolean columns show if that date that country had locked down or not by comparing the value between date of the cases been recorded and lockdown dates. Save to 'COVID19_Dataset.csv' file.

In [14]:
# Make an output directory
output3Path = os.path.join(currentPath,'Modified_Datasets/3_combinedAllDates')
if not os.path.exists(output3Path):
    os.makedirs(output3Path)

df = pd.merge(concat_df, lockdown, how='left', on=['Country/Region'])
# df['LockdownDate'] = pd.to_datetime(df['LockdownDate'], format = '%Y-%m-%d')
df.loc[df['Date'] >= df['LockdownDate'],['LockdownStatus']] = True
df.loc[df['Date'] < df['LockdownDate'],['LockdownStatus']] = False
df.loc[df['Date'] >= df['14 Days'],['14d_checkpoint']] = True
df.loc[df['Date'] < df['14 Days'],['14d_checkpoint']] = False
df.loc[df['Date'] >= df['28 Days'],['28d_checkpoint']] = True
df.loc[df['Date'] < df['28 Days'],['28d_checkpoint']] = False
df = df.drop_duplicates()
df.to_csv(os.path.join(output3Path, 'COVID19_Dataset.csv'), index = False)


Lastly, group the df by 'country' names, cumulate the cases from the first date and add in cumulate columns. And write to 'COVID19_DatasetCum.csv' file.

In [15]:
df[['Confirmed_Cum', 'Deaths_Cum', 'Recovered_Cum', 'Active_Cum']]= df.groupby(['Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].cumsum()
df.to_csv(os.path.join(output3Path, 'COVID19_DatasetCum.csv'), index = False)

  df[['Confirmed_Cum', 'Deaths_Cum', 'Recovered_Cum', 'Active_Cum']]= df.groupby(['Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].cumsum()
