In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

# Web Scrapping Covid cases

### Request 

In [2]:
url = 'https://www.worldometers.info/coronavirus/#main_table'

requests.get(url).status_code

200

## Scraping

In [3]:
response = requests.get(url)

soup = BeautifulSoup(response.text, 'lxml')

In [4]:
# Finding the table
table = soup.find('table', attrs={'id': 'main_table_countries_today'})

In [5]:
# Saving columns name and turning into dataframe
column_name = [col.get_text().rstrip() for col in table.find_all('th')]
df = pd.DataFrame(columns=column_name)

In [6]:
df

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop


In [7]:
# Saving the rows
rows = table.find_all('tr', attrs={'style': ''})[1:]

# for each row in the rows list
for row in rows:
    
    # Find all the cells in the row
    datas = row.find_all('td')
    
    # Saving all the cells in a list
    data = [t.text for t in datas]
    
    # Locating the last index in the dataframe and assign the row 
    df_length = len(df)
    df.loc[df_length] = data

In [8]:
df

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,,World,694669281,0,6911715,0,666516323,+48554,21241243,37613,...,,,,All,\n,,,,,
1,1,USA,108103082,,1173670,,105975479,,953933,1184,...,1181726232,3529593,334805269,North America,3,285,0,,,2849
2,2,India,44997167,,531930,,44463717,,1520,,...,930797975,661721,1406631776,Asia,31,2644,2,,,1
3,4,Germany,38428685,,174352,,38240600,,13733,,...,122332384,1458359,83883596,Europe,2,481,1,,,164
4,5,Brazil,37771706,,705313,,36249161,,817232,,...,63776166,296146,215353593,South America,6,305,3,,,3795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,224,Montserrat,1403,,8,,1376,,19,,...,17762,3577442,4965,North America,4,621,0,,,3827
209,225,Niue,884,,,,874,,10,,...,,,1622,Australia/Oceania,2,,,,,6165
210,227,Tokelau,80,,,,,,80,,...,,,1378,Australia/Oceania,17,,,,,58055
211,231,China,503302,,5272,,379053,,118977,,...,160000000,110461,1448471400,Asia,2878,274748,9,,,82


## Saving data

In [9]:
#df.to_csv('covid.csv')

# Web Scraping fist case in each country

### Request

In [10]:
url = 'https://en.m.wikipedia.org/wiki/COVID-19_pandemic_by_country_and_territory'

response = requests.get(url)

response.status_code

200

## Scraping

In [11]:
soup = BeautifulSoup(response.text, 'lxml')

In [12]:
# Finding and saving the table
table = soup.find('table', attrs={'class': 'wikitable mw-datatable'}).find('tbody')

# Saving the columns name 
col_names = [col.get_text().rstrip() for col in table.find_all('th')]
col_names

['Date', 'Countries / Territories']

In [13]:
# Turning into dataframe
df_first_case = pd.DataFrame(columns=col_names)
df_first_case

Unnamed: 0,Date,Countries / Territories


In [14]:
# Finding and saving all the rows in a list
rows = table.find_all('tr')[1:]

In [15]:
# For each row in the list
for row in rows:
    # Find all the cells in the row
    datas = row.find_all('td')
    
    # Save in a list
    data = [d.get_text().rstrip() for d in datas]
    
    # Locate the last index in the dataframe and assign the row
    df_length = len(df_first_case)
    df_first_case.loc[df_length] = data
df_first_case

Unnamed: 0,Date,Countries / Territories
0,1 December 2019[a],China
1,13 January 2020,Thailand
2,16 January 2020,Japan
3,20 January 2020,South Korea • United States
4,21 January 2020,Taiwan[b]
...,...,...
84,2 April 2022,Nauru
85,8 May 2022[x],North Korea
86,20 May 2022,Tuvalu
87,16 July 2022,Pitcairn Islands[e]


## Cleaning

In [16]:
# Change columns name
df_first_case.rename(columns={'Date': 'first_case', 'Countries / Territories': 'Country,Other'}, inplace=True)
df_first_case.head()

Unnamed: 0,first_case,"Country,Other"
0,1 December 2019[a],China
1,13 January 2020,Thailand
2,16 January 2020,Japan
3,20 January 2020,South Korea • United States
4,21 January 2020,Taiwan[b]


In [17]:
# Removing the symbols and letters from dates
df_first_case['first_case'] = df_first_case['first_case'].map(lambda x: x.rsplit('[')[0])
df_first_case.first_case.unique()

array(['1 December 2019', '13 January 2020', '16 January 2020',
       '20 January 2020', '21 January 2020', '22 January 2020',
       '23 January 2020', '24 January 2020', '25 January 2020',
       '27 January 2020', '29 January 2020', '30 January 2020',
       '31 January 2020', '3 February 2020', '14 February 2020',
       '19 February 2020', '21 February 2020', '24 February 2020',
       '25 February 2020', '26 February 2020', '27 February 2020',
       '28 February 2020', '29 February 2020', '1 March 2020',
       '2 March 2020', '3 March 2020', '4 March 2020', '5 March 2020',
       '6 March 2020', '7 March 2020', '8 March 2020', '9 March 2020',
       '10 March 2020', '11 March 2020', '12 March 2020', '13 March 2020',
       '14 March 2020', '15 March 2020', '16 March 2020', '17 March 2020',
       '18 March 2020', '19 March 2020', '20 March 2020', '21 March 2020',
       '22 March 2020', '23 March 2020', '24 March 2020', '25 March 2020',
       '26 March 2020', '28 March 2020',

It is a bit messy but I can see there are a lot countries in the same location in the dataframe. I will split them into columns and add as a row in the country column.

In [18]:
# Split the data into a list
df_first_case['Country,Other'] = df_first_case['Country,Other'].map(lambda x: x.split(' • '))
df_first_case.head()

Unnamed: 0,first_case,"Country,Other"
0,1 December 2019,[ China]
1,13 January 2020,[ Thailand]
2,16 January 2020,[ Japan]
3,20 January 2020,"[ South Korea, United States]"
4,21 January 2020,[ Taiwan[b]]


In [19]:
# Transform this list into columns
split = pd.DataFrame(df_first_case['Country,Other'].to_list())
split.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,China,,,,,,,,,,,,,,,,,,,
1,Thailand,,,,,,,,,,,,,,,,,,,
2,Japan,,,,,,,,,,,,,,,,,,,
3,South Korea,United States,,,,,,,,,,,,,,,,,,
4,Taiwan[b],,,,,,,,,,,,,,,,,,,


In [20]:
# Add the first case date as reference to the dataset
x = pd.concat([df_first_case, split], axis=1)
x.head()

Unnamed: 0,first_case,"Country,Other",0,1,2,3,4,5,6,7,...,10,11,12,13,14,15,16,17,18,19
0,1 December 2019,[ China],China,,,,,,,,...,,,,,,,,,,
1,13 January 2020,[ Thailand],Thailand,,,,,,,,...,,,,,,,,,,
2,16 January 2020,[ Japan],Japan,,,,,,,,...,,,,,,,,,,
3,20 January 2020,"[ South Korea, United States]",South Korea,United States,,,,,,,...,,,,,,,,,,
4,21 January 2020,[ Taiwan[b]],Taiwan[b],,,,,,,,...,,,,,,,,,,


In [21]:
# for column in the columns
for i in x.columns[3:]:
    # Save the date of first and the country and drop all the na in the column
    y = x[['first_case', i]].dropna()
    
    # Add the date and country in a row
    x = pd.concat([x, y.rename(columns={i: 0})])
    

In [22]:
# Save the data only with the first case and country
x = x[['first_case', 0]]
x.head(10)

Unnamed: 0,first_case,0
0,1 December 2019,China
1,13 January 2020,Thailand
2,16 January 2020,Japan
3,20 January 2020,South Korea
4,21 January 2020,Taiwan[b]
5,22 January 2020,Hong Kong[c]
6,23 January 2020,Nepal
7,24 January 2020,France
8,25 January 2020,Australia
9,27 January 2020,Cambodia


In [23]:
x.shape

(256, 2)

In [24]:
# Rename the country row from 0 to the same as the main dataset
x.rename(columns={0: 'Country,Other'}, inplace=True)

In [25]:
# Clean all the symbols and letters from the countries name
x['Country,Other'] = x['Country,Other'].map(lambda x: x.strip().split('[')[0])

In [30]:
x.sample(10)

Unnamed: 0,first_case,"Country,Other"
22,29 February 2020,Luxembourg
59,11 April 2020,Saba
35,13 March 2020,Curaçao
25,3 March 2020,Argentina
14,14 February 2020,Egypt
43,21 March 2020,Transnistria
35,13 March 2020,Kazakhstan
60,16 April 2020,Bonaire
19,26 February 2020,Romania
41,19 March 2020,Chad


In [32]:
# x.to_csv('first_case.csv')

# Web Scraping the lockdowns

### Request

In [34]:
url = 'https://en.m.wikipedia.org/wiki/COVID-19_lockdowns'

response = requests.get(url)

soup = BeautifulSoup(response.content, 'html.parser')

## Scraping

In [40]:
# Finding the table and saving the table
table = soup.find('table', attrs={'class':'wikitable sortable mw-collapsible'})

There're no patterns in this table, each row is different from each other, it's a complex table with multiple index. Using pandas my be the better solution

In [41]:
# Reading HTML page
dfs = pd.read_html(url)

In [37]:
# Saving the table 
df_lockdown = dfs[0]

In [42]:
df_lockdown.head()

Unnamed: 0_level_0,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns,vteCOVID-19 pandemic lockdowns
Unnamed: 0_level_1,Country / territory,Place,First lockdown,First lockdown,First lockdown,Second lockdown,Second lockdown,Second lockdown,Third lockdown,Third lockdown,...,Sixth lockdown,Seventh lockdown,Seventh lockdown,Seventh lockdown,Eighth lockdown,Eighth lockdown,Eighth lockdown,Total length (days),Level,Unnamed: 28_level_1
Unnamed: 0_level_2,Country / territory,Place,Start date,End date,Length (days),Start date,End date,Length (days),Start date,End date,...,Length (days),Start date,End date,Length (days),Start date,End date,Length (days),Total length (days),Level,Unnamed: 28_level_2
0,Albania,Albania,2020-03-13[84],2020-06-01[85],80,,,,,,...,,,,,,,,80,National,
1,Algeria,Algiers,2020-03-23[86],2020-05-14[87],52,,,,,,...,,,,,,,,52,City,
2,Algeria,Blida,2020-03-23[86],2020-05-14[87],52,,,,,,...,,,,,,,,52,City,
3,Argentina,Greater Buenos Aires,2020-03-19[88],2020-11-08[89],234,2021-05-22[90],2021-05-30,9.0,2021-06-05[91],2021-06-06,...,,,,,,,,245,Metropolitan area,
4,Argentina,Rest of the country,2020-03-19[88],2020-05-10[92],52,2021-05-22[90],2021-05-30,9.0,2021-06-05[91],2021-06-06,...,,,,,,,,63,National,


In [43]:
level = df_lockdown['vteCOVID-19 pandemic lockdowns']['Level'][['Level']]
level.head()

Unnamed: 0,Level
0,National
1,City
2,City
3,Metropolitan area
4,National


There are countries with no first lockdown but it has a second one?! A bit confusing, let's see if it was just a late lockdown considered

In [39]:
df_lockdown['vteCOVID-19 pandemic lockdowns'][['Country / territory','Second lockdown']]['Second lockdown']['Start date'].unique()

array([nan, '2021-05-22[90]', '2020-07-09[b][97][98]', '2020-08-06[98]',
       '2020-07-09[b][117]', '2020-11-19[123]', '2021-10-16[126]',
       '2020-12-19[127]', '2021-01-08[130]', '2021-01-31[136]',
       '2021-06-26[139]', '2021-06-27[142]', '2021-06-29[133]',
       '2021-06-30 [144]', '2021-07-20[145]', '2021-08-05[147][148]',
       '2021-08-05[147]', '2021-08-07[151]', '2021-08-08[153]',
       '2021-08-09[155]', '2021-08-09[159]', '2021-08-11[160]',
       '2021-08-12[161]', '2021-08-14[163]', '2020-11-03[180]',
       '2021-04-05[188]', '2020-11-02[195]',
       '2020-11-28 (de facto)[x][216]', '2020-11-07[224]',
       '2020-12-26[228]', '2020-12-25[230]', '2020-12-22[237]',
       '2021-01-10[240]', '2020-10-22[243]', '2020-12-25[246]',
       '2021-04-19', '2020-10-30[258]', '2020-11-02[269]',
       '2020-10-20[272]', '2020-11-07[278]', '2020-11-03[280]',
       '2021-01-23[283]', '2021-04-27[290]', '2021-04-19[291]',
       '2021-05-03[293]', '2021-04-14[294]', '2021-

In [47]:
df_lockdown = df_lockdown['vteCOVID-19 pandemic lockdowns'][['Country / territory','First lockdown']]

In [48]:
df_lockdown

Unnamed: 0_level_0,Country / territory,First lockdown,First lockdown,First lockdown
Unnamed: 0_level_1,Country / territory,Start date,End date,Length (days)
0,Albania,2020-03-13[84],2020-06-01[85],80
1,Algeria,2020-03-23[86],2020-05-14[87],52
2,Algeria,2020-03-23[86],2020-05-14[87],52
3,Argentina,2020-03-19[88],2020-11-08[89],234
4,Argentina,2020-03-19[88],2020-05-10[92],52
...,...,...,...,...
237,Vietnam,,,21
238,Vietnam,,,21
239,Vietnam,,,21
240,Zimbabwe,2020-03-30[516],2020-05-02[517],33


In [49]:
# Svaling the country without the multiple indexing
territory = df_lockdown[['Country / territory']]['Country / territory']
territory.head()

Unnamed: 0,Country / territory
0,Albania
1,Algeria
2,Algeria
3,Argentina
4,Argentina


In [50]:
# Saving the dates without the multiple indexing
dates = df_lockdown['First lockdown']

# Join the tables outer 
df_lockdown = territory.join(dates, how='outer')

In [51]:
# Dropping texting row
df_lockdown.drop(index=241, inplace=True)

In [52]:
df_lockdown

Unnamed: 0,Country / territory,Start date,End date,Length (days)
0,Albania,2020-03-13[84],2020-06-01[85],80
1,Algeria,2020-03-23[86],2020-05-14[87],52
2,Algeria,2020-03-23[86],2020-05-14[87],52
3,Argentina,2020-03-19[88],2020-11-08[89],234
4,Argentina,2020-03-19[88],2020-05-10[92],52
...,...,...,...,...
236,Vietnam,,,21
237,Vietnam,,,21
238,Vietnam,,,21
239,Vietnam,,,21


In [53]:
# Checking for duplicates
df_lockdown.duplicated().sum()

93

In [54]:
# Drop duplicates by country
df_lockdown.drop_duplicates(subset=['Country / territory'], inplace=True)

In [55]:
df_lockdown.head()

Unnamed: 0,Country / territory,Start date,End date,Length (days)
0,Albania,2020-03-13[84],2020-06-01[85],80
1,Algeria,2020-03-23[86],2020-05-14[87],52
3,Argentina,2020-03-19[88],2020-11-08[89],234
5,Armenia,2020-03-24[93],2020-05-04[94],41
6,Australia,2020-03-31[95],2020-05-12[96][a],43


In [56]:
# Cleaning the symbols and number from the date
df_lockdown['Start date'] = df_lockdown['Start date'].map(lambda x: x.strip().split('[')[0])
df_lockdown['End date'] = df_lockdown['End date'].map(lambda x: x.strip().split('[')[0])

In [57]:
df_lockdown.head()

Unnamed: 0,Country / territory,Start date,End date,Length (days)
0,Albania,2020-03-13,2020-06-01,80
1,Algeria,2020-03-23,2020-05-14,52
3,Argentina,2020-03-19,2020-11-08,234
5,Armenia,2020-03-24,2020-05-04,41
6,Australia,2020-03-31,2020-05-12,43


In [58]:
# Changing column name
df_lockdown.rename(columns={'Country / territory': 'Country,Other'}, inplace=True)
df_lockdown.head()

Unnamed: 0,"Country,Other",Start date,End date,Length (days)
0,Albania,2020-03-13,2020-06-01,80
1,Algeria,2020-03-23,2020-05-14,52
3,Argentina,2020-03-19,2020-11-08,234
5,Armenia,2020-03-24,2020-05-04,41
6,Australia,2020-03-31,2020-05-12,43


# Setting the final dataset

In [73]:
# Concat the lockdown with level
df_lockdown = pd.concat([df_lockdown, level], axis=1)

In [74]:
# Mergin tht lockdown dataset
df = df_covid.merge(df_lockdown, on='Country,Other', how='left')

In [82]:
# Setting manually the country with lockdown due shorthand name or misspelling
df.loc[1,'Start date'] = '2020-03-19'
df.loc[1, 'End date'] = '2021-06-15'
df.loc[1, 'Length (days)'] = '453'
df.loc[1, 'Level'] = 'State'

df.loc[9,'Start date'] = '2020-03-23'
df.loc[9, 'End date'] = '2020-07-04'
df.loc[9, 'Length (days)'] = '103'
df.loc[9, 'Level'] = 'National'

df.loc[63,'Start date'] = '2020-03-26'
df.loc[63, 'End date'] = '2020-04-17'
df.loc[63, 'Length (days)'] = '22'
df.loc[63, 'Level'] = 'National'

In [83]:
df.head()

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop,first_case,Start date,End date,Length (days),Level
0,,World,688091303,20575.0,6873000,12.0,660502581,23697.0,20715722,39079.0,...,,,,,,,,,,
1,1.0,USA,106787016,,1162662,,104752848,,871506,1151.0,...,288.0,0.0,,,2603.0,20 January 2020,2020-03-19,2021-06-15,453.0,State
2,2.0,India,44976599,,531736,,44425250,,19613,,...,2645.0,2.0,,,14.0,30 January 2020,2020-03-25,2020-06-07,74.0,National
3,3.0,France,40029701,,166862,,39764181,,98658,869.0,...,393.0,0.0,,,1504.0,24 January 2020,2020-03-17,2020-05-11,55.0,National
4,4.0,Germany,38415659,,173628,,38215200,2000.0,26831,,...,483.0,1.0,,,320.0,27 January 2020,2020-03-16,2020-05-30,76.0,National


In [84]:
df.columns

Index(['#', 'Country,Other', 'TotalCases', 'NewCases', 'TotalDeaths',
       'NewDeaths', 'TotalRecovered', 'NewRecovered', 'ActiveCases',
       'Serious,Critical', 'Tot Cases/1M pop', 'Deaths/1M pop', 'TotalTests',
       'Tests/\n1M pop', 'Population', 'Continent', '1 Caseevery X ppl',
       '1 Deathevery X ppl', '1 Testevery X ppl', 'New Cases/1M pop',
       'New Deaths/1M pop', 'Active Cases/1M pop', 'first_case', 'Start date',
       'End date', 'Length (days)', 'Level'],
      dtype='object')

In [85]:
# Drop first column
df.drop(columns=['#'], inplace=True)

# rename columns correctlly
df.rename(columns={'Tests/\n1M pop': 'Tests/1M pop'}, inplace=True)
df.head()

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,...,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop,first_case,Start date,End date,Length (days),Level
0,World,688091303,20575.0,6873000,12.0,660502581,23697.0,20715722,39079.0,88276,...,,,,,,,,,,
1,USA,106787016,,1162662,,104752848,,871506,1151.0,318953,...,288.0,0.0,,,2603.0,20 January 2020,2020-03-19,2021-06-15,453.0,State
2,India,44976599,,531736,,44425250,,19613,,31975,...,2645.0,2.0,,,14.0,30 January 2020,2020-03-25,2020-06-07,74.0,National
3,France,40029701,,166862,,39764181,,98658,869.0,610353,...,393.0,0.0,,,1504.0,24 January 2020,2020-03-17,2020-05-11,55.0,National
4,Germany,38415659,,173628,,38215200,2000.0,26831,,457964,...,483.0,1.0,,,320.0,27 January 2020,2020-03-16,2020-05-30,76.0,National


In [87]:
df.iloc[:5, :11]

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop
0,World,688091303,20575.0,6873000,12.0,660502581,23697.0,20715722,39079.0,88276,881.7
1,USA,106787016,,1162662,,104752848,,871506,1151.0,318953,3473.0
2,India,44976599,,531736,,44425250,,19613,,31975,378.0
3,France,40029701,,166862,,39764181,,98658,869.0,610353,2544.0
4,Germany,38415659,,173628,,38215200,2000.0,26831,,457964,2070.0


In [88]:
df.iloc[:5, 11:]

Unnamed: 0,TotalTests,Tests/1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop,first_case,Start date,End date,Length (days),Level
0,,,,All,\n,,,,,,,,,,
1,1178974205.0,3521373.0,334805269.0,North America,3,288.0,0.0,,,2603.0,20 January 2020,2020-03-19,2021-06-15,453.0,State
2,928216464.0,659886.0,1406631776.0,Asia,31,2645.0,2.0,,,14.0,30 January 2020,2020-03-25,2020-06-07,74.0,National
3,271490188.0,4139547.0,65584518.0,Europe,2,393.0,0.0,,,1504.0,24 January 2020,2020-03-17,2020-05-11,55.0,National
4,122332384.0,1458359.0,83883596.0,Europe,2,483.0,1.0,,,320.0,27 January 2020,2020-03-16,2020-05-30,76.0,National


In [86]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 26 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Country,Other        220 non-null    object
 1   TotalCases           220 non-null    object
 2   NewCases             220 non-null    object
 3   TotalDeaths          220 non-null    object
 4   NewDeaths            220 non-null    object
 5   TotalRecovered       220 non-null    object
 6   NewRecovered         220 non-null    object
 7   ActiveCases          220 non-null    object
 8   Serious,Critical     220 non-null    object
 9   Tot Cases/1M pop     220 non-null    object
 10  Deaths/1M pop        220 non-null    object
 11  TotalTests           220 non-null    object
 12  Tests/1M pop         220 non-null    object
 13  Population           220 non-null    object
 14  Continent            220 non-null    object
 15  1 Caseevery X ppl    220 non-null    object
 16  1 Deathe

In [99]:
df['NewCases'] = df['NewCases'].map(lambda x: x.strip('+'))
df['NewDeaths'] = df['NewDeaths'].map(lambda x: x.strip('+'))

In [100]:
df['NewRecovered'] = df['NewRecovered'].map(lambda x: x.strip('+'))

In [113]:
df.to_csv('Covid.csv')