# Random Covid Analysis
- Identify countries that have fewer cases than India
- Identify countries that have more deaths than India
- Identify countries that have a better recovery rate than India

In [None]:
# Install the third-party packages used by this notebook, each pinned to an exact version.
# NOTE(review): `requests` (imported in the next cell) and an Excel writer engine for
# DataFrame.to_excel (e.g. openpyxl) are not installed here -- confirm the environment
# already provides them.
!pip3 install beautifulsoup4==4.9.1
!pip3 install lxml==4.5.2
!pip3 install matplotlib==3.3.0
!pip3 install ipympl==0.5.7
!pip3 install pandas==1.1.0
!pip3 install seaborn==0.10.1
!pip3 install mpld3==0.5.1

In [None]:
import requests, time, pandas as pd
from bs4 import BeautifulSoup

# Wikipedia template page that holds the per-country COVID-19 statistics table.
URL = 'https://en.wikipedia.org/wiki/Template:COVID-19_pandemic_data'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error page into an immediate, readable
# failure instead of letting the parser silently work on error-page HTML.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')

# Only the first table carrying the "wikitable" class is used.
table = soup.find("table", class_="wikitable")
if table is None:
    # Without this guard, str(None) would be handed to read_html and fail
    # with an unrelated "no tables found" style error.
    raise ValueError(f"No table with class 'wikitable' found at {URL}")

# read_html returns a list of DataFrames; the markup passed in holds one table.
df_table = pd.read_html(str(table))[0]

df_table

In [None]:
# Save a snapshot of the scraped table next to the notebook.
df_table.to_excel(excel_writer="covid_wiki_stats.xlsx")

In [None]:
df_table.info()

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reset_index.html
df_table.columns

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.MultiIndex.droplevel.html
df_table.columns = df_table.columns.droplevel(level=1)

In [None]:
df_table

In [None]:
df_table.columns.tolist()

In [None]:
df_table.columns = ['Location[a]_1', 'Location[a]_2', 'Cases[b]', 'Deaths[c]', 'Recov.[d]', 'Ref.']

In [None]:
df_table['Location[a]_2']

In [None]:
df_table.drop(["Location[a]_1"],axis=1,inplace=True)

In [None]:
df_table

In [None]:
df_table.columns = ['Location', 'Cases', 'Deaths', 'Recov', 'Ref']

In [None]:
df_table

## Identify countries that have fewer cases than India

In [None]:
df_table.info()

In [None]:
df_table[df_table['Cases'] < df_table[df_table['Location']=="India"]['Cases'].values[0]]

In [None]:
df_table[df_table['Location']=="India"]['Cases']

In [None]:
df_table = df_table[df_table.Cases.apply(lambda x: x.isnumeric())]

In [None]:
df_table

In [None]:
df_table[df_table['Cases'] < df_table[df_table['Location']=="India"]['Cases'].values[0]]

In [None]:
df_table_cases_less_india = df_table[df_table['Cases'] < df_table[df_table['Location']=="India"]['Cases'].values[0]]

# Plot the Data

In [None]:
# Make sure 'Cases' is numeric in both frames before plotting.
# Both frames come from boolean-indexed selections, so assigning a column on
# them in place raises SettingWithCopyWarning. DataFrame.assign returns a new
# frame with the column replaced, which avoids writing into a possible copy.
df_table = df_table.assign(
    Cases=pd.to_numeric(df_table["Cases"], errors='coerce')
)
df_table_cases_less_india = df_table_cases_less_india.assign(
    Cases=pd.to_numeric(df_table_cases_less_india["Cases"], errors='coerce')
)

In [None]:
# Case count of the first row whose Location is exactly "India"; used as the
# baseline by the comparison helpers below.
INDIA_CASES = df_table.loc[df_table['Location'] == "India", 'Cases'].values[0]

def no_of_cases_gt_india(col, india_cases=None):
    """Return how many more cases ``col`` represents than India.

    The result is ``col - india_cases``, so it is negative when ``col`` is
    below India's count (which is the case for every row this notebook
    applies it to).

    Args:
        col: Case count to compare; anything ``int()`` accepts.
        india_cases: Baseline case count. Defaults to the notebook-level
            ``INDIA_CASES`` when omitted, which keeps ``Series.apply(func)``
            calls working unchanged.

    Returns:
        The signed difference as an ``int``.
    """
    baseline = INDIA_CASES if india_cases is None else india_cases
    return int(col) - int(baseline)


def percent_gt_india(col, india_cases=None):
    """Return the difference between ``col`` and India as a percentage of India.

    Computed as ``(col - india_cases) * 100 / india_cases``; negative when
    ``col`` is below India's count.

    Args:
        col: Case count to compare; anything ``int()`` accepts.
        india_cases: Baseline case count. Defaults to the notebook-level
            ``INDIA_CASES`` when omitted.

    Returns:
        The signed percentage as a ``float``.

    Raises:
        ZeroDivisionError: If the baseline is zero.
    """
    baseline = int(INDIA_CASES if india_cases is None else india_cases)
    return (int(col) - baseline) * 100 / baseline


# Per-country gap to India, as an absolute count and as a percentage.
df_table_cases_less_india['NO_LT_INDIA'] = (
    df_table_cases_less_india['Cases'].map(no_of_cases_gt_india)
)
df_table_cases_less_india['PERCENT_LT_INDIA'] = (
    df_table_cases_less_india['Cases'].map(percent_gt_india)
)

In [None]:
df_table_cases_less_india.info()

In [None]:
df_table_cases_less_india.plot.bar(title='Cases less than India',x='Location',y='Cases')

In [None]:
# The `widget` and `notebook` matplotlib backends add a little interactivity; `widget` is the one for JupyterLab.
%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt

# Draw the same bar chart onto an explicitly created Axes so the figure
# object is available for layout adjustment afterwards.
fig, ax = plt.subplots()
df_table_cases_less_india.plot(
    kind='bar',
    x='Location',
    y='Cases',
    title='Cases less than India',
    ax=ax,
)
fig.tight_layout()