In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt # data visualization 
import matplotlib as mpl

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Setting Up the Data

### Here, I read the CSV file, keep only the columns I need, and display the first 5 rows of the data

In [None]:
# Load the per-country snapshot and keep only the four columns used in this
# notebook, in a single expression so the cell can be re-run safely.
country_wise_data = (
    pd.read_csv('../input/corona-virus-report/country_wise_latest.csv')
    .loc[:, ["Country/Region", "Confirmed", "Deaths", "WHO Region"]]
)

In [None]:
# Preview the first five rows of the trimmed table.
country_wise_data.head(n=5)

### Here, I look up the row index of each country that I want to analyze

In [None]:
# Row label(s) of the rows whose "Country/Region" is South Korea.
is_south_korea = country_wise_data["Country/Region"].eq("South Korea")
country_wise_data.loc[is_south_korea].index.values

In [None]:
# Row label(s) of the rows whose "Country/Region" is China.
is_china = country_wise_data["Country/Region"].eq("China")
country_wise_data.loc[is_china].index.values

In [None]:
# Row label(s) of the rows whose "Country/Region" is United Kingdom.
is_united_kingdom = country_wise_data["Country/Region"].eq("United Kingdom")
country_wise_data.loc[is_united_kingdom].index.values

In [None]:
# Row label(s) of the rows whose "Country/Region" is Brazil.
is_brazil = country_wise_data["Country/Region"].eq("Brazil")
country_wise_data.loc[is_brazil].index.values

In [None]:
# Row label(s) of the rows whose "Country/Region" is US.
is_us = country_wise_data["Country/Region"].eq("US")
country_wise_data.loc[is_us].index.values

# Data Visualization

In [None]:
# The "seaborn" style name was deprecated in Matplotlib 3.6 in favour of
# "seaborn-v0_8" and dropped in later releases, so use the new name when this
# Matplotlib version provides it and fall back to the old one otherwise.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in mpl.style.available else "seaborn"
mpl.style.use(seaborn_style) # Makes all the graphs have this style

# Chart label -> name used in the dataset's "Country/Region" column.
countries_to_plot = {
    "South Korea": "South Korea",
    "China": "China",
    "United Kingdom": "United Kingdom",
    "Brazil": "Brazil",
    "United States": "US",
}

# Look rows up by country name instead of hard-coded row numbers, so the chart
# stays correct if the CSV is updated or its rows are reordered.
confirmed_by_country = country_wise_data.set_index("Country/Region")["Confirmed"]

for chart_label, dataset_name in countries_to_plot.items():
    # .loc raises KeyError for a missing country rather than plotting a wrong row.
    # Dividing by 1e6 makes the bar lengths match the "in million" axis label
    # regardless of how Matplotlib decides to format the tick values.
    plt.barh(chart_label, confirmed_by_country.loc[dataset_name] / 1e6)

plt.xlabel("Number of Cases (in million)")
plt.ylabel("Country")
plt.title("Cases by Country")
plt.show()

The cell above shows the number of cases for the 5 countries that I picked to analyze: United States, Brazil, United Kingdom, China, and South Korea.
However, it may be a bit misleading because the graph doesn't take the population of each country into account.
Also, the data hasn't been updated since the summer of 2020, and I made this notebook in December 2020, so the case counts in the graph are much lower than they are right now.

In [None]:
# Population of each country being compared.
south_korea_population = 51640000 # 51.64 million
china_population = 1393000000 # 1.393 billion
UK_population = 66650000 # 66.65 million
brazil_population = 209500000 # 209.5 million
US_population = 328200000 # 328.2 million

# Confirmed case counts, looked up by country name rather than by hard-coded
# row number so they stay correct if the CSV is updated or reordered.
# .loc raises KeyError if a country name is not present in the data.
confirmed_by_country = country_wise_data.set_index("Country/Region")["Confirmed"]
south_korea_cases = confirmed_by_country.loc["South Korea"]
china_cases = confirmed_by_country.loc["China"]
UK_cases = confirmed_by_country.loc["United Kingdom"]
brazil_cases = confirmed_by_country.loc["Brazil"]
US_cases = confirmed_by_country.loc["US"]

# Fraction of each country's population that is a confirmed case.
south_korea_cases_over_population = south_korea_cases/south_korea_population
china_cases_over_population = china_cases/china_population
UK_cases_over_population = UK_cases/UK_population
brazil_cases_over_population = brazil_cases/brazil_population
US_cases_over_population = US_cases/US_population

# Chart label -> ratio, in the order the bars are drawn.
cases_over_population_by_country = {
    "South Korea": south_korea_cases_over_population,
    "China": china_cases_over_population,
    "United Kingdom": UK_cases_over_population,
    "Brazil": brazil_cases_over_population,
    "United States": US_cases_over_population,
}

# One barh call per country so each bar gets its own colour from the style cycle.
for chart_label, ratio in cases_over_population_by_country.items():
    plt.barh(chart_label, ratio)

plt.xlabel("# of Cases over Population")
plt.ylabel("Country")
plt.title("Cases over Population by Country")
plt.show()

This bar chart is a fairer comparison because I divided the number of cases by the population, meaning that it takes each country's population into account. From the visualization, you can see that the US is doing the worst out of the 5 countries in the COVID-19 pandemic because it has the highest number of cases relative to its population.

In [None]:
# Add up the confirmed cases for specific WHO regions.
# A single vectorized groupby replaces the row-by-row loop: it sums "Confirmed"
# once per distinct "WHO Region" value.
confirmed_by_region = country_wise_data.groupby("WHO Region")["Confirmed"].sum()

# .get(..., 0) keeps the total at 0 when a region name does not occur in the data.
# NOTE(review): only these five region names are tallied; rows with any other
# "WHO Region" value are not included in any total - confirm this is intended.
total_americas_confirmed_cases = confirmed_by_region.get("Americas", 0)
total_europe_confirmed_cases = confirmed_by_region.get("Europe", 0)
total_southeast_asia_confirmed_cases = confirmed_by_region.get("South-East Asia", 0)
total_africa_confirmed_cases = confirmed_by_region.get("Africa", 0)
total_eastern_mediterranean_confirmed_cases = confirmed_by_region.get("Eastern Mediterranean", 0)

In [None]:
# Chart label -> total confirmed cases, in the order the bars are drawn.
confirmed_totals_by_region = {
    "Americas": total_americas_confirmed_cases,
    "Europe": total_europe_confirmed_cases,
    "Africa": total_africa_confirmed_cases,
    "Southeast Asia": total_southeast_asia_confirmed_cases,
    "Eastern Mediterranean": total_eastern_mediterranean_confirmed_cases,
}

# One barh call per region so each bar gets its own colour from the style cycle.
for region_label, total_confirmed in confirmed_totals_by_region.items():
    # Dividing by 1e6 makes the bar lengths match the "in millions" axis label
    # regardless of how Matplotlib decides to format the tick values.
    plt.barh(region_label, total_confirmed / 1e6)

plt.title("Cases by Region")
plt.xlabel("Number of Cases (in millions)")
plt.ylabel("Region")
plt.show()