In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
print('All modules are sucessfully imported.')

All modules are sucessfully imported.


## Importing COVID 19 Dataset 

In [None]:
# Load the COVID-19 data into a DataFrame.
# NOTE(review): the path below is an empty string, so there is no file for pandas to open as written — fill in the location of the COVID-19 CSV before running. TODO confirm the intended file.
corona_dataset_csv = pd.read_csv("")

In [None]:
corona_dataset_csv.head(n=10) # n is the number of rows to display from the top of the frame

## Checking the shape of the dataset i.e. how many rows and columns

In [None]:
# .shape is a (number of rows, number of columns) tuple for the frame.
corona_dataset_csv.shape

## Deleting the unwanted columns

In [None]:
#df = corona_dataset_csv.drop(["Lat","Long"], axis = 1) (The default is axis = 0, which would look for "Lat" and "Long" among the row labels. Passing axis = 1 tells pandas that they are column labels, so those two columns are dropped.)

#df.head(10) The drop call above returns a new dataframe without Lat and Long and leaves corona_dataset_csv itself unchanged, so we store the result in df and then inspect it with df.head()

In [None]:
# Alternative to storing the result under a new name: rebind the same variable.

# drop(..., inplace=True) would raise a KeyError if this cell were run a second
# time, because "Lat" and "Long" are already gone by then. Reassigning the
# result and passing errors="ignore" keeps the cell safe to re-run.
corona_dataset_csv = corona_dataset_csv.drop(columns=["Lat", "Long"], errors="ignore")

In [None]:
# Show the first 10 rows of the frame as it stands at this point.
corona_dataset_csv.head(n=10)

## Aggregating the rows by country

In [None]:
# Collapse all rows that share a "Country/Region" value into one row per country
# by summing every numeric column.
# numeric_only=True keeps text columns out of the sum: pandas 2.x no longer drops
# them silently, and a text column in the result would break the .diff() and
# .plot() calls made on individual rows later in this notebook.
# NOTE(review): assumes the CSV carries a text column besides "Country/Region"
# (e.g. a province/state column) — confirm against the data file.
corona_dataset_aggregated = corona_dataset_csv.groupby("Country/Region").sum(numeric_only=True)

In [None]:
# First 10 rows of the per-country frame.
corona_dataset_aggregated.head(n=10)

In [None]:
# (number of rows, number of columns) after grouping by country.
corona_dataset_aggregated.shape

## Visualizing data related to any country (For eg - India)

In [None]:
# .loc[name] returns that country's row as a pandas Series; plotting one Series
# after another draws them on the same axes, one line per country.
for country in ("India", "China"):
    corona_dataset_aggregated.loc[country].plot()
plt.legend() # label each line so the two countries can be told apart

## Calculating a Good Measure

In [None]:
# Plot only the first three entries of China's row (selected by position).
# NOTE(review): presumably these are the first three days of the series — confirm the column order.
corona_dataset_aggregated.loc["China"].iloc[:3].plot()

### Calculating the maximum number of new infected cases in our period

In [None]:
# diff() replaces each value in the row with its change from the previous entry
# (a discrete first difference); the result is then plotted.
corona_dataset_aggregated.loc["China"].diff(periods=1).plot()

### Finding the maximum infection rate

In [None]:
# Largest single-step increase in China's row: diff() gives the step-to-step
# changes and max() picks the biggest one.
corona_dataset_aggregated.loc["China"].diff(periods=1).max()

In [None]:
# Largest single-step increase in India's row: diff() gives the step-to-step
# changes and max() picks the biggest one.
corona_dataset_aggregated.loc["India"].diff(periods=1).max()

In [None]:
# Largest single-step increase in Spain's row: diff() gives the step-to-step
# changes and max() picks the biggest one.
corona_dataset_aggregated.loc["Spain"].diff(periods=1).max()

### Finding the maximum infection rate for all countries

In [None]:
countries = list(corona_dataset_aggregated.index)

# Leave the derived column out of the input, so re-running this cell does not
# feed an earlier "Max_Infection_Rate" value back into the differences.
case_counts = corona_dataset_aggregated.drop(columns="Max_Infection_Rate", errors="ignore")

# Row-wise first difference followed by a row-wise maximum: the largest
# step-to-step increase for every country, computed in one vectorised pass
# instead of a Python loop over the index. Values come out in index order,
# i.e. in the same order as `countries`.
max_infection_rate = list(case_counts.diff(axis=1).max(axis=1))

corona_dataset_aggregated["Max_Infection_Rate"] = max_infection_rate   # new column holding each country's largest one-step increase

corona_dataset_aggregated.head(10)

## Creating a new dataframe with the needed column

In [None]:
# One-column frame: the country index plus the "Max_Infection_Rate" column.
corona_data = corona_dataset_aggregated["Max_Infection_Rate"].to_frame()

In [None]:
# First 10 rows of the one-column frame.
corona_data.head(n=10)

## Importing the WorldHappinessReport.csv file 

In [None]:
# Load the happiness report into a DataFrame and preview its first 10 rows.
# NOTE(review): the path below is an empty string, so there is no file for pandas to open as written — fill in the location of the happiness report CSV before running. TODO confirm the intended file.
happiness_report_csv = pd.read_csv("")
happiness_report_csv.head(10)

### Dropping the useless columns

In [None]:
# Columns that the rest of this analysis does not use.
useless_cols = ["Overall rank", "Score", "Generosity", "Perceptions of corruption"]
# Reassign instead of using inplace=True, and ignore labels that are already
# missing, so the cell can be re-run without a KeyError once the columns are gone.
happiness_report_csv = happiness_report_csv.drop(columns=useless_cols, errors="ignore")
happiness_report_csv.head(10)

### Changing the indices of the dataframe 

In [None]:
# Make the country name the row index; set_index takes the name of the column to promote.
# The guard keeps the cell re-runnable: after the first run the column has
# already become the index, and calling set_index again would raise a KeyError.
if "Country or region" in happiness_report_csv.columns:
    happiness_report_csv = happiness_report_csv.set_index("Country or region")
happiness_report_csv.head(10)

## Joining the 2 dataset together (corona_data & happiness_report_csv)

In [None]:
# join() matches the two frames on their indexes. With how="inner" only the
# index labels present in both frames are kept, so every row of the result has
# both the infection figure and the happiness-report columns.
data = corona_data.join(other=happiness_report_csv, how="inner")
data.head(n=10)

## To see whether there's any relation between the columns related to different life factors and column related to the max infection rate in that country.

For this we use the correlation matrix

In [None]:
# Pairwise Pearson correlation between every pair of columns in the joined frame.
data.corr(method="pearson")

# Visualization of the results

In [None]:
# Reminder of the joined frame's columns before plotting.
data.head(n=10)

### Plotting GDP vs Max Infection Rate

In [None]:
x = data["GDP per capita"]
y = data["Max_Infection_Rate"]

# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
# The y values are log-transformed so the points are easier to read on the plot.
# NOTE(review): a row whose Max_Infection_Rate is 0 becomes -inf under np.log — confirm none are present.
sns.scatterplot(x=x, y=np.log(y))

In [None]:
# Scatter plot with a fitted regression line drawn through it.
# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
sns.regplot(x=x, y=np.log(y))

## Plotting Social support vs Max Infection Rate

In [None]:
x = data["Social support"]
y = data["Max_Infection_Rate"]

# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
# The y values are log-transformed so the points are easier to read on the plot.
# NOTE(review): a row whose Max_Infection_Rate is 0 becomes -inf under np.log — confirm none are present.
sns.scatterplot(x=x, y=np.log(y))

In [None]:
# Scatter plot with a fitted regression line drawn through it.
# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
sns.regplot(x=x, y=np.log(y))

## Plotting Healthy life expectancy vs Max Infection Rate

In [None]:
x = data["Healthy life expectancy"]
y = data["Max_Infection_Rate"]

# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
# The y values are log-transformed so the points are easier to read on the plot.
# NOTE(review): a row whose Max_Infection_Rate is 0 becomes -inf under np.log — confirm none are present.
sns.scatterplot(x=x, y=np.log(y))

In [None]:
# Scatter plot with a fitted regression line drawn through it.
# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
sns.regplot(x=x, y=np.log(y))

## Plotting Freedom to make life choices vs Max Infection Rate

In [None]:
x = data["Freedom to make life choices"]
y = data["Max_Infection_Rate"]

# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
# The y values are log-transformed so the points are easier to read on the plot.
# NOTE(review): a row whose Max_Infection_Rate is 0 becomes -inf under np.log — confirm none are present.
sns.scatterplot(x=x, y=np.log(y))

In [None]:
# Scatter plot with a fitted regression line drawn through it.
# x and y are passed by keyword: seaborn 0.12+ rejects them as positional arguments.
sns.regplot(x=x, y=np.log(y))