In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, one per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the confirmed-cases CSV into a DataFrame and preview its first ten rows.
confirmed_csv_path = '/kaggle/input/covid19-confirmed-datasetcsv/covid19_Confirmed_dataset.csv'
corona_dataset_csv = pd.read_csv(confirmed_csv_path)
corona_dataset_csv.head(n=10)

**Exploring shape of the data**

In [None]:
# Dimensions of the raw frame as a (rows, columns) tuple.
corona_dataset_csv.shape

**Deleting useless columns**

In [None]:
# Remove the 'Lat' and 'Long' columns; nothing below reads them.
# Reassigning the result (instead of inplace=True) combined with
# errors='ignore' makes this cell safe to re-run: a second execution no
# longer raises KeyError because the columns are already gone.
corona_dataset_csv = corona_dataset_csv.drop(columns=['Lat', 'Long'], errors='ignore')
corona_dataset_csv.head(10)


**Aggregating the rows by country**

In [None]:
# Collapse all rows that share a "Country/Region" value into a single row by
# summing their counts, so the frame ends up indexed by country.
# numeric_only=True is passed explicitly because pandas >= 2.0 no longer drops
# non-numeric columns silently during sum(); any text column still present
# (e.g. a province/state label -- TODO confirm against the CSV) would otherwise
# break the .diff() and .plot() calls made on the per-country rows further down.
corona_dataset_aggregated = corona_dataset_csv.groupby("Country/Region").sum(numeric_only=True)
corona_dataset_aggregated.head(10)


In [None]:
# (rows, columns) after aggregation: one row per distinct "Country/Region" value.
corona_dataset_aggregated.shape


**Visualizing the data for individual countries, for example China, Italy and Spain**

In [None]:
# Draw each selected country's row as a line on the same axes; every Series
# carries the country name, which the legend picks up as its label.
for selected_country in ('China', 'Italy', 'Spain'):
    corona_dataset_aggregated.loc[selected_country].plot()
plt.legend()

**Calculating the first derivative of the curve**

In [None]:
# Zoom in on the first three data points of China's row.
china_first_three = corona_dataset_aggregated.loc['China'].iloc[:3]
china_first_three.plot()

In [None]:
# Change between consecutive columns of China's row (a discrete first
# derivative of the curve), drawn as a line.
china_change = corona_dataset_aggregated.loc['China'].diff()
china_change.plot()


**Find maximum infection rate for China**

In [None]:
# Largest single step between consecutive columns in China's row.
china_row = corona_dataset_aggregated.loc['China']
china_row.diff().max()


**Find maximum infection rate for all of the countries.**

In [None]:
# Maximum infection rate per country = largest increase between two
# consecutive columns of that country's row (columns are presumably
# consecutive dates -- TODO confirm against the CSV header).
countries = list(corona_dataset_aggregated.index)

# Exclude a previously added 'max infection rate' column so that re-running
# this cell does not feed the result column back into the diff.
count_columns = corona_dataset_aggregated.columns.drop('max infection rate', errors='ignore')

# Row-wise diff over the whole frame replaces the per-country Python loop;
# values come out in index order, i.e. aligned with `countries`.
max_infection_rates = (
    corona_dataset_aggregated[count_columns]
    .diff(axis=1)
    .max(axis=1)
    .tolist()
)
corona_dataset_aggregated['max infection rate'] = max_infection_rates
corona_dataset_aggregated.head(10)


**Creating new data frame with only maximum infection rate per country**

In [None]:
# Keep only the per-country maximum infection rate, as a one-column DataFrame
# that retains the country index.
corona_data = corona_dataset_aggregated["max infection rate"].to_frame()
corona_data.head(n=7)

**Importing the World Happiness Report dataset**

In [None]:
# Load the happiness report CSV into a DataFrame and preview its first ten rows.
happiness_csv_path = '/kaggle/input/world-shle-report/worldwide_happiness_report.csv'
happiness_dataset = pd.read_csv(happiness_csv_path)
happiness_dataset.head(n=10)

**Removing columns/scores that are not needed for the analysis**

In [None]:
# Columns that the rest of the notebook does not use.
columns_to_dropped = ['Overall rank', 'Score', 'Generosity', 'Perceptions of corruption']

# Reassign instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell does not raise KeyError.
happiness_dataset = happiness_dataset.drop(columns=columns_to_dropped, errors='ignore')
happiness_dataset.head()

**Changing the indices of the dataframe**

In [None]:
# Index the happiness data by country name so it can be joined with the
# country-indexed corona data on the index.
# The guard keeps the cell re-runnable: once the column has become the index
# it is no longer in .columns, and calling set_index again would raise KeyError.
if 'Country or region' in happiness_dataset.columns:
    happiness_dataset = happiness_dataset.set_index('Country or region')
happiness_dataset.head()

**Merging two datasets together**

In [None]:
# Preview the left-hand side of the upcoming join (first five rows).
corona_data.head()


In [None]:
# (rows, columns) of the corona frame before joining.
corona_data.shape

In [None]:
# Preview the right-hand side of the upcoming join (first five rows).
happiness_dataset.head()

In [None]:
# (rows, columns) of the happiness frame before joining.
happiness_dataset.shape

In [None]:
# Combine both frames on their indexes, keeping only the index labels that
# occur in both (inner join). Empty suffixes mirror DataFrame.join's defaults,
# so overlapping column names would raise instead of being renamed.
overall_data = pd.merge(
    corona_data,
    happiness_dataset,
    how="inner",
    left_index=True,
    right_index=True,
    suffixes=("", ""),
)
overall_data.head()

**Working with correlation matrix**

In [None]:
# Pairwise correlation between all columns of the merged frame.
overall_data.corr()

**Visualizing the results**

In [None]:
# Reminder of the columns available for plotting (first five rows).
overall_data.head()

**Plotting GDP per capita vs maximum infection rate**

In [None]:
# Scatter of GDP per capita (x) against the maximum infection rate (y).
x = overall_data["GDP per capita"]
y = overall_data["max infection rate"]
# x/y must be passed by keyword: seaborn >= 0.12 made them keyword-only and
# raises TypeError on positional use.
sns.scatterplot(x=x, y=y)

**Applying NumPy's log transform to the infection rate to reduce the scaling problem**

In [None]:
# Same scatter with the natural log of y, reusing x and y from the previous cell.
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.scatterplot(x=x, y=np.log(y))

In [None]:
# Scatter plus fitted regression line for x vs log(y), reusing x and y from
# the GDP-per-capita cell above.
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.regplot(x=x, y=np.log(y))

In [None]:
# Regression plot of social support (x) against log of the maximum infection rate (y).
x = overall_data["Social support"]
y = overall_data["max infection rate"]
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.regplot(x=x, y=np.log(y))

In [None]:
# Regression plot of healthy life expectancy (x) against log of the maximum
# infection rate (y).
x = overall_data["Healthy life expectancy"]
y = overall_data["max infection rate"]
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.regplot(x=x, y=np.log(y))

In [None]:
# Regression plot of freedom to make life choices (x) against log of the
# maximum infection rate (y).
x = overall_data["Freedom to make life choices"]
y = overall_data["max infection rate"]
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.regplot(x=x, y=np.log(y))

In [None]:
# NOTE(review): this cell repeats the earlier "Social support" regression plot
# unchanged; it is kept so the notebook's output stays the same, but it looks
# like a leftover copy -- confirm whether a different column was intended.
x = overall_data["Social support"]
y = overall_data["max infection rate"]
# Keyword arguments are required by seaborn >= 0.12 (positional x/y raise TypeError).
sns.regplot(x=x, y=np.log(y))