# COVID 19 Economic impact

In [None]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

## Data

In [None]:
# Load the transformed Kaggle dataset into the working frame `df`.
df = pd.read_csv('/kaggle/input/impact-of-covid19-pandemic-on-the-global-economy/transformed_data.csv')
# Preview the first rows (rendered as the cell output).
df.head()

In [None]:
# Print a summary of df: columns, non-null counts and dtypes.
df.info()

* CODE = ISO code of the country
* COUNTRY = country name
* DATE = Date of the values
* HDI = Human development index
* TC = Total cases
* TD = Total deaths
* STI = Stringency Index
* POP = Population
* GDPCAP = Gross domestic product per capita

In [None]:
# Show every row whose HDI value is missing.
df.loc[df['HDI'].isna()]

In [None]:
# Distinct country names that have at least one row with a missing HDI.
df.loc[df['HDI'].isna(), 'COUNTRY'].unique()

There are null values for *HDI* (Human Development Index) for these countries. We assume these values are 0 because the countries are small.

In [None]:
# Replace every NaN in the frame (all columns, not only HDI) with 0, in place.
# The "Erasing missing data" cell further down relies on 0 as the
# missing-value marker when it drops rows whose GDPCAP, HDI or STI equals 0.
df.fillna(0, inplace=True)

## Visualization

In [None]:
# Animated world map coloured by GDP per capita, one frame per DATE value.
fig = px.choropleth(
    df,
    locations='CODE',
    locationmode='ISO-3',
    color='GDPCAP',
    hover_name='COUNTRY',
    hover_data=['TC', 'TD', 'STI', 'POP'],
    animation_frame='DATE',
    projection='natural earth',
    title='GDP by date',
)
fig.show()

In [None]:
# Animated world map coloured by total deaths, one frame per DATE value.
fig = px.choropleth(
    df,
    locations='CODE',
    locationmode='ISO-3',
    color='TD',
    hover_name='COUNTRY',
    hover_data=['TC', 'GDPCAP', 'STI', 'POP'],
    animation_frame='DATE',
    projection='natural earth',
    title='Total Deaths by date',
)
fig.show()

In [None]:
# Animated world map coloured by total cases, one frame per DATE value.
fig = px.choropleth(
    df,
    locations='CODE',
    locationmode='ISO-3',
    color='TC',
    hover_name='COUNTRY',
    hover_data=['TD', 'GDPCAP', 'STI', 'POP'],
    animation_frame='DATE',
    projection='natural earth',
    title='Total Cases by date',
)
fig.show()

In [None]:
# Animated world map coloured by population, one frame per DATE value.
fig = px.choropleth(
    df,
    locations='CODE',
    locationmode='ISO-3',
    color='POP',
    hover_name='COUNTRY',
    hover_data=['TD', 'TC', 'GDPCAP', 'STI'],
    animation_frame='DATE',
    projection='natural earth',
    title='Population by date',
)
fig.show()

In [None]:
plt.figure(figsize=(10,6))

# Correlate only the numeric columns. The frame also carries text columns
# (country code, country name, date), and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0 instead of silently skipping it.
# select_dtypes works the same way on older and newer pandas versions.
heatmap = sns.heatmap(df.select_dtypes(include='number').corr(),
                      vmin=-1, vmax=1, annot=True, cmap='viridis')

heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':12}, pad=12)
plt.show()

GDP per capita has a strong correlation with the Human Development Index and Population, which is more or less expected. It also has a moderate correlation with Total Cases, Total Deaths and the Stringency Index.

In [None]:
# Collapse the per-country rows into one "world" row per date:
#   TC / TD          -> summed over all rows of that date
#   STI / HDI / GDPCAP -> averaged over all rows of that date
# Named aggregation ties each output column to its source column, so the
# totals cannot end up under the wrong header (previously the cases total
# was stored in 'TD' and the deaths total in 'TC').
# sort=False keeps the dates in order of first appearance.
df_W = (
    df.groupby('DATE', sort=False)
      .agg(TD=('TD', 'sum'),
           TC=('TC', 'sum'),
           STI=('STI', 'mean'),
           HDI=('HDI', 'mean'),
           GDPCAP=('GDPCAP', 'mean'))
      .reset_index()
)
# NOTE(review): the format string is kept unchanged; confirm it matches the
# date strings in the CSV for the pandas version in use.
df_W.DATE=pd.to_datetime(df_W.DATE, format='%Y %m %d')

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(14, 16))

# Top panel: per-date means. Each legend label names the column it plots
# (the HDI and STI labels were previously attached to each other's series).
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['GDPCAP'], label='GDP per capita')
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['HDI'], label='Human Developement Index')
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['STI'], label='Stringency Index')

ax[0].set_title('Mean GDP CAP, HDI, STI')

# Bottom panel: per-date totals.
sns.lineplot(ax=ax[1], x=df_W.DATE, y=df_W['TC'], label='Total Cases')
sns.lineplot(ax=ax[1], x=df_W.DATE, y=df_W['TD'], label='Total Deaths')
# Title the bottom axes explicitly instead of relying on pyplot's current axes.
ax[1].set_title('World values')
plt.show()

It's visible that most values change drastically from March. This is because some of the countries don't have all the information for the previous months.

In [None]:
# Fraction of rows whose GDPCAP is exactly 0 (the fill value used for
# missing data), reported as a percentage.
gdp_is_zero = df['GDPCAP'] == 0.
perc = gdp_is_zero.astype('int32').sum() / len(df)
print('Percentaje of GDP missing values = 0: {:.3f}%'.format(perc*100))

In [None]:
plt.subplots(figsize=(14,9))

# Min-max scale each world series on its own so that all five curves can be
# compared on a single axis. Pairs are (df_W column, legend label), drawn in
# this order.
scaled_series = (
    ('GDPCAP', 'GDP Mean'),
    ('TD', 'Total Deaths'),
    ('TC', 'Total Cases'),
    ('STI', 'Stringency Index Mean'),
    ('HDI', 'Human Developement Index Mean'),
)
for column, legend in scaled_series:
    # MinMaxScaler expects a 2-D array: one column, one row per date.
    as_column = np.array(df_W[column]).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(as_column).ravel()
    sns.lineplot(x=df_W.DATE, y=scaled, label=legend)
plt.title('Scaled World values')
plt.show()

### Erasing missing data

In [None]:
# Build df_er by removing, one column at a time, every row whose GDPCAP,
# HDI or STI equals 0 (the fill value used for missing data). df itself
# is left untouched.
df_er = df
for zero_marked in ('GDPCAP', 'HDI', 'STI'):
    rows_to_drop = df_er.index[df_er[zero_marked] == 0.]
    df_er = df_er.drop(index=rows_to_drop)

In [None]:
# Rebuild the per-date "world" frame from the cleaned data (df_er):
#   TC / TD          -> summed over all rows of that date
#   STI / HDI / GDPCAP -> averaged over all rows of that date
# Named aggregation ties each output column to its source column, so the
# totals cannot end up under the wrong header (previously the cases total
# was stored in 'TD' and the deaths total in 'TC').
# sort=False keeps the dates in order of first appearance.
df_W = (
    df_er.groupby('DATE', sort=False)
         .agg(TD=('TD', 'sum'),
              TC=('TC', 'sum'),
              STI=('STI', 'mean'),
              HDI=('HDI', 'mean'),
              GDPCAP=('GDPCAP', 'mean'))
         .reset_index()
)
# NOTE(review): the format string is kept unchanged; confirm it matches the
# date strings in the CSV for the pandas version in use.
df_W.DATE=pd.to_datetime(df_W.DATE, format='%Y %m %d')

In [None]:
fig, ax = plt.subplots(2, 1, figsize=(14, 16))

# Top panel: per-date means. Each legend label names the column it plots
# (the HDI and STI labels were previously attached to each other's series).
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['GDPCAP'], label='GDP per capita')
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['HDI'], label='Human Developement Index')
sns.lineplot(ax=ax[0], x=df_W.DATE, y=df_W['STI'], label='Stringency Index')

ax[0].set_title('Mean GDP CAP, HDI, STI')

# Bottom panel: per-date totals.
sns.lineplot(ax=ax[1], x=df_W.DATE, y=df_W['TC'], label='Total Cases')
sns.lineplot(ax=ax[1], x=df_W.DATE, y=df_W['TD'], label='Total Deaths')
# Title the bottom axes explicitly instead of relying on pyplot's current axes.
ax[1].set_title('World values')
plt.show()

In [None]:
plt.subplots(figsize=(14,9))

# Min-max scale each world series on its own so that all five curves can be
# compared on a single axis. Pairs are (df_W column, legend label), drawn in
# this order.
scaled_series = (
    ('GDPCAP', 'GDP Mean'),
    ('TD', 'Total Deaths'),
    ('TC', 'Total Cases'),
    ('STI', 'Stringency Index Mean'),
    ('HDI', 'Human Developement Index Mean'),
)
for column, legend in scaled_series:
    # MinMaxScaler expects a 2-D array: one column, one row per date.
    as_column = np.array(df_W[column]).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(as_column).ravel()
    sns.lineplot(x=df_W.DATE, y=scaled, label=legend)
plt.title('Scaled World values')
plt.show()

### Final result:

As we can see, the trend is similar to the previous one, but now the **real** impact of Covid 19 on GDP and Human Development is much clearer. As Total Cases and Total Deaths rise, GDP and HDI decrease and STI rises.

Something nice to notice is that the changes in GDP, HDI and STI precede the changes in total cases and total deaths. This is a case of 'we saw it coming, and even so we could not prepare well'.

In the last month TC and TD decreased sharply, while GDP and HDI increased less than proportionally. Given the previous information, STI should have decreased, but it increased. This may be due to a global fear of continuing to make bad decisions.