# Imports

In [47]:
import json
import folium
import requests
import mimetypes
import http.client
import pandas as pd
import streamlit as st
import plotly
import plotly.express as px
from streamlit_folium import folium_static
from folium.plugins import HeatMap
from pandas.io.json import json_normalize

import matplotlib.pyplot as plt
import seaborn as sns

import inflection
import datetime


from IPython.core.display    import HTML
from IPython.display         import Image

In [27]:
!pip install inflection

Collecting inflection
  Using cached inflection-0.5.1-py2.py3-none-any.whl (9.5 kB)
Installing collected packages: inflection
Successfully installed inflection-0.5.1
You should consider upgrading via the '/home/marxcerqueira/.pyenv/versions/3.9.1/envs/covid-dashboard/bin/python3.9 -m pip install --upgrade pip' command.[0m


## Helper Functions

In [12]:
def jupyter_settings():
    %matplotlib inline
    %pylab inline
    
    plt.style.use( 'bmh' )
    plt.rcParams['figure.figsize'] = [25, 12]
    plt.rcParams['font.size'] = 24
    
    display( HTML( '<style>.container { width:100% !important; }</style>') )
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option( 'display.expand_frame_repr', False )
    
    sns.set()

In [18]:
jupyter_settings()

Populating the interactive namespace from numpy and matplotlib


## Loading Data

In [22]:
conn = http.client.HTTPSConnection("api.covid19api.com")
payload = ''
headers = {}
conn.request("GET","/summary",payload,headers)

res = conn.getresponse()

data  = res.read().decode('UTF-8')

covid = json.loads(data)

# Gera o dataframe
data_raw = pd.DataFrame(covid['Countries'])
data_raw.head()

Unnamed: 0,ID,Country,CountryCode,Slug,NewConfirmed,TotalConfirmed,NewDeaths,TotalDeaths,NewRecovered,TotalRecovered,Date,Premium
0,8cc444b3-c02c-470d-a6fe-e31826ded357,Afghanistan,AF,afghanistan,194,59939,6,2631,66,53272,2021-05-03T00:37:38.096Z,{}
1,97bd00f4-9932-4858-bf8a-5788f4211cf1,Albania,AL,albania,100,131185,2,2396,834,110172,2021-05-03T00:37:38.096Z,{}
2,93605ff5-91ba-4c94-8e90-51e5411467c5,Algeria,DZ,algeria,203,122311,8,3261,141,85249,2021-05-03T00:37:38.096Z,{}
3,20fed65f-3180-4f4b-8f10-414370833ad5,Andorra,AD,andorra,0,13232,0,125,0,12684,2021-05-03T00:37:38.096Z,{}
4,a6eb4ea0-2680-4ede-9cb2-caa9fcf44fbf,Angola,AO,angola,163,26815,4,600,37,23913,2021-05-03T00:37:38.096Z,{}


# Data Description

In [23]:
df1 = data_raw.copy()

## Rename Columns

In [24]:
list(df1)

['ID',
 'Country',
 'CountryCode',
 'Slug',
 'NewConfirmed',
 'TotalConfirmed',
 'NewDeaths',
 'TotalDeaths',
 'NewRecovered',
 'TotalRecovered',
 'Date',
 'Premium']

In [29]:
cols_old = list(df1)
snakecase = lambda x: inflection.underscore(x)
col_news = list(map(snakecase, cols_old))

# Rename columns
df1.columns = col_news

In [30]:
df1.head()

Unnamed: 0,id,country,country_code,slug,new_confirmed,total_confirmed,new_deaths,total_deaths,new_recovered,total_recovered,date,premium
0,8cc444b3-c02c-470d-a6fe-e31826ded357,Afghanistan,AF,afghanistan,194,59939,6,2631,66,53272,2021-05-03T00:37:38.096Z,{}
1,97bd00f4-9932-4858-bf8a-5788f4211cf1,Albania,AL,albania,100,131185,2,2396,834,110172,2021-05-03T00:37:38.096Z,{}
2,93605ff5-91ba-4c94-8e90-51e5411467c5,Algeria,DZ,algeria,203,122311,8,3261,141,85249,2021-05-03T00:37:38.096Z,{}
3,20fed65f-3180-4f4b-8f10-414370833ad5,Andorra,AD,andorra,0,13232,0,125,0,12684,2021-05-03T00:37:38.096Z,{}
4,a6eb4ea0-2680-4ede-9cb2-caa9fcf44fbf,Angola,AO,angola,163,26815,4,600,37,23913,2021-05-03T00:37:38.096Z,{}


## Data Dimension

In [33]:
print('Number of rows: {}'.format(df1.shape[0]))
print('Number of columns: {}'.format(df1.shape[1]))

Number of rows: 190
Number of columns: 12


## Check NA Values

In [None]:
#checking NA values, dtypes, dataset dimension, change dtypes

In [31]:
df1.isnull().sum()

id                 0
country            0
country_code       0
slug               0
new_confirmed      0
total_confirmed    0
new_deaths         0
total_deaths       0
new_recovered      0
total_recovered    0
date               0
premium            0
dtype: int64

## Data Types

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 190 entries, 0 to 189
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   ID              190 non-null    object
 1   Country         190 non-null    object
 2   CountryCode     190 non-null    object
 3   Slug            190 non-null    object
 4   NewConfirmed    190 non-null    int64 
 5   TotalConfirmed  190 non-null    int64 
 6   NewDeaths       190 non-null    int64 
 7   TotalDeaths     190 non-null    int64 
 8   NewRecovered    190 non-null    int64 
 9   TotalRecovered  190 non-null    int64 
 10  Date            190 non-null    object
 11  Premium         190 non-null    object
dtypes: int64(6), object(6)
memory usage: 17.9+ KB


## Change Dtypes

In [34]:
# chage date dtype to datetime
df1['date'] = pd.to_datetime(df1['date'])

In [35]:
df1.dtypes

id                              object
country                         object
country_code                    object
slug                            object
new_confirmed                    int64
total_confirmed                  int64
new_deaths                       int64
total_deaths                     int64
new_recovered                    int64
total_recovered                  int64
date               datetime64[ns, UTC]
premium                         object
dtype: object

# Feature Engineering

In [36]:
df2 = df1.copy()

In [53]:
# create features from date - day, week, month, year 
#day
df2['day'] = df2['date'].dt.day

#week
df2['week_of_year'] = df2['date'].dt.isocalendar().week.astype('int64')

#month
df2['month'] = df2['date'].dt.month

#year
df2['year'] = df2['date'].dt.year

In [52]:
df2.dtypes

id                              object
country                         object
country_code                    object
slug                            object
new_confirmed                    int64
total_confirmed                  int64
new_deaths                       int64
total_deaths                     int64
new_recovered                    int64
total_recovered                  int64
date               datetime64[ns, UTC]
premium                         object
day                              int64
week                             int64
month                            int64
year                             int64
dtype: object

# Data Cleaning

In [61]:
df3 = df2.copy()

In [62]:
df3.head()

Unnamed: 0,id,country,country_code,slug,new_confirmed,total_confirmed,new_deaths,total_deaths,new_recovered,total_recovered,date,premium,day,week,month,year,week_of_year
0,8cc444b3-c02c-470d-a6fe-e31826ded357,Afghanistan,AF,afghanistan,194,59939,6,2631,66,53272,2021-05-03 00:37:38.096000+00:00,{},3,18,5,2021,18
1,97bd00f4-9932-4858-bf8a-5788f4211cf1,Albania,AL,albania,100,131185,2,2396,834,110172,2021-05-03 00:37:38.096000+00:00,{},3,18,5,2021,18
2,93605ff5-91ba-4c94-8e90-51e5411467c5,Algeria,DZ,algeria,203,122311,8,3261,141,85249,2021-05-03 00:37:38.096000+00:00,{},3,18,5,2021,18
3,20fed65f-3180-4f4b-8f10-414370833ad5,Andorra,AD,andorra,0,13232,0,125,0,12684,2021-05-03 00:37:38.096000+00:00,{},3,18,5,2021,18
4,a6eb4ea0-2680-4ede-9cb2-caa9fcf44fbf,Angola,AO,angola,163,26815,4,600,37,23913,2021-05-03 00:37:38.096000+00:00,{},3,18,5,2021,18


In [63]:
# drop unnecessary features
df3 = df3.drop(['id', 'country_code', 'slug', 'premium'], axis = 1)

In [64]:
df3.head()

Unnamed: 0,country,new_confirmed,total_confirmed,new_deaths,total_deaths,new_recovered,total_recovered,date,day,week,month,year,week_of_year
0,Afghanistan,194,59939,6,2631,66,53272,2021-05-03 00:37:38.096000+00:00,3,18,5,2021,18
1,Albania,100,131185,2,2396,834,110172,2021-05-03 00:37:38.096000+00:00,3,18,5,2021,18
2,Algeria,203,122311,8,3261,141,85249,2021-05-03 00:37:38.096000+00:00,3,18,5,2021,18
3,Andorra,0,13232,0,125,0,12684,2021-05-03 00:37:38.096000+00:00,3,18,5,2021,18
4,Angola,163,26815,4,600,37,23913,2021-05-03 00:37:38.096000+00:00,3,18,5,2021,18


# Data Visualization

In [65]:
# daily cases, daily deaths, daily recovery

#daily
daily_cases = df2[['total_confirmed', 'day']].groupby('day').sum()
daily_cases

Unnamed: 0_level_0,total_confirmed
day,Unnamed: 1_level_1
3,151975599


teste

In [66]:
conn = http.client.HTTPSConnection("api.covid19api.com")
payload = ''
headers = {}
conn.request("GET","/summary",payload,headers)
res = conn.getresponse()
data  = res.read().decode('UTF-8')
covid = json.loads(data)
# Gera o dataframe
df = pd.DataFrame(covid['Countries'])

In [67]:
# Data Cleaning
# Drop unnecessary features
covid1 = df.drop(columns = ['CountryCode', 'Slug', 'Premium'], axis = 1)

# Feature Engineering
covid1['ActiveCases'] = covid1['TotalConfirmed'] - covid1['TotalRecovered']
covid1['ActiveCases'] = covid1['ActiveCases'] - covid1['TotalDeaths']
# New Dataframes
dfn = covid1.drop(['NewConfirmed', 'NewDeaths', 'NewRecovered'], axis = 1)
dfn = dfn.groupby('Country')['TotalConfirmed','TotalDeaths','TotalRecovered','ActiveCases'].sum().sort_values(by = 'TotalConfirmed', ascending = False)
dfn.style.background_gradient(cmap = 'Oranges')
dfc = covid1.groupby('Country')['TotalConfirmed', 'TotalDeaths', 'TotalRecovered', 'ActiveCases'].max().sort_values(by = 'TotalConfirmed', ascending = False).reset_index()


  dfn = dfn.groupby('Country')['TotalConfirmed','TotalDeaths','TotalRecovered','ActiveCases'].sum().sort_values(by = 'TotalConfirmed', ascending = False)
  dfc = covid1.groupby('Country')['TotalConfirmed', 'TotalDeaths', 'TotalRecovered', 'ActiveCases'].max().sort_values(by = 'TotalConfirmed', ascending = False).reset_index()


In [76]:
# Obtém os totais consolidados
confirmed_tot = int(dfc['TotalConfirmed'].sum())
deaths_tot = int(dfc['TotalDeaths'].sum())
recovered_tot = int(dfc['TotalRecovered'].sum())
active_tot = int(dfc['ActiveCases'].sum())
d = {'total confirmed cases': confirmed_tot, 'total deaths': deaths_tot, 'total recovered': recovered_tot, 'total active cases': active_tot}
world_df = pd.DataFrame(d,  index=['world'])

In [70]:
d

{'total confirmed cases': 151975599,
 'total deaths': 3190232,
 'total recovered': 88892904,
 'total active cases': 59892463}

In [77]:
world_df

Unnamed: 0,total confirmed cases,total deaths,total recovered,total active cases
world,151975599,3190232,88892904,59892463
