# Projeto COVID-19
## Philipe Couto

### Importando as bibliotecas necessárias

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [2]:
# Relative path of the COVID-19 CSV dataset that is loaded below.
url = "covid_19_data.csv"

In [3]:
# Load the dataset, parsing the two date columns as datetimes, then display the column dtypes.
df = pd.read_csv(url, parse_dates=['ObservationDate', 'Last Update'])
df.dtypes

SNo                         int64
ObservationDate    datetime64[ns]
Province/State             object
Country/Region             object
Last Update        datetime64[ns]
Confirmed                 float64
Deaths                    float64
Recovered                 float64
dtype: object

In [4]:
import re ## função replace, para tirar letras maiusculas e caracteres especiais dos nomes das colunas.

def corrige_colunas(col_name):
    """Normalize a column name.

    Removes every '/', '|' and space character from ``col_name`` and
    returns the result in lowercase.
    """
    sem_separadores = re.sub(r"[/| ]", "", col_name)
    return sem_separadores.lower()

In [5]:
# Normalize every column name (no separators, lowercase) and display the result.
df.columns = list(map(corrige_colunas, df.columns))
df.columns

Index(['sno', 'observationdate', 'provincestate', 'countryregion',
       'lastupdate', 'confirmed', 'deaths', 'recovered'],
      dtype='object')

# Selecionando os dados do Brasil sobre covid

In [6]:
# Number of rows recorded for each country/region.
df.countryregion.value_counts()

US                     4990
Mainland China         3687
Canada                 1093
Australia               788
France                  752
                       ... 
North Ireland             1
Channel Islands           1
Cape Verde                1
Republic of Ireland       1
East Timor                1
Name: countryregion, Length: 223, dtype: int64

In [7]:
# Distinct country/region labels present in the data.
df.countryregion.unique()

array(['Mainland China', 'Hong Kong', 'Macau', 'Taiwan', 'US', 'Japan',
       'Thailand', 'South Korea', 'Singapore', 'Philippines', 'Malaysia',
       'Vietnam', 'Australia', 'Mexico', 'Brazil', 'Colombia', 'France',
       'Nepal', 'Canada', 'Cambodia', 'Sri Lanka', 'Ivory Coast',
       'Germany', 'Finland', 'United Arab Emirates', 'India', 'Italy',
       'UK', 'Russia', 'Sweden', 'Spain', 'Belgium', 'Others', 'Egypt',
       'Iran', 'Israel', 'Lebanon', 'Iraq', 'Oman', 'Afghanistan',
       'Bahrain', 'Kuwait', 'Austria', 'Algeria', 'Croatia',
       'Switzerland', 'Pakistan', 'Georgia', 'Greece', 'North Macedonia',
       'Norway', 'Romania', 'Denmark', 'Estonia', 'Netherlands',
       'San Marino', ' Azerbaijan', 'Belarus', 'Iceland', 'Lithuania',
       'New Zealand', 'Nigeria', 'North Ireland', 'Ireland', 'Luxembourg',
       'Monaco', 'Qatar', 'Ecuador', 'Azerbaijan', 'Czech Republic',
       'Armenia', 'Dominican Republic', 'Indonesia', 'Portugal',
       'Andorra', 'Latvia

In [8]:
# All rows reported for Brazil.
df.loc[df.countryregion == 'Brazil']

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
82,83,2020-01-23,,Brazil,2020-01-23 17:00:00,0.0,0.0,0.0
2455,2456,2020-02-26,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2559,2560,2020-02-27,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2668,2669,2020-02-28,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2776,2777,2020-02-29,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,2020-05-15,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,2020-05-16,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,2020-05-17,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,2020-05-18,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


In [9]:
# Brazilian rows with at least one confirmed case.
# .copy() makes `brasil` an independent frame, so columns added to it later
# do not trigger pandas' SettingWithCopyWarning.
brasil = df.loc[(df.countryregion == 'Brazil') & (df.confirmed > 0)].copy()
brasil

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
2455,2456,2020-02-26,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2559,2560,2020-02-27,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2668,2669,2020-02-28,,Brazil,2020-02-26 23:53:02,1.0,0.0,0.0
2776,2777,2020-02-29,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
2903,2904,2020-03-01,,Brazil,2020-02-29 21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,2020-05-15,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,2020-05-16,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,2020-05-17,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,2020-05-18,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


In [10]:
# Confirmed cases over time: observation date on the x axis, cumulative confirmed cases on the y axis.
px.line(
    brasil,
    x='observationdate',
    y='confirmed',
    title='Casos confirmados no Brasil',
)


In [11]:
# New cases per day.
#
# Each value is the difference between consecutive rows of the cumulative
# 'confirmed' column; the first row has no predecessor and is set to 0.
# diff() replaces the original per-row map/lambda with a vectorized
# computation, and assign() builds a new frame instead of writing into a
# possible slice of `df` (the source of the SettingWithCopyWarning).
# Note: fillna(0) would also zero any gap caused by a missing 'confirmed'
# value; `brasil` is filtered on confirmed > 0, so none is expected.

# Not used by this cell; kept in case a later cell relies on the name.
from numpy import arange


brasil = brasil.assign(novoscasos=brasil['confirmed'].diff().fillna(0))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
# Daily new cases over time.
px.line(brasil, x='observationdate', y='novoscasos', title='Novos casos por dia')

In [13]:
# COVID-19 deaths over time, drawn as a red line with markers.
mortes_trace = go.Scatter(
    x=brasil.observationdate,
    y=brasil.deaths,
    name='Mortes',
    mode='lines+markers',
    line=dict(color='red'),
)

fig = go.Figure(data=[mortes_trace])
fig.update_layout(title='Mortes covid')
fig.show()

In [14]:
# Growth rate
#
# taxa_crescimento = (presente / passado) ** (1 / n) - 1

def taxa_crescimento(data, variable, data_inicio=None, data_fim=None):
    """Return the average compound growth rate per day of ``variable``, in percent.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime column ``observationdate`` and the column
        named by ``variable``.
    variable : str
        Name of the column whose growth is measured.
    data_inicio : optional
        Start date (anything ``pd.to_datetime`` accepts). Defaults to the
        earliest date on which ``variable`` is greater than zero.
    data_fim : optional
        End date (anything ``pd.to_datetime`` accepts). Defaults to the
        latest observation date in ``data``.

    Returns
    -------
    float
        ``((presente / passado) ** (1 / n) - 1) * 100`` where ``n`` is the
        number of days between the two dates.

    Raises
    ------
    IndexError
        If ``data`` has no row for the start or the end date.
    ZeroDivisionError
        If the start and end dates are the same day (``n == 0``).
    """
    if data_inicio is None:
        data_inicio = data.observationdate.loc[data[variable] > 0].min()
    else:
        data_inicio = pd.to_datetime(data_inicio)

    if data_fim is None:
        # Use the latest date rather than the last row, so the default does
        # not depend on row order (same convention as taxa_crescimento_diario).
        data_fim = data.observationdate.max()
    else:
        data_fim = pd.to_datetime(data_fim)

    # Values of the variable at the start (past) and end (present) dates.
    passado = data.loc[data.observationdate == data_inicio, variable].values[0]
    presente = data.loc[data.observationdate == data_fim, variable].values[0]

    # Number of days covered by the evaluation window.
    n = (data_fim - data_inicio).days

    # Compound growth rate per day.
    taxa = (presente / passado) ** (1 / n) - 1

    return taxa * 100


In [15]:
# Growth rate (in percent) of confirmed cases in Brazil over the default date range.
taxa_crescimento(brasil, 'confirmed')

16.27183353112116

In [16]:
def taxa_crescimento_diario(data, variable, data_inicio=None):
    """Return the growth rate between consecutive observations, in percent.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime column ``observationdate`` and the column
        named by ``variable``.
    variable : str
        Name of the column whose growth is measured.
    data_inicio : optional
        Start date (anything ``pd.to_datetime`` accepts). Defaults to the
        earliest date on which ``variable`` is greater than zero.

    Returns
    -------
    numpy.ndarray
        One value per pair of consecutive rows dated on or after
        ``data_inicio``: ``(current - previous) / previous * 100``.
        Empty when fewer than two rows fall in that window.
    """
    if data_inicio is None:
        data_inicio = data.observationdate.loc[data[variable] > 0].min()
    else:
        data_inicio = pd.to_datetime(data_inicio)

    # Select the rows from the start date onwards, in chronological order.
    # Working on this window (instead of positions 0..n of the full frame)
    # makes an explicit data_inicio take effect and avoids indexing past
    # the end when some calendar days are missing.
    janela = data.loc[data.observationdate >= data_inicio].sort_values(
        'observationdate', kind='stable'
    )
    valores = janela[variable].to_numpy(dtype=float)

    anteriores = valores[:-1]
    taxas = (valores[1:] - anteriores) / anteriores
    return taxas * 100

In [17]:
# Day-over-day growth rates (in percent) of confirmed cases in Brazil.
tx_dia = taxa_crescimento_diario(brasil, 'confirmed')
tx_dia

array([  0.        ,   0.        , 100.        ,   0.        ,
         0.        ,   0.        , 100.        ,   0.        ,
       225.        ,   0.        ,  53.84615385,  25.        ,
        24.        ,  22.58064516,  36.84210526, 190.38461538,
         0.        ,   7.28476821,  23.45679012,  60.5       ,
        15.88785047,  66.93548387,  27.69726248,  28.75157629,
        51.4201763 ,  24.45019405,  16.78794179,  13.66266133,
        16.87548943,  14.47236181,  14.25226807,   9.01639344,
         7.58928571,  24.8525879 ,  19.57320273,  17.67115272,
        12.58080557,  14.39929329,   7.43243243,   9.26325247,
        15.40169394,  15.22017956,  11.88620903,   8.54521335,
         5.54537122,   7.06807546,   5.57858688,   7.81903542,
        12.10513815,   7.4329096 ,  10.70501233,   8.83557983,
         5.44492335,   5.4043566 ,   5.73350023,   6.21648599,
         9.35157462,   8.00823407,   9.77184834,   6.36504619,
         6.88748019,   8.58316283,   8.80726429,   9.41

In [18]:
# First and last dates of the window covered by the daily growth rates.
primeiro_dia = brasil.observationdate.loc[brasil.confirmed > 0].min()
ultimo_dia = brasil.observationdate.max()

# date_range builds one timestamp per day; the first day is dropped because
# a growth rate needs a previous day to compare against.
dias_taxa = pd.date_range(primeiro_dia, ultimo_dia)[1:]
px.line(x=dias_taxa, y=tx_dia, title = 'Taxa de crescimento de casos de covid no Brasil')

# Predições

### Para realizar predições de séries temporais, é necessário decompor a série em:
- Tendência
- Sazonalidade
- Ruído

In [19]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt


In [20]:
# Confirmed-case series indexed by observation date.
# set_index builds a new object, so `brasil` and its 'confirmed' column are
# left untouched (assigning `.index` on `brasil.confirmed` mutates a Series
# derived from the frame in place) and the cell can be re-run safely.
confirmados = brasil.set_index('observationdate')['confirmed']
confirmados

observationdate
2020-02-26         1.0
2020-02-27         1.0
2020-02-28         1.0
2020-02-29         2.0
2020-03-01         2.0
                ...   
2020-05-15    220291.0
2020-05-16    233511.0
2020-05-17    241080.0
2020-05-18    255368.0
2020-05-19    271885.0
Name: confirmed, Length: 84, dtype: float64

# Decompose the confirmed-case series and plot each component on its own axis.
res = seasonal_decompose(confirmados)

# plt.subplots (plural) creates the figure and the 4x1 grid of axes at once;
# plt.subplot neither accepts this call form nor returns a (fig, axes) pair.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10, 8))

ax1.plot(res.observed)
ax1.set_title('Observado')
ax2.plot(res.trend)
ax2.set_title('Tendência')
ax3.plot(res.seasonal)
ax3.set_title('Sazonalidade')
ax4.plot(confirmados.index, res.resid)
ax4.axhline(0, linestyle='dashed', c='black')  # zero reference for the residuals
ax4.set_title('Ruído')

fig.tight_layout()
plt.show()


In [21]:
# ARIMA modelling: pmdarima's auto_arima is run on the confirmed-case series
# and the resulting model is kept in `modelo` for the forecast plot below.

from pmdarima.arima import auto_arima
modelo = auto_arima(confirmados)

In [23]:
# Observed series, in-sample fit and out-of-sample forecast on one figure.

# Forecast horizon in days (same value the original predict(31) call used).
horizonte = 31

# Forecast dates start the day after the last observation and contain exactly
# `horizonte` days, so x and y always have the same length. The previous
# hard-coded range 2020-05-20..2020-06-20 holds 32 dates for 31 predictions.
inicio_previsao = confirmados.index.max() + pd.Timedelta(days=1)
datas_previsao = pd.date_range(start=inicio_previsao, periods=horizonte)

fig = go.Figure(go.Scatter(
    x=confirmados.index, y=confirmados, name='Observados'
))

fig.add_trace(go.Scatter(
    x=confirmados.index,
    y=modelo.predict_in_sample(),
    name='Preditos'
))

fig.add_trace(go.Scatter(
    x=datas_previsao,
    y=modelo.predict(horizonte),
    name='Forecast'
))

fig.update_layout(title='Previsão de casos confirmados para os proximos 30 dias')
fig.show()


# Modelo de Crescimento

Utilizando a biblioteca Prophet (pacote `prophet`, anteriormente distribuído como `fbprophet`).

In [32]:
from prophet import Prophet

# Prophet reads the date from a column named 'ds' and the target from 'y'.
# Renaming once, before splitting, avoids calling rename(inplace=True) on
# sliced frames (which can raise SettingWithCopyWarning).
serie_prophet = confirmados.reset_index().rename(
    columns={'observationdate': 'ds', 'confirmed': 'y'}
)

# Upper bound used as the 'cap' column for logistic growth
# (presumably Brazil's population -- TODO confirm the source of this figure).
pop = 211463256

# Hold out the last 5 observations for testing; train on the rest.
train = serie_prophet.iloc[:-5].assign(cap=pop)
test = serie_prophet.iloc[-5:].copy()

# Define the growth model.
profeta = Prophet(growth='logistic', changepoints=['2020-03-21', '2020-03-30', '2020-04-25', '2020-05-03', '2020-05-10'])

# Train the model.
profeta.fit(train)

# Build predictions for the next 200 periods; the future frame needs the same 'cap'.
future_dates = profeta.make_future_dataframe(periods=200)
future_dates['cap'] = pop
forecast = profeta.predict(future_dates)

17:01:41 - cmdstanpy - INFO - Chain [1] start processing
17:01:44 - cmdstanpy - INFO - Chain [1] done processing


In [33]:
# Prophet forecast together with the training data and the held-out test data.
fig = go.Figure()

fig.add_trace(go.Scatter(x=forecast.ds, y=forecast.yhat, name = 'Predict'))
# Plot the held-out observations by their 'ds'/'y' columns so the forecast can
# be compared against data the model never saw.
fig.add_trace(go.Scatter(x=test.ds, y=test.y, name = 'Teste'))
fig.add_trace(go.Scatter(x=train.ds, y=train.y, name = 'Treino'))
fig.update_layout(title='Predicões usando prophet')
fig.show()

