<a href="https://colab.research.google.com/github/kurek0010/machine-learing-bootcamp/blob/main/unsupervised/05_case_studies/03_coronavirus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

* @author: krakowiakpawel9@gmail.com  
* @site: e-smartdata.org

### prophet
Strona biblioteki: [https://facebook.github.io/prophet/](https://facebook.github.io/prophet/)  

Dokumentacja/User Guide: [https://facebook.github.io/prophet/docs/quick_start.html](https://facebook.github.io/prophet/docs/quick_start.html)

Biblioteka od Facebooka do pracy z szeregami czasowymi.

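Aby zainstalować bibliotekę prophet, użyj polecenia poniżej:
```
!pip install prophet
```
Aby zaktualizować do najnowszej wersji, użyj polecenia poniżej:
```
!pip install --upgrade prophet
```
Kurs stworzony w oparciu o wersję `0.5` (wówczas pakiet nosił nazwę `fbprophet`). Od wersji `1.0` pakiet nazywa się `prophet` i właśnie z niego importowany jest model w tym notebooku (`from prophet import Prophet`).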

### Spis treści:
1. [Import bibliotek](#0)
2. [Wczytanie danych](#1)
3. [Eksploracja i przygotowanie danych](#2)
4. [Budowa modelu](#3)




### <a name='0'></a> Import bibliotek

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Seed NumPy's global random number generator so that any NumPy-based
# randomness in this notebook is repeatable between runs.
np.random.seed(42)

### <a name='1'></a> Wczytanie danych

In [None]:
# Data covers 22.01.2020 through 17.02.2020.
url = 'https://storage.googleapis.com/esmartdata-courses-files/ml-course/coronavirus.csv'
# Parse both timestamp columns while reading; later cells use the .dt accessor on 'Date'.
data = pd.read_csv(url, parse_dates=['Date', 'Last Update'])
# Preview the first rows.
data.head()

### <a name='2'></a> Eksploracja i przygotowanie danych

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
data.info()

In [None]:
# Count missing values per column.
data.isnull().sum()

In [None]:
# Missing Province/State -> fall back to the value of Country for that row.
data['Province/State'] = data['Province/State'].fillna(data['Country'])
# Re-check the per-column missing-value counts after the fill.
data.isnull().sum()

In [None]:
# Ten countries with the most rows in the dataset (row counts, not case totals).
data['Country'].value_counts().nlargest(10)

In [None]:
# Relabel 'Mainland China' as 'China'; every other country value is kept as is.
data['Country'] = data['Country'].replace('Mainland China', 'China')
# Row counts per country after the relabelling (top 10).
data['Country'].value_counts().nlargest(10)

In [None]:
# Top 15 countries by number of rows in the dataset. Note that value_counts()
# counts records per country; it does not sum the Confirmed column.
record_counts = data['Country'].value_counts().nlargest(15)
tmp = record_counts.reset_index()
tmp.columns = ['Country', 'Count']
# Deterministic order: highest count first, ties broken alphabetically.
tmp = tmp.sort_values(by=['Count', 'Country'], ascending=[False, True])
# ISO alpha-3 codes are assigned by position, so this list must stay in the
# same order as the sorted rows above; np.nan marks an entry without a code.
# NOTE(review): a name-keyed mapping would be more robust - confirm the exact
# country spellings in the data before switching.
iso_codes = [
    'CHN', 'USA', 'AUS', 'CAN', 'JPN',
    'KOR', 'THA', 'HKG', np.nan, 'SGP',
    'TWN', 'VNM', 'FRA', 'MYS', 'NPL',
]
tmp['iso_alpha'] = iso_codes
tmp

In [None]:
# World bubble map of the top-15 table: bubble size and colour both encode
# 'Count', and each bubble is labelled with the country name.
# NOTE(review): the row whose iso_alpha is NaN has no location code and is
# presumably not drawn - confirm.
px.scatter_geo(
    tmp,
    locations='iso_alpha',
    size='Count',
    size_max=40,
    template='plotly_dark',
    color='Count',
    text='Country',
    projection='natural earth',
    color_continuous_scale='reds',
    width=950,
    # Title typo fixed: "na świcie" (at dawn) -> "na świecie" (in the world).
    title='Liczba przypadków Koronawirusa na świecie - TOP15',
)

In [None]:
# Same bubble map as the world view, restricted to the Asia scope.
px.scatter_geo(
    tmp,
    locations='iso_alpha',
    size='Count',
    size_max=40,
    template='plotly_dark',
    color='Count',
    text='Country',
    projection='natural earth',
    color_continuous_scale='reds',
    scope='asia',
    width=950,
    title='Liczba przypadków Koronawirusa - Azja (z TOP15 global)',
)

In [None]:
# Bar chart of 'Count' per country for the rows held in tmp.
px.bar(
    tmp,
    x='Country',
    y='Count',
    template='plotly_dark',
    width=950,
    color_discrete_sequence=['#42f5c8'],
    title='Liczba przypadków Koronawirusa w rozbiciu na kraje',
)

In [None]:
# Same bar chart with the China row filtered out.
without_china = tmp[tmp['Country'] != 'China']
px.bar(
    without_china,
    x='Country',
    y='Count',
    template='plotly_dark',
    width=950,
    color_discrete_sequence=['#42f5c8'],
    title='Liczba przypadków Koronawirusa w rozbiciu na kraje (poza Chinami)',
)

In [None]:
# Collapse the data to one row per calendar day: group on the date part of
# 'Date' and sum the three counters over all rows that share that day.
# NOTE(review): assumes each row is one region's figure for that day, so the
# sum is a global total - confirm against the dataset.
per_day = data.groupby(by=data['Date'].dt.date)
tmp = per_day[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
tmp

In [None]:
# Line chart with one series per counter, all plotted against the date.
fig = go.Figure()

for column in ('Confirmed', 'Deaths', 'Recovered'):
    # The column name doubles as the legend label.
    fig.add_trace(go.Scatter(x=tmp['Date'], y=tmp[column], mode='markers+lines', name=column))

fig.update_layout(template='plotly_dark', width=950, title='Koronawirus (22.01-17.02.2020)')

In [None]:
# Keep only the date and the confirmed count, renamed to the 'ds' / 'y'
# columns that the Prophet model fitted later in the notebook is given.
data_confirmed = tmp[['Date', 'Confirmed']].rename(columns={'Date': 'ds', 'Confirmed': 'y'})
data_confirmed.head()

In [None]:
# Area chart (filled down to zero) of the confirmed series prepared for the model.
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_confirmed['ds'], y=data_confirmed['y'], mode='markers+lines',
                         name='Confirmed', fill='tozeroy'))
# The date range in the title matches the dataset span noted where the data
# is loaded (22.01-17.02.2020).
fig.update_layout(template='plotly_dark', width=950, title='Liczba potwierdzonych przypadków (22.01-17.02)')

### <a name='3'></a> Budowa modelu

In [None]:
from prophet import Prophet
from prophet.plot import plot_plotly

# Fit the model with every built-in seasonality component switched off.
seasonality_off = {
    'yearly_seasonality': False,
    'weekly_seasonality': False,
    'daily_seasonality': False,
}
model = Prophet(**seasonality_off)
model.fit(data_confirmed)

# Forecast: request 7 additional daily periods and predict over that frame.
future = model.make_future_dataframe(periods=7, freq='D')
forecast = model.predict(future)
# Interactive plot of the model together with its forecast.
plot_plotly(model, forecast)