<a href="https://colab.research.google.com/github/krakowiakpawel9/ml_course/blob/master/unsupervised/04_anomaly_detection/04_anomaly_detection_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### scikit-learn
Strona biblioteki: [https://scikit-learn.org](https://scikit-learn.org)  

Dokumentacja/User Guide: [https://scikit-learn.org/stable/user_guide.html](https://scikit-learn.org/stable/user_guide.html)

Podstawowa biblioteka do uczenia maszynowego w języku Python.

Aby zainstalować bibliotekę scikit-learn, użyj polecenia poniżej:
```
!pip install scikit-learn
```
Aby zaktualizować do najnowszej wersji bibliotekę scikit-learn, użyj polecenia poniżej:
```
!pip install --upgrade scikit-learn
```
Kurs stworzony w oparciu o wersję `0.22.1`

### Spis treści:
1. [Import bibliotek](#0)
2. Wczytanie i eksploracja danych
3. Wizualizacja danych
4. Model Prophet – wykrywanie anomalii
5. Wykrywanie anomalii – dane z września




### <a name='0'></a> Import bibliotek

In [1]:
import fbprophet
import numpy as np
import pandas as pd
import plotly.express as px
from fbprophet import Prophet

# Report the installed Prophet version. No seaborn styling is applied here:
# seaborn is not imported in this notebook and every chart is drawn with plotly,
# so a `sns.set()` call would only raise NameError on a fresh kernel.
fbprophet.__version__

'0.5'

In [2]:
# Load the raw series; `timestamp` is parsed into datetimes while reading.
df = pd.read_csv(
    'test_detect_anoms.csv',
    parse_dates=['timestamp'],
)
df.head()

Unnamed: 0,timestamp,count
0,2018-09-25 14:01:00,182.478
1,2018-09-25 14:02:00,176.231
2,2018-09-25 14:03:00,183.917
3,2018-09-25 14:04:00,177.798
4,2018-09-25 14:05:00,165.469


In [3]:
# Column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14398 entries, 0 to 14397
Data columns (total 2 columns):
timestamp    14398 non-null datetime64[ns]
count        14398 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 225.1 KB


In [4]:
# Summary statistics of the numeric column(s) in the raw frame.
df.describe()

Unnamed: 0,count
count,14398.0
mean,112.546917
std,26.512252
min,21.351
25%,94.909075
50%,107.435
75%,125.05125
max,250.0


In [6]:
# Line chart of the raw series over time.
px.line(
    df,
    x='timestamp',
    y='count',
    title='Anomaly Detection',
    width=800,
)

In [7]:
# Same series as individual points; low opacity keeps dense regions readable.
px.scatter(
    df,
    x='timestamp',
    y='count',
    opacity=0.3,
    title='Anomaly Detection',
    width=800,
)

In [8]:
# Build an independent copy of the raw frame using the column names the model
# is fitted on below: `ds` for the timestamp and `y` for the observed value.
data = df.rename(columns={'timestamp': 'ds', 'count': 'y'})
data.head()

Unnamed: 0,ds,y
0,2018-09-25 14:01:00,182.478
1,2018-09-25 14:02:00,176.231
2,2018-09-25 14:03:00,183.917
3,2018-09-25 14:04:00,177.798
4,2018-09-25 14:05:00,165.469


In [9]:
# Prophet draws random samples to build the yhat_lower / yhat_upper band, so
# the bounds (and therefore borderline anomaly flags) change from run to run
# unless the global NumPy RNG is seeded first.
np.random.seed(42)

# Trend-only model: every built-in seasonality is disabled, and the
# uncertainty band is widened to 99% so only extreme points fall outside it.
model = Prophet(
    daily_seasonality=False,
    yearly_seasonality=False,
    weekly_seasonality=False,
    seasonality_mode='multiplicative',
    interval_width=0.99,
    changepoint_range=0.8,
)

# Fit on the whole history, then predict the same timestamps (in-sample).
model.fit(data)
forecast = model.predict(data)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


In [10]:
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2018-09-25 14:01:00,137.758331,83.394924,190.643974,137.758331,137.758331,0.0,0.0,0.0,0.0,0.0,0.0,137.758331
1,2018-09-25 14:02:00,137.664419,85.132237,195.247966,137.664419,137.664419,0.0,0.0,0.0,0.0,0.0,0.0,137.664419
2,2018-09-25 14:03:00,137.570506,78.067127,194.851873,137.570506,137.570506,0.0,0.0,0.0,0.0,0.0,0.0,137.570506
3,2018-09-25 14:04:00,137.476593,79.102755,195.786738,137.476593,137.476593,0.0,0.0,0.0,0.0,0.0,0.0,137.476593
4,2018-09-25 14:05:00,137.382681,85.327095,194.101266,137.382681,137.382681,0.0,0.0,0.0,0.0,0.0,0.0,137.382681


In [11]:
# Keep only the columns needed for anomaly flagging. `.copy()` makes this an
# independent frame, so the column assignments below never hit a view of the
# full forecast (avoids SettingWithCopyWarning).
forecast = forecast[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()

# Attach the observed values by position. Prophet's predict() is expected to
# return one row per input row, ordered by `ds` with a fresh 0..n-1 index, so
# the observations are put in the same order first.
forecast['real'] = data.sort_values('ds')['y'].values

# 1 = inside the 99% band, -1 = anomaly (above the upper or below the lower bound).
outside_band = (
    (forecast['real'] > forecast['yhat_upper'])
    | (forecast['real'] < forecast['yhat_lower'])
)
forecast['anomaly'] = 1
forecast.loc[outside_band, 'anomaly'] = -1
forecast.head()

Unnamed: 0,ds,trend,yhat,yhat_lower,yhat_upper,real,anomaly
0,2018-09-25 14:01:00,137.758331,137.758331,83.394924,190.643974,182.478,1
1,2018-09-25 14:02:00,137.664419,137.664419,85.132237,195.247966,176.231,1
2,2018-09-25 14:03:00,137.570506,137.570506,78.067127,194.851873,183.917,1
3,2018-09-25 14:04:00,137.476593,137.476593,79.102755,195.786738,177.798,1
4,2018-09-25 14:05:00,137.382681,137.382681,85.327095,194.101266,165.469,1


[Plotly color scales](https://plotly.com/python/builtin-colorscales/)

In [12]:
# Observed values over time, coloured by the anomaly flag (1 normal, -1 anomaly).
px.scatter(
    forecast,
    x='ds',
    y='real',
    color='anomaly',
    color_continuous_scale='Bluyl',
    title='Anomaly Detection in Time Series',
    template='plotly_dark',
    width=900,
    height=500,
)

In [13]:
# Re-check the raw frame (dtypes, row count) before slicing out a sub-period.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14398 entries, 0 to 14397
Data columns (total 2 columns):
timestamp    14398 non-null datetime64[ns]
count        14398 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 225.1 KB


In [32]:
# Keep only the rows whose timestamp falls in month 9 (September) and give
# them the `ds` / `y` column names the model below is fitted on.
is_september = df['timestamp'].dt.month == 9
df_sep = df[is_september].rename(columns={'timestamp': 'ds', 'count': 'y'})
df_sep.head()

Unnamed: 0,ds,y
0,2018-09-25 14:01:00,182.478
1,2018-09-25 14:02:00,176.231
2,2018-09-25 14:03:00,183.917
3,2018-09-25 14:04:00,177.798
4,2018-09-25 14:05:00,165.469


In [0]:
# Seed the global NumPy RNG: Prophet samples to build the yhat_lower /
# yhat_upper band, so without a seed the bounds differ on every run.
np.random.seed(42)

# Same trend-only configuration as the full-history model (all seasonalities
# off, 99% uncertainty band), this time fitted on the September subset only.
model = Prophet(
    daily_seasonality=False,
    yearly_seasonality=False,
    weekly_seasonality=False,
    seasonality_mode='multiplicative',
    interval_width=0.99,
    changepoint_range=0.8,
)

# Fit on September, then predict the same timestamps (in-sample).
model.fit(df_sep)
forecast = model.predict(df_sep)

In [34]:
# Keep only the columns needed for anomaly flagging. `.copy()` makes this an
# independent frame, so the assignments below do not trigger SettingWithCopyWarning.
forecast = forecast[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].copy()

# Take the observed values from the frame the model was actually fitted on
# (`df_sep`), not from the full-history frame: aligning against the full frame
# only gives the right rows when the subset happens to occupy index 0..n-1.
# Prophet's predict() is expected to return rows ordered by `ds` with a fresh
# index, so the observations are sorted the same way and attached by position.
forecast['real'] = df_sep.sort_values('ds')['y'].values

# 1 = inside the 99% band, -1 = anomaly (above the upper or below the lower bound).
outside_band = (
    (forecast['real'] > forecast['yhat_upper'])
    | (forecast['real'] < forecast['yhat_lower'])
)
forecast['anomaly'] = 1
forecast.loc[outside_band, 'anomaly'] = -1
forecast.head()

Unnamed: 0,ds,trend,yhat,yhat_lower,yhat_upper,real,anomaly
0,2018-09-25 14:01:00,162.55146,162.55146,116.149682,210.696118,182.478,1
1,2018-09-25 14:02:00,162.287604,162.287604,117.315447,205.661093,176.231,1
2,2018-09-25 14:03:00,162.023747,162.023747,113.994912,210.255414,183.917,1
3,2018-09-25 14:04:00,161.759891,161.759891,115.288978,215.544147,177.798,1
4,2018-09-25 14:05:00,161.496034,161.496034,110.52731,208.493254,165.469,1


In [36]:
# September observations over time, coloured by the anomaly flag (1 normal, -1 anomaly).
px.scatter(
    forecast,
    x='ds',
    y='real',
    color='anomaly',
    color_continuous_scale='Bluyl',
    title='Anomaly Detection in Time Series - September',
    template='plotly_dark',
    width=900,
    height=500,
)