# Библиотеки

In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs, so edits to the local `src` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose

from src.metrics import evaluate
from src.models import aggregated_daily_predictions, BaselineYearAgo
from src.process_data import read_datasets, prepare_dataset
from src.plots import plot_prediction

# Загрузка данных

In [28]:
# Load the project datasets; the result is subscripted by name in the next
# cell ('source_train', 'source_test').
datasets = read_datasets()

In [None]:
# Work on private copies so the objects held in `datasets` stay untouched
# by the transformations applied further down.
train, test = (
    datasets[key].copy() for key in ('source_train', 'source_test')
)

In [None]:
# Display the raw training data.
train

In [None]:
# Print the structural summary of the training data (columns, non-null
# counts, dtypes).
train.info()

In [None]:
# Display the training data again (unchanged since the previous preview).
train

# EDA

In [None]:
# Order both splits by date first, then by time within each date.
train = train.sort_values(by=['date', 'time'])
test = test.sort_values(by=['date', 'time'])

In [None]:
# Move the current row labels into a regular column and call it `id`;
# the frame gets a fresh 0..n-1 index in the sorted order.
train = train.reset_index()
train = train.rename(columns={'index': 'id'})
train

In [None]:
# Summary statistics of the target column.
train['target'].describe()

In [None]:

# Run the project's preprocessing on the training data.
# NOTE(review): later cells read a `datetime` column — presumably it is
# created here; confirm in src.process_data.
train = prepare_dataset(train)

In [None]:
# Inspect the training data after preprocessing.
train

In [None]:
# Timestamp and target for every row dated 2020-09-04.
train.loc[train.date == '2020-09-04', ['datetime', 'target']]

In [None]:
# Line plot of the first 20 rows of 2020-09-04: target against timestamp.
(
    train.loc[train.date == '2020-09-04', ['datetime', 'target']]
    .head(20)
    .plot(x="datetime", y="target")
)

In [None]:
# Same plot for the following day, 2020-09-05, to compare the two shapes.
(
    train.loc[train.date == '2020-09-05', ['datetime', 'target']]
    .head(20)
    .plot(x="datetime", y="target")
)

In [None]:
# Interactive line chart of the whole training target, ordered by timestamp;
# hovering a point also shows its `date` and `time` values.
fig = px.line(
    data_frame=train.sort_values('datetime'),
    x='datetime',
    y="target",
    hover_data=["date", "time"],
)
fig.show()

In [None]:
# (earliest, latest) date present in the training split.
train['date'].min(), train['date'].max()

In [None]:
# (earliest, latest) date present in the test split.
test['date'].min(), test['date'].max()

In [None]:
# Distinct values taken by the `time` column.
train['time'].unique()

In [None]:
# Shape of the `weather_pred` column (row count, NaN rows included).
train['weather_pred'].shape

In [None]:
# Frequency of each distinct `weather_pred` value (NaN is left out by
# value_counts' default).
train['weather_pred'].value_counts()

In [None]:
# Rows where `weather_pred` is missing.
train.loc[train['weather_pred'].isna()]

In [None]:
# Distinct `weather_pred` labels that contain the substring 'ясн'.
# Missing values are replaced by '' first so they simply do not match.
train.loc[
    train['weather_pred'].fillna('').str.contains('ясн'),
    'weather_pred',
].unique()

In [None]:
# Number of rows whose `weather_pred` contains the substring 'ясн'.
train['weather_pred'].str.contains('ясн').sum()

In [None]:
# Frequency of each distinct `weather_fact` value.
train['weather_fact'].value_counts()

In [None]:
# Classical trend / seasonal / residual decomposition of the target
# (recipe adapted from Kaggle).
#
# The observations are hourly, so the seasonal period is one day, i.e. 24
# samples. The previous value of 30 did not correspond to any cycle in
# hourly data, which is why the seasonal component looked meaningless.
HOURS_PER_DAY = 24

# Decompose in timestamp order and index by `datetime`, so the moving
# averages run over consecutive hours and the plots get a time axis instead
# of row numbers.
target_series = train.sort_values('datetime').set_index('datetime')['target']

# Multiplicative model: observed = trend * seasonal * residual.
# statsmodels rejects zero or negative values for this model.
multiplicative_decomposition = seasonal_decompose(
    target_series, model='multiplicative', period=HOURS_PER_DAY
)

# Additive model: observed = trend + seasonal + residual.
additive_decomposition = seasonal_decompose(
    target_series, model='additive', period=HOURS_PER_DAY
)

# Plot both decompositions; the rect leaves room for the figure titles.
plt.rcParams.update({'figure.figsize': (15,15), 'figure.dpi': 70})
multiplicative_decomposition.plot().suptitle('Multiplicative Decomposition', fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])

additive_decomposition.plot().suptitle('Additive Decomposition', fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])

plt.show()

# Baseline

## `target` ровно год назад

In [None]:
# First rows of 2020-01-01, the date from which the year-ago baseline is
# evaluated further down.
train[train.date == '2020-01-01'].head() # baseline model predictions should start appearing from id 8760

In [None]:
# Number of rows in the training split.
len(train)

In [None]:
# Number of rows in the test split.
len(test)

In [None]:
# Share of all rows (train + test) that belongs to the test split.
len(test) / (len(train) + len(test))

In [None]:
# Number of distinct dates in the training split.
len(train.date.unique())

In [None]:
# Number of distinct dates in the test split.
len(test.date.unique())

In [None]:


# Fit the year-ago baseline on the training features (everything except
# `target`) and the training target.
model = BaselineYearAgo()
model.fit(train.drop(columns='target'), train['target'])

# Trial prediction on the rows dated 2020-12-31 or later.
# NOTE(review): `df` is reassigned by the next cell, so this looks like a
# smoke test of `predict` — confirm whether it is still needed.
df = train.loc[train.date >= '2020-12-31'].drop(columns='target')
df['predict'] = model.predict(df)

In [None]:
# Score the baseline on the part of train it can predict: from 2020-01-01
# on, the baseline model is able to produce a forecast.
#
# `.copy()` makes `df` an independent frame. Adding the `predict` column to
# the bare boolean-mask slice of `train` raises pandas'
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
df = train[train.date >= '2020-01-01'].copy()
df['predict'] = model.predict(df.drop('target', axis=1))

# Flatten the (possibly nested) metrics returned by `evaluate` into a
# one-row table for display.
train_metrics = pd.json_normalize(evaluate(df['target'], df['predict']))
train_metrics

In [None]:
# Shape (a 1-tuple) of the array of distinct dates from 2020-01-01 onwards,
# i.e. how many days the train evaluation window covers.
train[train.date >= '2020-01-01'].date.unique().shape

In [None]:
# Aggregate the train predictions with the project helper and turn the
# resulting index back into regular columns, then score the aggregated
# target against the aggregated prediction.
# NOTE(review): presumably one row per day — confirm in src.models.
df2 = aggregated_daily_predictions(df).reset_index()
pd.json_normalize(evaluate(df2['target'], df2['predict']))

In [None]:
# Plot the train predictions at both granularities: the row-level frame and
# the aggregated one.
plot_prediction(df, "Train: hourly")
plot_prediction(df2, "Train: daily")

In [None]:
# Preprocess the test split the same way as train, then predict from its
# features (every column except `target`) with the fitted baseline.
df = prepare_dataset(test)
df['predict'] = model.predict(df.drop(columns='target'))

In [None]:
# Score the test predictions and flatten the metrics returned by `evaluate`
# into a table for display.
test_metrics = pd.json_normalize(evaluate(df['target'], df['predict']))
test_metrics

In [None]:
# Aggregate the test predictions with the project helper and turn the
# resulting index back into regular columns.
# NOTE(review): presumably one row per day — confirm in src.models.
df2 = aggregated_daily_predictions(df).reset_index()
df2

In [None]:
# Metrics of the aggregated test predictions, flattened into a table.
pd.json_normalize(evaluate(df2['target'], df2['predict']))

In [None]:
# Plot the test predictions at both granularities: the row-level frame and
# the aggregated one.
plot_prediction(df, "Test: hourly")
plot_prediction(df2, "Test: daily")