In [None]:
!pip3 install --upgrade --user pip lightgbm

In [None]:
# Alternative pip-based install, kept for reference only (not executed):
#   !pip install pystan==2.14
#   !pip install fbprophet
# The conda install below is the preferred route.
# NOTE(review): neither plotly nor fbprophet is imported by the cells visible
# in this notebook - confirm these installs are still required.
!conda install -c plotly plotly==3.10.0 --yes
!conda install -c conda-forge fbprophet --yes

In [None]:
# Download the two input CSVs into the working directory. `-O` fixes the local
# file name, so re-running the cell overwrites any earlier download.
!wget -O timeseries_raw.csv https://sagemaker-us-west-2-169088282855.s3-us-west-2.amazonaws.com/miya/timeseries_raw.csv
!wget -O shenzhen_weather.csv https://sagemaker-us-west-2-169088282855.s3-us-west-2.amazonaws.com/miya/shenzhen_weather.csv

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('dark_background')

from sklearn import preprocessing

import json

import lightgbm as lgb

from lightgbm import plot_importance, plot_tree

# Optional dependency: ipywidgets (https://github.com/jupyter-widgets/ipywidgets)
# supplies the interactive controls. INTERACTIVE records whether the import
# succeeded so later code can check the flag instead of importing again.
try:
    from ipywidgets import interact, SelectMultiple
except ImportError:
    INTERACTIVE = False
else:
    INTERACTIVE = True

## Data Preparation

In [None]:
# Weather covariates. The first CSV column is parsed as datetimes and the
# `date` column becomes the index (presumably the same column - confirm
# against the file header).
df_weather = pd.read_csv(
    'shenzhen_weather.csv',
    header=0,
    sep=',',
    parse_dates=[0],
).set_index(['date'])

df_weather

In [None]:
# Per-site time series. The file is read without a header row, so columns are
# named by integer position; column 0 (parsed as datetimes) is renamed to
# `date` and promoted to the index. Rows whose index value is null are dropped.
df = (
    pd.read_csv('timeseries_raw.csv', header=None, sep=',', parse_dates=[0])
    .rename(columns={0: "date"})
    .set_index(['date'])
    .loc[lambda frame: frame.index.notna()]
)

df.head()

In [None]:
# Display per-column summary statistics of the loaded series.
df.describe()

In [None]:
# Span noted for the data: 2019-07-10 00:00:00 to 2019-10-09 23:59:59
# (presumably the range covered by the raw file - confirm).
#
# The train, test and predict windows are contiguous: each window starts at
# the timestamp where the previous one ends.
DATETIME_START_OF_TRAIN = "2019-07-10 00:00:00"
DATETIME_END_OF_TRAIN = DATETIME_START_OF_TEST = "2019-09-17 00:00:00"
DATETIME_END_OF_TEST = DATETIME_START_OF_PREDICT = "2019-09-24 00:00:00"
DATETIME_END_OF_PREDICT = "2019-10-01 00:00:00"

# Sampling frequency and horizon/context lengths, expressed in steps of `freq`.
freq, prediction_length, context_length = '1D', 7, 14

# Columns (sites) of `df` that are plotted and modelled.
cols = [24, 31, 36]

In [None]:
# Plot the raw series of the selected sites. The title is built from `cols`
# so it cannot drift out of sync with the columns that are actually drawn.
_ = df.loc[:, cols].plot(
    style='-',
    figsize=(15, 5),
    title='site: ' + ', '.join(map(str, cols)),
)

In [None]:
# Split on half-open windows [start, end) so that a timestamp sitting exactly
# on a boundary belongs to one split only. Inclusive comparisons on both sides
# would place the DATETIME_START_OF_TEST row in train *and* test (leaking test
# data into training) and would stretch the test window one step past
# DATETIME_END_OF_TEST.
df_train = df.loc[df.index < DATETIME_START_OF_TEST, :].copy()
df_test = df.loc[
    (df.index >= DATETIME_START_OF_TEST) & (df.index < DATETIME_START_OF_PREDICT), :
].copy()

In [None]:
def create_features(df, df_weather, label=None):
    """
    Build the model's feature matrix from a datetime-indexed frame.

    Calendar features (day of week, month) are derived from ``df.index`` and
    left-joined with ``df_weather`` on the index, so rows without a matching
    weather record keep NaN in the weather columns.

    ``df`` itself is not modified: features are assembled in a new frame, which
    keeps the function safe to call repeatedly on the same train/test frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index (and the label column, if ``label`` is given).
    df_weather : pandas.DataFrame
        Weather covariates indexed by the same kind of datetime values.
    label : hashable, optional
        Name of the target column in ``df``. Any value other than ``None`` is
        treated as a column name, including falsy ones such as the integer ``0``.

    Returns
    -------
    X : pandas.DataFrame
        Feature matrix, returned alone when ``label`` is ``None``.
    (X, y) : tuple
        Feature matrix and ``df[label]`` when ``label`` is given.
    """
    timestamps = df.index

    # Only the calendar fields that are used as features are computed here.
    # Further candidates (timestamps.dayofyear, timestamps.day, ISO week) can be
    # added to this dict when experimenting.
    calendar = pd.DataFrame(
        {
            'dayofweek': timestamps.dayofweek.values,
            'month': timestamps.month.values,
        },
        index=timestamps,
    )

    X = calendar.join(df_weather, how='left')

    # `is not None` rather than truthiness: column names here are integers,
    # and a label of 0 must still return the target.
    if label is not None:
        return X, df[label]
    return X

## Configuration

In [None]:
# LightGBM training parameters; anything not listed keeps the library default.
params = dict(
    num_leaves=5,         # small trees
    metric=['l1', 'l2'],  # metrics evaluated on the validation sets
    verbose=-1,
)

## Fit Model

In [None]:
# Fit a model for a single site first.
# NOTE(review): column 300 is not one of the sites listed in `cols` - confirm
# this is the intended target.
col = 300

X_train, y_train = create_features(df_train, df_weather, label=col)
X_test, y_test = create_features(df_test, df_weather, label=col)

lgb_train = lgb.Dataset(X_train, y_train)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Evaluation history and periodic logging are requested through callbacks:
# the `evals_result=` / `verbose_eval=` keyword arguments of `lgb.train` were
# removed in LightGBM 4.0, whereas the callbacks work on 3.3 and later.
evals_result = {}  # filled by record_evaluation, for plotting
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=400,
                valid_sets=[lgb_train, lgb_test],
                # categorical_feature=["dayofweek"] would treat the weekday as categorical
                callbacks=[
                    lgb.record_evaluation(evals_result),
                    lgb.log_evaluation(period=10),
                ])

_ = plot_importance(gbm, height=0.9)

## Predict

In [None]:
# Store the model's forecasts for the test window on df_test; this adds (or
# overwrites) the 'prediction' column in place.
df_test['prediction'] = gbm.predict(X_test)
# Stack test and train rows into one frame for plotting.
df_all = pd.concat([df_test, df_train], sort=False)
# Draw the actual values of `col` together with the predictions.
_ = df_all[[col,'prediction']].plot(figsize=(15, 5))

## Evaluation

## Evaluate All Sites

In [None]:
# Repeat the fit / predict / plot cycle once per site listed in `cols`.
# The names assigned inside the loop (gbm, df_all, ...) are notebook globals,
# so after the loop they hold the results of the last site.
for col in cols:
    X_train, y_train = create_features(df_train, df_weather, label=col)
    X_test, y_test = create_features(df_test, df_weather, label=col)

    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

    # Callbacks replace the `evals_result=` / `verbose_eval=` keyword arguments,
    # which were removed from `lgb.train` in LightGBM 4.0.
    evals_result = {}  # filled by record_evaluation, for plotting
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=400,
                    valid_sets=[lgb_train, lgb_test],
                    # categorical_feature=["dayofweek"] would treat the weekday as categorical
                    callbacks=[
                        lgb.record_evaluation(evals_result),
                        lgb.log_evaluation(period=10),
                    ])

    # Titles carry the site id so the per-site figures can be told apart.
    _ = plot_importance(gbm, height=0.9,
                        title='Feature importance - site {}'.format(col))

    # Overwrites the 'prediction' column of df_test on every iteration.
    df_test['prediction'] = gbm.predict(X_test)
    df_all = pd.concat([df_test, df_train], sort=False)
    _ = df_all[[col, 'prediction']].plot(figsize=(15, 5),
                                         title='site: {}'.format(col))