In [44]:
# Import AWS and Sagemaker SDKs and get files access
import boto3
import io
from sagemaker import get_execution_role

# S3 bucket that the later cells read the train/validation CSVs from and
# write the prediction CSV back to.
bucket = 'sagemaker-data-energy-demand'

# Role returned by the SageMaker SDK for this session; it is not referenced
# again in the cells visible here.
role = get_execution_role()

In [45]:
!pip install lightgbm

[33mYou are using pip version 10.0.1, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [46]:
import sys
import warnings
warnings.filterwarnings('ignore')

import numpy as np 
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
plt.style.use('fivethirtyeight')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Light GBM 
import lightgbm as lgb

In [47]:
# Load the train and validation frames for one city directly from S3.

CITY = 'LosAngeles'

# Object keys inside the bucket (the validation file sits under the "test" prefix).
train_key, validation_key = (
    'dataframes/%s/train/data_train.csv' % CITY,
    'dataframes/%s/test/data_validation.csv' % CITY,
)

# Full s3:// URIs built from the bucket name and each key.
train_location, validation_location = (
    's3://{}/{}'.format(bucket, object_key)
    for object_key in (train_key, validation_key)
)

# Both CSVs use their 'datetime' column as the row index.
df_train, df_validation = (
    pd.read_csv(location, index_col='datetime')
    for location in (train_location, validation_location)
)

In [48]:
def data_sppliter(df, label):
    """Separate a DataFrame into its feature columns and its target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the features and the target side by side.
    label : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``df`` restricted to every column except
        ``label`` (original column order kept) and ``y`` is ``df[label]``.

    Raises
    ------
    ValueError
        If ``label`` is not one of the columns of ``df``.
    """
    feature_names = [name for name in df.columns]
    # list.remove raises ValueError when the label column is absent.
    feature_names.remove(label)
    return df[feature_names], df[label]

# Split each frame into its feature matrix and the 'demand(t)' target series.
(X_train, y_train), (X_validation, y_validation) = (
    data_sppliter(frame, label='demand(t)')
    for frame in (df_train, df_validation)
)

In [49]:
# Fit the min-max scaler on the training features only, then apply the same
# fitted scaling to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_validation_scaled = scaler.transform(X_validation)

# The scaler returns bare arrays; wrap them back into DataFrames that keep the
# original column names and index.
X_train = pd.DataFrame(
    data=X_train_scaled,
    index=X_train.index,
    columns=X_train.columns,
)
X_validation = pd.DataFrame(
    data=X_validation_scaled,
    index=X_validation.index,
    columns=X_validation.columns,
)

## Create LightGBM Model

In [50]:
# Hyper-parameters of the LightGBM regressor, gathered in one place.
lgbm_params = {
    'num_leaves': 31,
    'learning_rate': 0.05,
    'n_estimators': 300,
}

# Build the (still unfitted) model.
gbm = lgb.LGBMRegressor(**lgbm_params)

In [51]:
# Train while monitoring the validation split. Early stopping is requested
# through the callback API: the `early_stopping_rounds=` keyword of fit() was
# removed in LightGBM 4.0, whereas the callback works on old and new releases.
gbm.fit(X_train, y_train,
        eval_set=[(X_validation, y_validation)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(stopping_rounds=10)])

print('Starting predicting...')

# predict with the best iteration found by early stopping
lgb_prediction = gbm.predict(X_validation, num_iteration=gbm.best_iteration_)
# The prediction column is stored on df_validation because the next cell
# reads it from there.
df_validation['LightGBM_demand_prediction'] = lgb_prediction

# eval: compare the validation labels (y_true) with the predictions (y_pred)
print('The rmse of prediction is:', mean_squared_error(y_validation, lgb_prediction) ** 0.5)
print('The mae of prediction is:', mean_absolute_error(y_validation, lgb_prediction))

[1]	valid_0's l1: 444.89	valid_0's l2: 276373
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l1: 430.213	valid_0's l2: 256164
[3]	valid_0's l1: 416.42	valid_0's l2: 238024
[4]	valid_0's l1: 402.99	valid_0's l2: 221108
[5]	valid_0's l1: 390.14	valid_0's l2: 205829
[6]	valid_0's l1: 378.952	valid_0's l2: 192489
[7]	valid_0's l1: 367.439	valid_0's l2: 179783
[8]	valid_0's l1: 356.366	valid_0's l2: 168191
[9]	valid_0's l1: 345.885	valid_0's l2: 157455
[10]	valid_0's l1: 336.271	valid_0's l2: 147752
[11]	valid_0's l1: 326.774	valid_0's l2: 138768
[12]	valid_0's l1: 317.896	valid_0's l2: 130564
[13]	valid_0's l1: 309.023	valid_0's l2: 122836
[14]	valid_0's l1: 300.894	valid_0's l2: 115995
[15]	valid_0's l1: 292.798	valid_0's l2: 109532
[16]	valid_0's l1: 285.021	valid_0's l2: 103575
[17]	valid_0's l1: 277.793	valid_0's l2: 98180.9
[18]	valid_0's l1: 270.906	valid_0's l2: 93189.8
[19]	valid_0's l1: 263.293	valid_0's l2: 88174.9
[20]	valid_0's l1: 256.828	valid_0's 

In [52]:
# Keep only the LightGBM prediction column (as a one-column DataFrame, copied
# so that later edits do not touch df_validation).
prediction_columns = ['LightGBM_demand_prediction']
df_lgb = df_validation[prediction_columns].copy()

In [53]:
# Persist the predictions as a CSV object in S3 so that another notebook can
# pick them up.
key = 'predict/%s/pred_lightgbm.csv' % CITY

# Serialise the frame into an in-memory text buffer (nothing is written to disk).
lgb_buffer = io.StringIO()
df_lgb.to_csv(lgb_buffer, compression=None)

# Upload the buffer content; the put() response is the cell's displayed value.
s3_resource = boto3.resource('s3')
prediction_object = s3_resource.Object(bucket, key)
prediction_object.put(Body=lgb_buffer.getvalue())

{'ResponseMetadata': {'RequestId': 'FFF2C0CC2FDD85F9',
  'HostId': 'szNADCvVMKknpfIkctgklxghJUtcA9ac+K2SM+12EaKh8amUmi1mrC3pxgWFKhDHF0XCNkitVwk=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'szNADCvVMKknpfIkctgklxghJUtcA9ac+K2SM+12EaKh8amUmi1mrC3pxgWFKhDHF0XCNkitVwk=',
   'x-amz-request-id': 'FFF2C0CC2FDD85F9',
   'date': 'Fri, 17 Jan 2020 19:55:54 GMT',
   'etag': '"7b4b9a4a08b52e65d0b2a0807250d8da"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"7b4b9a4a08b52e65d0b2a0807250d8da"'}