In [13]:
# Import AWS and Sagemaker SDKs and get files access
import boto3
import io
from sagemaker import get_execution_role

# Name of the S3 bucket the notebook reads its data from and writes predictions to
bucket = 'sagemaker-data-energy-demand'
# Execution role as reported by the SageMaker SDK
role = get_execution_role()

In [14]:
# Install LightGBM from inside the notebook through a shell call.
# No version is pinned, so a re-run may pull a different release than the one
# that produced the outputs stored in this notebook.
# NOTE(review): `!pip` uses whichever pip is first on PATH, which may not be the
# kernel's environment -- confirm, and consider `%pip` if the IPython version allows.
!pip install lightgbm

[33mYou are using pip version 10.0.1, however version 19.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [15]:
import sys
import warnings
warnings.filterwarnings('ignore')

import numpy as np 
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
plt.style.use('fivethirtyeight')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Light GBM 
import lightgbm as lgb

In [16]:
# Import data 

CITY = 'LosAngeles'

# Object keys of the two CSV files inside the bucket
# NOTE(review): the validation file lives under a `test/` prefix -- confirm this is intended.
train_key = 'dataframes/%s/train/data_train.csv' % CITY
validation_key = 'dataframes/%s/test/data_validation.csv' % CITY

# Full S3 URIs, built the same way for both keys
train_location, validation_location = (
    's3://{}/{}'.format(bucket, object_key)
    for object_key in (train_key, validation_key)
)

# Read train first, then validation; the `datetime` column becomes the index
df_train, df_validation = (
    pd.read_csv(s3_uri, index_col='datetime')
    for s3_uri in (train_location, validation_location)
)

In [17]:
def data_sppliter(df, label):
    """Separate a frame into its feature columns and its target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the features and the target.
    label : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``df`` restricted to every column except
        ``label`` (original order kept) and ``y`` is ``df[label]``.

    Raises
    ------
    ValueError
        If ``label`` is not one of the columns of ``df``.
    """
    feature_names = df.columns.tolist()
    # list.remove raises ValueError when the label is absent
    feature_names.remove(label)
    return df[feature_names], df[label]

# Both frames are split on the same target column
target_label = 'demand(t)'
X_train, y_train = data_sppliter(df_train, target_label)
X_validation, y_validation = data_sppliter(df_validation, target_label)

In [18]:
# Learn the min/max of each feature from the training split only, then apply
# that same transformation to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_validation_scaled = scaler.transform(X_validation)

# The scaler returns plain arrays; wrap them back into frames that carry the
# original column names and index.
X_train = pd.DataFrame(
    data=X_train_scaled,
    index=X_train.index,
    columns=X_train.columns,
)
X_validation = pd.DataFrame(
    data=X_validation_scaled,
    index=X_validation.index,
    columns=X_validation.columns,
)

## Create LightGBM Model

In [19]:
# Hyper-parameters of the LightGBM regressor, gathered in one place
lgbm_params = {
    'num_leaves': 31,
    'learning_rate': 0.05,
    'n_estimators': 100,
}
gbm = lgb.LGBMRegressor(**lgbm_params)

In [10]:
# Train with early stopping: training halts once the validation scores have not
# improved for 10 rounds. The rule is passed as a callback because the
# `early_stopping_rounds` keyword of `fit` was removed in LightGBM 4.0 and the
# install step in this notebook does not pin a version.
gbm.fit(X_train, y_train,
        eval_set=[(X_validation, y_validation)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(10)])

print('Starting predicting...')

# predict, using the best iteration recorded by early stopping
lgb_validation_pred = gbm.predict(X_validation, num_iteration=gbm.best_iteration_)
# NOTE: this adds a column to df_validation in place; re-running the cell that
# splits df_validation afterwards would pick this column up as a feature.
df_validation['LightGBM_demand_prediction'] = lgb_validation_pred

# eval
# The same split drives early stopping above, so these scores are optimistic.
print('The rmse of prediction is:', mean_squared_error(y_validation, lgb_validation_pred) ** 0.5)
print('The mae of prediction is:', mean_absolute_error(y_validation, lgb_validation_pred))

[1]	valid_0's l1: 522.937	valid_0's l2: 432070
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l1: 498.457	valid_0's l2: 392748
[3]	valid_0's l1: 475.377	valid_0's l2: 357274
[4]	valid_0's l1: 453.271	valid_0's l2: 325097
[5]	valid_0's l1: 432.506	valid_0's l2: 296195
[6]	valid_0's l1: 412.733	valid_0's l2: 270044
[7]	valid_0's l1: 393.916	valid_0's l2: 246252
[8]	valid_0's l1: 376.236	valid_0's l2: 224861
[9]	valid_0's l1: 359.475	valid_0's l2: 205500
[10]	valid_0's l1: 343.533	valid_0's l2: 187790
[11]	valid_0's l1: 328.597	valid_0's l2: 171950
[12]	valid_0's l1: 314.341	valid_0's l2: 157493
[13]	valid_0's l1: 300.845	valid_0's l2: 144434
[14]	valid_0's l1: 288.119	valid_0's l2: 132611
[15]	valid_0's l1: 276.28	valid_0's l2: 122053
[16]	valid_0's l1: 264.989	valid_0's l2: 112394
[17]	valid_0's l1: 254.17	valid_0's l2: 103534
[18]	valid_0's l1: 244.17	valid_0's l2: 95667.1
[19]	valid_0's l1: 234.648	valid_0's l2: 88371.6
[20]	valid_0's l1: 225.768	valid_0's 

In [11]:
# Independent one-column frame holding only the LightGBM predictions
df_lgb = df_validation.loc[:, ['LightGBM_demand_prediction']].copy()

In [12]:
# Persist the predictions as a CSV object in S3 so another notebook can pick them up
key = 'predict/%s/pred_lightgbm.csv' % CITY

# Serialise the frame into an in-memory text buffer instead of a local file
lgb_buffer = io.StringIO()
df_lgb.to_csv(lgb_buffer, compression=None)

# Upload the buffer contents; the response of `put` is the cell's displayed value
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, key).put(Body=lgb_buffer.getvalue())

{'ResponseMetadata': {'RequestId': '8D3603E3CEA02F1C',
  'HostId': 'DaUU2WGSPgepLISg8BRWdZBvEdEFGg7+24lGnEPMtTXKp4InXOasoDuRW8EUFjzI7wFGZ5l9slA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'DaUU2WGSPgepLISg8BRWdZBvEdEFGg7+24lGnEPMtTXKp4InXOasoDuRW8EUFjzI7wFGZ5l9slA=',
   'x-amz-request-id': '8D3603E3CEA02F1C',
   'date': 'Thu, 16 Jan 2020 17:38:05 GMT',
   'etag': '"3c9a70b7f18148aa22c08c40ad0b1052"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"3c9a70b7f18148aa22c08c40ad0b1052"'}