In [None]:
import boto3
import io
from sagemaker import get_execution_role

# S3 bucket that the dataframes are read from and written back to.
bucket = 'sagemaker-data-energy-demand'

# Execution role reported by the SageMaker SDK for this session.
# NOTE(review): not referenced by the cells visible here - confirm it is still needed.
role = get_execution_role()

In [None]:
import numpy as np 
import pandas as pd 
from datetime import datetime, timedelta

import matplotlib.pyplot as plt

from pandas.plotting import register_matplotlib_converters
import warnings

# Register pandas' date/time converters with matplotlib for the plots below.
register_matplotlib_converters()

# Silence every warning from here on. This keeps cell output clean, but it
# also hides deprecation notices - re-enable while debugging.
warnings.simplefilter('ignore')

In [None]:
# City whose reframed dataset is processed in this notebook.
CITY = 'LosAngeles'

# Object key inside the bucket, and the full s3:// URL built from it.
df_file = 'dataframes/%s_reframed.csv' % CITY
df_location = 's3://{}/{}'.format(bucket, df_file)

# The 'datetime' column becomes the index. No date parsing is requested here,
# so the index labels stay as read from the CSV.
df = pd.read_csv(filepath_or_buffer=df_location, index_col='datetime')

# Preview the first rows.
df.head(n=5)

In [None]:
# Draw one demand line per month for November and December 2019.
rows_in_2019 = df['year(t)'] == 2019
for month_number, line_label in ((11, '201911'), (12, '201912')):
    rows_in_month = rows_in_2019 & (df['month(t)'] == month_number)
    plt.plot(df.loc[rows_in_month, 'demand(t)'], label=line_label)

# Axis cosmetics, title and legend.
plt.xticks(fontsize=14, rotation=45)
plt.xlabel('Date')
plt.ylabel('Demand (M)')
plt.title('Latest two months of data')
plt.legend()
plt.show()

In [None]:
# Box plot of the target column to eyeball its spread and outliers.
target_column = 'demand(t)'
plt.boxplot([df[target_column]], labels=[target_column])
plt.ylabel('Target')
plt.title('Box Plot - Demand(t)')
plt.grid(True)

In [None]:
# Chronological train / validation / test split.
#
# The index holds timestamp strings such as '2019-12-01 01:00:00+00:00'.
# They are parsed once into timestamps so that the split boundaries are
# compared as points in time rather than as text; comparing the raw strings
# against offset-less boundary strings only works by lexicographic accident.
# NOTE(review): assumes the offset is always +00:00 (values are normalised to
# UTC and the zone is then dropped) - confirm against the source data.
TEST_SPAN = timedelta(hours=71)        # test window: [last row - 71 h, last row]
VALIDATION_SPAN = timedelta(days=30)   # validation window: the 30 days before the test window

index_ts = pd.to_datetime(df.index, utc=True).tz_localize(None)

# The last row is taken as the most recent observation (index assumed sorted).
lastrow_date = df.index[-1]
datetime_lastrow = index_ts[-1].to_pydatetime()

split_test_date = datetime_lastrow - TEST_SPAN
split_test_date_str = str(split_test_date)
split_validation_date = split_test_date - VALIDATION_SPAN
split_validation_date_str = str(split_validation_date)

# A row that falls exactly on a boundary belongs to the later split.
is_test = index_ts >= split_test_date
is_validation = (index_ts >= split_validation_date) & (index_ts < split_test_date)
is_train = index_ts < split_validation_date

df_train = df.loc[is_train].copy()
df_validation = df.loc[is_validation].copy()
df_test = df.loc[is_test].copy()

print(df_train.shape)
print(df_validation.shape)
print(df_test.shape)

In [None]:
# Sanity check: the row counts of the three splits must add up to the full
# frame. Fail loudly so a bad split cannot reach the upload step on Run All.
n_split_rows = df_train.shape[0] + df_validation.shape[0] + df_test.shape[0]
assert n_split_rows == df.shape[0], (
    'split sizes add up to %d rows but df has %d' % (n_split_rows, df.shape[0])
)
print(n_split_rows == df.shape[0])

In [None]:
# Store each split as a CSV object in S3 so that work can continue in
# another notebook.
s3_resource = boto3.resource('s3')

# Destination object keys, one per split.
train_key = 'dataframes/%s_reframed_train.csv' % CITY
validation_key = 'dataframes/%s_reframed_validation.csv' % CITY
test_key = 'dataframes/%s_reframed_test.csv' % CITY

# Serialize every split into its own in-memory text buffer.
train_buffer, validation_buffer, test_buffer = io.StringIO(), io.StringIO(), io.StringIO()
for split_frame, split_buffer in (
    (df_train, train_buffer),
    (df_validation, validation_buffer),
    (df_test, test_buffer),
):
    split_frame.to_csv(split_buffer, compression=None)

# Upload in the order train, validation, test.
s3_resource.Object(bucket, train_key).put(Body=train_buffer.getvalue())
s3_resource.Object(bucket, validation_key).put(Body=validation_buffer.getvalue())
s3_resource.Object(bucket, test_key).put(Body=test_buffer.getvalue())