In [1]:
# Data processing
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

In [2]:
dfCCO = pd.read_csv(r".\Data\processed\CCO_processed.csv", index_col="invoice_month")
dfPYCO = pd.read_csv(r".\Data\processed\PYCO_processed.csv", index_col="invoice_month")
dfXCO = pd.read_csv(r".\Data\processed\XCO_processed.csv", index_col="invoice_month")

In [59]:
def build_model (trainX, trainY, epoch, bs):
    
    
    model = Sequential()
    model.add(LSTM(8, activation='tanh', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
    model.add(LSTM(4, activation='tanh', return_sequences=False))
#     model.add(Dropout(0.2))
    model.add(Dense(trainY.shape[1]))

    model.compile(optimizer='adam', loss='mse')
    
    es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=20)
    rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15)
    mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', save_best_only=True, save_weights_only=True)
    
    model.fit(trainX, trainY, shuffle=True, epochs= epoch, verbose=0, validation_split=0.2, batch_size= bs)

    
    return model

In [60]:
def forecast_costs(df: pd.DataFrame) -> float:
    scaler = StandardScaler()
    df_scaled = pd.DataFrame(scaler.fit_transform(df.fillna(0)), columns=df.columns)
    df_shift = df_scaled.shift(1)
    y = df_scaled.total_cost.shift(-1).head(-1).tail(-1).values[..., None]
    df_array = df_scaled.tail(-1).head(-1).fillna(0).values
    df_shift_array = df_shift.tail(-1).head(-1).fillna(0).values
    # Forecaster is the last two months turned into a tensor so that we can create a final forecast in the end.
    forecaster = np.concatenate([df_scaled.tail(1).values[..., None], df_shift.tail(1).values[..., None]], axis=2)
    X = np.concatenate([df_array[..., None], df_shift_array[..., None]], axis=2)
    model = build_model(X, y, 200, 1)
    forecast = model.predict(forecaster)
    return scaler.inverse_transform(np.resize(forecast, df.shape))[0, 0]

In [39]:
PYCO_up = dfPYCO.iloc[8:13]
PYCO_up

Unnamed: 0_level_0,total_cost,Elastic Load Balancing cost,Elastic Load Balancing usage,Amazon Elastic Compute Cloud cost,Amazon Elastic Compute Cloud usage,Amazon Simple Storage Service cost,Amazon Simple Storage Service usage,AWS Key Management Service cost,AWS Key Management Service usage,Amazon Cognito cost,...,Amazon DocumentDB (with MongoDB compatibility) cost,Amazon DocumentDB (with MongoDB compatibility) usage,Amazon Virtual Private Cloud cost,Amazon Virtual Private Cloud usage,AWS Step Functions cost,AWS Step Functions usage,Amazon Registrar cost,Amazon Registrar usage,Amazon Glacier cost,Amazon Glacier usage
invoice_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-05-01,6489.349368,,,4584.858139,80250.791519,235.900572,26958430.0,0.043525,34510.0,0.0,...,,,,,,,,,,
2020-06-01,7482.256827,,,5136.041689,84290.524854,268.039829,27242910.0,0.062792,40932.0,0.0,...,,,,,,,,,,
2020-07-01,7707.567863,,,5797.07225,87149.629996,287.486184,29896410.0,0.03485,31618.0,0.0,...,,,,,,,,,,
2020-08-01,7731.745608,133.13805,5741.158456,5908.525521,82895.455643,297.854546,25094560.0,0.0,19362.0,,...,,,,,,,,,,
2020-09-01,8486.25509,205.604257,8668.786392,6561.398514,76923.357193,301.621427,22490130.0,0.0,6118.0,0.0,...,,,,,,,,,,


In [40]:
PYCO_down = dfPYCO.iloc[:5]
PYCO_down

Unnamed: 0_level_0,total_cost,Elastic Load Balancing cost,Elastic Load Balancing usage,Amazon Elastic Compute Cloud cost,Amazon Elastic Compute Cloud usage,Amazon Simple Storage Service cost,Amazon Simple Storage Service usage,AWS Key Management Service cost,AWS Key Management Service usage,Amazon Cognito cost,...,Amazon DocumentDB (with MongoDB compatibility) cost,Amazon DocumentDB (with MongoDB compatibility) usage,Amazon Virtual Private Cloud cost,Amazon Virtual Private Cloud usage,AWS Step Functions cost,AWS Step Functions usage,Amazon Registrar cost,Amazon Registrar usage,Amazon Glacier cost,Amazon Glacier usage
invoice_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-09-01,11868.213465,,,9759.034174,77207.321219,64.66387,12378150.0,0.0,426.0,,...,,,,,,,,,,
2019-10-01,10742.465921,,,7612.544765,95662.106033,55.099083,17904210.0,0.0,682.0,,...,,,,,,,,,,
2019-11-01,10572.9236,,,6631.922277,76201.274526,81.014411,15400550.0,0.0,777.0,,...,,,,,,,,,,
2019-12-01,8721.96947,,,5609.078014,74923.884233,100.358831,20552910.0,0.0,1063.0,,...,,,,,,,,,,
2020-01-01,8752.106507,,,5360.513077,86255.990066,168.925449,38883060.0,0.092671,50891.0,,...,,,,,,,,,,


In [41]:
PYCO_mixed = dfPYCO.iloc[5:10]
PYCO_mixed

Unnamed: 0_level_0,total_cost,Elastic Load Balancing cost,Elastic Load Balancing usage,Amazon Elastic Compute Cloud cost,Amazon Elastic Compute Cloud usage,Amazon Simple Storage Service cost,Amazon Simple Storage Service usage,AWS Key Management Service cost,AWS Key Management Service usage,Amazon Cognito cost,...,Amazon DocumentDB (with MongoDB compatibility) cost,Amazon DocumentDB (with MongoDB compatibility) usage,Amazon Virtual Private Cloud cost,Amazon Virtual Private Cloud usage,AWS Step Functions cost,AWS Step Functions usage,Amazon Registrar cost,Amazon Registrar usage,Amazon Glacier cost,Amazon Glacier usage
invoice_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-01,7094.260716,,,4695.577926,72648.22,188.909749,43590420.0,0.074398,44800.0,,...,,,,,,,,,,
2020-03-01,8728.033331,,,5307.14164,1296659.0,226.842485,49955200.0,0.086564,48855.0,,...,,,,,,,,,,
2020-04-01,7154.925511,,,4636.989642,79771.51,227.142632,32545860.0,0.030285,30096.0,,...,,,,,,,,,,
2020-05-01,6489.349368,,,4584.858139,80250.79,235.900572,26958430.0,0.043525,34510.0,0.0,...,,,,,,,,,,
2020-06-01,7482.256827,,,5136.041689,84290.52,268.039829,27242910.0,0.062792,40932.0,0.0,...,,,,,,,,,,


In [61]:
forecast_costs(PYCO_up)



7757.106

In [62]:
forecast_costs(PYCO_down)



10334.688

In [63]:
forecast_costs(PYCO_mixed)



6502.3403