In [1]:
import pandas as pd
import numpy as np
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM, Dropout
from keras.layers import Dense

Using TensorFlow backend.


## Reading Data

In [2]:
# Load the workbook; pandas uses the sheet's first row as a provisional header.
df = pd.read_excel('PrivatizedDataforParticipants.xlsx')
# The real column labels live in the row at position 1 of the loaded frame
# (not the first row), so lift that row out ...
new_header = df.iloc[1]
# ... keep only the rows below it, and install it as the header.
df = df.iloc[2:]
df.columns = new_header
# Drop the listed descriptive columns, rename the two key columns so they are
# usable as attributes, and renumber the rows from 0.
df2 = (
    df.drop(columns=['Generic Group','Generic Brand','Generic Product Category','Generic Sub-Variable','Units'])
      .rename(columns={'Generic LookupKey': 'Generic_LookupKey', 'Generic Variable': 'Generic_Variable'})
      .reset_index(drop=True)
)

# Keep only the rows whose variable is 'Total Revenue' (row labels from the
# renumbering above are retained).
df2 = df2.loc[df2['Generic_Variable'] == 'Total Revenue']
df2.head()

1,Generic Product,Generic Group variable,Generic_Variable,Generic_LookupKey,2016-04-01 00:00:00,2016-05-01 00:00:00,2016-06-01 00:00:00,2016-07-01 00:00:00,2016-08-01 00:00:00,2016-09-01 00:00:00,...,2019-06-01 00:00:00,2019-07-01 00:00:00,2019-08-01 00:00:00,2019-09-01 00:00:00,2019-10-01 00:00:00,2019-11-01 00:00:00,2019-12-01 00:00:00,2020-01-01 00:00:00,2020-02-01 00:00:00,2020-03-01 00:00:00
9,Panther - Leopard - Lion,Revenue,Total Revenue,Segment 1 - Sandesh Brand 2Sandesh Brand 2Mobi...,-1.96432,-1.89887,-1.8643,-1.72079,-1.67049,-1.44222,...,0.903317,1.05569,1.08673,1.09002,x,x,x,x,x,x


## Helper Functions

In [3]:
# split a univariate sequence
def split_sequence(sequence, n_steps):
    """Cut a univariate sequence into supervised (window, next value) pairs.

    Sample ``k`` consists of ``sequence[k:k + n_steps]`` as input and
    ``sequence[k + n_steps]`` as the target.

    Args:
        sequence: Indexable, sliceable sequence of observations.
        n_steps: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: ``X`` stacks the input windows and
        ``y`` holds the matching targets. Both are empty when the sequence is
        too short to yield a single pair.
    """
    windows = []
    targets = []
    last_ix = len(sequence) - 1
    start = 0
    # Continue only while the target index (start + n_steps) is still inside
    # the sequence.
    while start <= last_ix and start + n_steps <= last_ix:
        stop = start + n_steps
        windows.append(sequence[start:stop])
        targets.append(sequence[stop])
        start += 1
    return array(windows), array(targets)

def forecast_accuracy(forecast, actual):
    """Compute standard error metrics between forecasts and observed values.

    Both inputs are converted to float arrays first, so plain lists, lists of
    numpy scalars and object-dtype arrays are all accepted.

    Args:
        forecast: Array-like of predicted values.
        actual: Array-like of observed values, same length as ``forecast``.

    Returns:
        dict with keys ``'mape'`` (mean absolute percentage error, as a
        fraction), ``'me'`` (mean error), ``'mae'`` (mean absolute error),
        ``'mpe'`` (mean percentage error, as a fraction) and ``'rmse'``
        (root mean squared error).

    Note:
        ``mape`` and ``mpe`` divide by ``actual``; zeros in ``actual`` yield
        inf/nan for those two metrics, following numpy division semantics.
    """
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)
    error = forecast - actual                       # signed errors, computed once

    mape = np.mean(np.abs(error)/np.abs(actual))    # MAPE
    me = np.mean(error)                             # ME
    mae = np.mean(np.abs(error))                    # MAE
    mpe = np.mean(error/actual)                     # MPE
    rmse = np.mean(error**2)**.5                    # RMSE
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse})

## Total Revenue

In [4]:
# Take the first 'Total Revenue' row, kept as a one-row DataFrame, as the
# series to model.
df3 = df2.iloc[:1]
df3.head()

1,Generic Product,Generic Group variable,Generic_Variable,Generic_LookupKey,2016-04-01 00:00:00,2016-05-01 00:00:00,2016-06-01 00:00:00,2016-07-01 00:00:00,2016-08-01 00:00:00,2016-09-01 00:00:00,...,2019-06-01 00:00:00,2019-07-01 00:00:00,2019-08-01 00:00:00,2019-09-01 00:00:00,2019-10-01 00:00:00,2019-11-01 00:00:00,2019-12-01 00:00:00,2020-01-01 00:00:00,2020-02-01 00:00:00,2020-03-01 00:00:00
9,Panther - Leopard - Lion,Revenue,Total Revenue,Segment 1 - Sandesh Brand 2Sandesh Brand 2Mobi...,-1.96432,-1.89887,-1.8643,-1.72079,-1.67049,-1.44222,...,0.903317,1.05569,1.08673,1.09002,x,x,x,x,x,x


In [5]:
# Keep column positions 10..45 (36 columns; judging by the header displayed
# above these are presumably the months 2016-10 through 2019-09 - verify
# against the workbook) and reshape the single wide row into long form:
# one (Month, Value) record per selected column.
df3 = df3.iloc[:, 10:46].melt(var_name="Month", value_name="Value")

In [6]:
# Cast explicitly to float64: the melted Value column appears to be
# object-typed (it comes from a sheet whose columns also hold text), and
# object arrays cannot be reliably converted to tensors by Keras/TensorFlow.
raw_seq = np.asarray(df3.Value, dtype=float)
# choose a number of time steps
n_steps = 19
# split into samples
X, y = split_sequence(raw_seq, n_steps)
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model: three stacked LSTM layers, each followed by dropout, feeding a
# single-unit Dense layer that outputs the next value
model = Sequential()
model.add(LSTM(110, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)))
model.add(Dropout(0.2))
# input_shape is only meaningful on the first layer; later layers infer their
# input from the layer before them
model.add(LSTM(110, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
# last recurrent layer returns only its final output so Dense sees a 2-D tensor
model.add(LSTM(110, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mape')
# fit model
# NOTE(review): no random seed is set, so fitted weights (and every number
# below) vary from run to run.
model.fit(X, y, epochs=200, verbose=0)

<keras.callbacks.callbacks.History at 0x14516ec50>

In [12]:
# Predict the target of every window in X with one batched call, instead of
# copying each window element by element and predicting one sample at a time.
# NOTE(review): X is the data the model was fitted on, so the errors computed
# from `fc` are training-set errors, not out-of-sample forecast accuracy.
preds = model.predict(np.asarray(X, dtype=float), verbose=0)
# The model ends in Dense(1), so column 0 holds the one prediction per window;
# `fc` stays a list of scalars as before.
fc = list(preds[:, 0])
# Observed values aligned with the predictions: window k predicts element
# n_steps + k of the series. Cast to float so downstream arithmetic is numeric.
actual = np.asarray(df3.Value, dtype=float)[n_steps:]
print(fc,actual,len(fc),len(actual))

[0.33479336, 0.35369086, 0.3783667, 0.4089336, 0.44475213, 0.4853277, 0.5300246, 0.57865, 0.6304716, 0.6850188, 0.7400075, 0.79793674, 0.85792273, 0.9178598, 0.9806008, 1.0402368, 1.1003687] [0.36631410728412644 0.3266383901143087 0.43425186407642236
 0.42452153509961077 0.4390333493552168 0.5527755555833955
 0.5201774292539691 0.6508009734609722 0.6426546769034288
 0.5364196170649654 0.9273668443224474 0.7795443989356905
 0.9187934978030785 0.9033173407388989 1.055693645451505 1.086731288919
 1.0900180177886272] 17 17


In [13]:
# Error metrics of the predictions in `fc` against the observed values in `actual`.
forecast_accuracy(forecast=fc, actual=actual)

{'mape': 0.07802851474527313,
 'me': -0.022946476316881577,
 'mae': 0.050535074047779585,
 'mpe': -0.026147807283621902,
 'rmse': 0.07049098972288967}

In [14]:
# Work on an independent copy: a bare .iloc slice of df2 may share memory with
# df2, so writing forecasts into it could silently alter df2 and triggers
# pandas' SettingWithCopyWarning.
df4 = df2.iloc[0:1,:].copy()
# Position of the first column to fill and the number of consecutive columns
# to forecast.
first_fc_col = 46
n_forecasts = 6
for i in range(n_forecasts):
    target_col = first_fc_col + i
    # Input window: the n_steps values immediately left of the target column.
    # From the second iteration on, the window includes predictions written by
    # earlier iterations (recursive multi-step forecasting).
    x_input = df4.iloc[:,target_col-n_steps:target_col].values.tolist()[0]
    # Force a numeric dtype; the frame's columns may be object-typed.
    x_input = np.asarray(x_input, dtype=float)
    x_input = x_input.reshape((1, n_steps, n_features))
    yhat = model.predict(x_input, verbose=0)
    # Store the scalar prediction in the target column of the single row.
    df4.iloc[:,target_col] = yhat[0][0]
df4

1,Generic Product,Generic Group variable,Generic_Variable,Generic_LookupKey,2016-04-01 00:00:00,2016-05-01 00:00:00,2016-06-01 00:00:00,2016-07-01 00:00:00,2016-08-01 00:00:00,2016-09-01 00:00:00,...,2019-06-01 00:00:00,2019-07-01 00:00:00,2019-08-01 00:00:00,2019-09-01 00:00:00,2019-10-01 00:00:00,2019-11-01 00:00:00,2019-12-01 00:00:00,2020-01-01 00:00:00,2020-02-01 00:00:00,2020-03-01 00:00:00
9,Panther - Leopard - Lion,Revenue,Total Revenue,Segment 1 - Sandesh Brand 2Sandesh Brand 2Mobi...,-1.96432,-1.89887,-1.8643,-1.72079,-1.67049,-1.44222,...,0.903317,1.05569,1.08673,1.09002,1.168814,1.22378,1.28948,1.34959,1.41555,1.47998
