In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from statsmodels.tsa.ar_model import AR
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

# Read the monthly shampoo sales series from disk.
df = pd.read_csv('shampoo.csv')

# Chronological hold-out: the first 80% of the rows are used for training
# and the remaining rows for testing (no shuffling, order is preserved).
split_index = int(len(df) * 0.8)
month_and_sales = df[['Month', 'Sales']]
train_data = month_and_sales[:split_index].values
test_data = month_and_sales[split_index:].values

# Column 1 holds the sales figures. Coerce them to float32 in place;
# entries that cannot be parsed become NaN because of errors='coerce'.
for subset in (train_data, test_data):
    subset[:, 1] = pd.to_numeric(subset[:, 1], errors='coerce').astype('float32')

# Fit the [0, 1] scaler on the training sales only, then apply the same
# fitted scaling to the test sales.
scaler = MinMaxScaler(feature_range=(0, 1))
train_sales_column = train_data[:, 1].reshape(-1, 1)
test_sales_column = test_data[:, 1].reshape(-1, 1)
train_scaled = scaler.fit_transform(train_sales_column)
test_scaled = scaler.transform(test_sales_column)

# Turn the scaled training series into supervised samples: every input is a
# window of three consecutive values and the target is the value right after.
window = 3
window_ends = range(window, len(train_scaled))
X_train = np.array([train_scaled[end - window:end] for end in window_ends])
y_train = np.array([train_scaled[end] for end in window_ends])

# Two stacked LSTM layers followed by a single-unit dense output. The first
# LSTM is built with return_sequences=True so that it hands its full output
# sequence to the second LSTM.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(window, 1)),
    LSTM(units=50),
    Dense(units=1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the network on the windowed training samples.
model.fit(X_train, y_train, batch_size=32, epochs=100)

# Seed the test windows with the last three training observations so that the
# LSTM produces a one-step-ahead prediction for EVERY test point. Previously
# the seeded prediction was computed and then overwritten, which left the
# first three test points unscored for the LSTM while the AR model was scored
# on the whole test set, so the two MSE values were not comparable.
inputs = train_data[-3:]
inputs_scaled = scaler.transform(inputs[:, 1].reshape(-1, 1))
history_scaled = np.concatenate([inputs_scaled, test_scaled])

# Window i covers history_scaled[i-3:i] and predicts history_scaled[i], which
# is test_scaled[i-3]; this gives one window per test observation.
X_test = np.array(
    [history_scaled[i - 3:i] for i in range(3, len(history_scaled))]
)
predicted_scaled = model.predict(X_test)
# Map the predictions back from the [0, 1] scale to sales units.
predicted = scaler.inverse_transform(predicted_scaled)

# train_data / test_data are object-dtype arrays (they also carry the Month
# strings), so hand the models and metrics explicit float vectors instead.
train_sales = train_data[:, 1].astype(float)
test_sales = test_data[:, 1].astype(float)

# Train the AR model (one lag) on the training sales.
ar_model = AutoReg(train_sales, lags=1)
ar_fit = ar_model.fit()

# Out-of-sample forecast over the index range occupied by the test set.
ar_predicted = ar_fit.predict(start=len(train_data), end=len(df) - 1, dynamic=False)

# Both models are now scored against the same full set of test observations.
mse_lstm = mean_squared_error(test_sales, predicted)
mse_ar = mean_squared_error(test_sales, ar_predicted)

print(f'MSE for LSTM model: {mse_lstm:.4f}')
print(f'MSE for AR model: {mse_ar:.4f}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78