In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam

data = pd.read_excel('Megamillions.xlsx')
next_date = input("Enter the date for the next prediction (MM/DD/YYYY): ")

sequence_length = 7

data_copy = data.copy()
data.drop(['Date'], axis=1, inplace=True)

feature_count = data.shape[1]

training_data = data.copy()

training_rows = training_data.values.shape[0]
training_samples = np.empty([training_rows - sequence_length, sequence_length, feature_count], dtype=float)
training_labels = np.empty([training_rows - sequence_length, feature_count], dtype=float)
for i in range(0, training_rows - sequence_length):
    training_samples[i] = training_data.iloc[i: i + sequence_length, 0: feature_count]
    training_labels[i] = training_data.iloc[i + sequence_length: i + sequence_length + 1, 0: feature_count]

scaler = StandardScaler()
scaled_data = scaler.fit_transform(training_data.values)
scaled_training_samples = pd.DataFrame(data=scaled_data, index=training_data.index)

x_train = np.empty([training_rows - sequence_length, sequence_length, feature_count], dtype=float)
y_train = np.empty([training_rows - sequence_length, feature_count], dtype=float)

for i in range(0, training_rows - sequence_length):
    x_train[i] = scaled_training_samples.iloc[i: i + sequence_length, 0: feature_count]
    y_train[i] = scaled_training_samples.iloc[i + sequence_length: i + sequence_length + 1, 0: feature_count]

rnn_model = Sequential()
rnn_model.add(Bidirectional(LSTM(240, input_shape=(sequence_length, feature_count), return_sequences=True)))
rnn_model.add(Dropout(0.2))
rnn_model.add(Bidirectional(LSTM(240, input_shape=(sequence_length, feature_count), return_sequences=True)))
rnn_model.add(Dropout(0.2))
rnn_model.add(Bidirectional(LSTM(240, input_shape=(sequence_length, feature_count), return_sequences=True)))
rnn_model.add(Bidirectional(LSTM(240, input_shape=(sequence_length, feature_count), return_sequences=False)))
rnn_model.add(Dropout(0.2))
rnn_model.add(Dense(70))
rnn_model.add(Dense(feature_count))

rnn_model.compile(optimizer=Adam(learning_rate=0.0001), loss='mse', metrics=['accuracy'])

rnn_model.fit(x=x_train, y=y_train, batch_size=100, epochs=1200, verbose=2)

print('-' * 40)
print('Prediction vs. Ground Truth without rounding up or down')
for i in range(1, 10):
    test_data = data_copy.copy()
    test_data = test_data.tail((sequence_length + 10 - i))
    test_data = test_data.head((sequence_length + 1))
    test_date = data_copy.iloc[(test_data.tail().index[-1])]['Date']
    test_input = test_data.head((sequence_length))
    test_input.drop(['Date'], axis=1, inplace=True)
    test_input = np.array(test_input)
    x_test = scaler.transform(test_input)
    y_test_pred = rnn_model.predict(np.array([x_test]))
    test_data.drop(['Date'], axis=1, inplace=True)
    y_test_true = test_data.tail(1)
    print('Drawing Date:', test_date)
    print('Prediction:\t', scaler.inverse_transform(y_test_pred).astype(int)[0])
    print('Ground Truth:\t', np.array(y_test_true)[0])
    print('-' * 40)

print('-' * 40)
print('Prediction vs. Ground Truth with rounding up')
for i in range(1, 10):
    test_data = data_copy.copy()
    test_data = test_data.tail((sequence_length + 10 - i))
    test_data = test_data.head((sequence_length + 1))
    test_date = data_copy.iloc[(test_data.tail().index[-1])]['Date']
    test_input = test_data.head((sequence_length))
    test_input.drop(['Date'], axis=1, inplace=True)
    test_input = np.array(test_input)
    x_test = scaler.transform(test_input)
    y_test_pred = rnn_model.predict(np.array([x_test]))
    test_data.drop(['Date'], axis=1, inplace=True)
    y_test_true = test_data.tail(1)
    print('Drawing Date:', test_date)
    print('Prediction:\t', scaler.inverse_transform(y_test_pred).astype(int)[0] + 1)
    print('Ground Truth:\t', np.array(y_test_true)[0])
    print('-' * 40)



print('-' * 40)
print('Predict the Future Drawing')
future_data = data.copy()
future_data = future_data.tail((sequence_length))
future_data = np.array(future_data)
x_future = scaler.transform(future_data)
y_future_pred = rnn_model.predict(np.array([x_future]))
print('Drawing Date:', next_date)
print('Prediction without rounding up or down:\t', scaler.inverse_transform(y_future_pred).astype(int)[0])
print('Prediction with rounding up           :\t', scaler.inverse_transform(y_future_pred).astype(int)[0] + 1)
print('Prediction with rounding down         :\t', scaler.inverse_transform(y_future_pred).astype(int)[0] - 1)
print('-' * 40)

Epoch 1/1200
22/22 - 29s - loss: 0.9147 - accuracy: 0.3728 - 29s/epoch - 1s/step
Epoch 2/1200
22/22 - 11s - loss: 0.8658 - accuracy: 0.3984 - 11s/epoch - 480ms/step
Epoch 3/1200
22/22 - 11s - loss: 0.8544 - accuracy: 0.3998 - 11s/epoch - 519ms/step
Epoch 4/1200
22/22 - 11s - loss: 0.8543 - accuracy: 0.3962 - 11s/epoch - 512ms/step
Epoch 5/1200
22/22 - 11s - loss: 0.8509 - accuracy: 0.4003 - 11s/epoch - 514ms/step
Epoch 6/1200
22/22 - 11s - loss: 0.8518 - accuracy: 0.4003 - 11s/epoch - 513ms/step
Epoch 7/1200
22/22 - 11s - loss: 0.8487 - accuracy: 0.3975 - 11s/epoch - 514ms/step
Epoch 8/1200
22/22 - 11s - loss: 0.8486 - accuracy: 0.3984 - 11s/epoch - 513ms/step
Epoch 9/1200
22/22 - 11s - loss: 0.8480 - accuracy: 0.3984 - 11s/epoch - 511ms/step
Epoch 10/1200
22/22 - 10s - loss: 0.8463 - accuracy: 0.4012 - 10s/epoch - 462ms/step
Epoch 11/1200
22/22 - 11s - loss: 0.8479 - accuracy: 0.3998 - 11s/epoch - 510ms/step
Epoch 12/1200
22/22 - 11s - loss: 0.8455 - accuracy: 0.3971 - 11s/epoch - 520

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 06/14/2002
Prediction:	 [ 4  8 32 36 43  1]
Ground Truth:	 [ 4  8 32 37 43  2]
----------------------------------------
Drawing Date: 2002-11-06 00:00:00
Prediction:	 [ 4  5  8 32 43 51]
Ground Truth:	 [ 5  6  9 33 44 52]
----------------------------------------
Drawing Date: 2002-07-06 00:00:00
Prediction:	 [14 21 26 27 41 12]
Ground Truth:	 [14 22 27 28 42 13]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 2002-04-06 00:00:00
Prediction:	 [ 3 24 29 30 48 48]
Ground Truth:	 [ 3 25 29 30 48 48]
----------------------------------------
Drawing Date: 05/31/2002
Prediction:	 [11 28 45 46 52 46]
Ground Truth:	 [12 28 45 46 52 47]
----------------------------------------
Drawing Date: 05/28/2002
Prediction:	 [ 5 20 21 28 31 23]
Ground Truth:	 [ 6 21 22 29 32 24]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 05/24/2002
Prediction:	 [ 1  3 31 43 52 34]
Ground Truth:	 [ 2  4 32 44 52 36]
----------------------------------------
Drawing Date: 05/21/2002
Prediction:	 [ 3 28 39 40 44  9]
Ground Truth:	 [ 4 28 39 41 44  9]
----------------------------------------
Drawing Date: 05/17/2002
Prediction:	 [15 17 24 32 47 30]
Ground Truth:	 [15 18 25 33 47 30]
----------------------------------------
----------------------------------------
Prediction vs. Ground Truth with rounding up


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 06/14/2002
Prediction:	 [ 5  9 33 37 44  2]
Ground Truth:	 [ 4  8 32 37 43  2]
----------------------------------------
Drawing Date: 2002-11-06 00:00:00
Prediction:	 [ 5  6  9 33 44 52]
Ground Truth:	 [ 5  6  9 33 44 52]
----------------------------------------
Drawing Date: 2002-07-06 00:00:00
Prediction:	 [15 22 27 28 42 13]
Ground Truth:	 [14 22 27 28 42 13]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 2002-04-06 00:00:00
Prediction:	 [ 4 25 30 31 49 49]
Ground Truth:	 [ 3 25 29 30 48 48]
----------------------------------------
Drawing Date: 05/31/2002
Prediction:	 [12 29 46 47 53 47]
Ground Truth:	 [12 28 45 46 52 47]
----------------------------------------
Drawing Date: 05/28/2002
Prediction:	 [ 6 21 22 29 32 24]
Ground Truth:	 [ 6 21 22 29 32 24]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 05/24/2002
Prediction:	 [ 2  4 32 44 53 35]
Ground Truth:	 [ 2  4 32 44 52 36]
----------------------------------------
Drawing Date: 05/21/2002
Prediction:	 [ 4 29 40 41 45 10]
Ground Truth:	 [ 4 28 39 41 44  9]
----------------------------------------
Drawing Date: 05/17/2002
Prediction:	 [16 18 25 33 48 31]
Ground Truth:	 [15 18 25 33 47 30]
----------------------------------------
----------------------------------------
Predict the Future Drawing


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_input.drop(['Date'], axis=1, inplace=True)


Drawing Date: 06/02/2023
Prediction without rounding up or down:	 [13 20 24 42 48 19]
Prediction with rounding up           :	 [14 21 25 43 49 20]
Prediction with rounding down         :	 [12 19 23 41 47 18]
----------------------------------------
