In [10]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Load the draw history. Every column other than 'Date' is used as a feature.
df = pd.read_excel(r'Megamillions.xlsx')

# Number of consecutive draws that make up one model input.
window_length = 7

# df1 keeps the 'Date' column for reporting; df itself is stripped in place.
df1 = df.copy()
df.drop(['Date'], axis=1, inplace=True)

number_of_features = df.shape[1]

train = df.copy()
train_rows = train.values.shape[0]

# Unscaled sliding windows: sample k is rows k .. k + window_length - 1
# flattened row by row, and its label is the row that immediately follows.
raw_values = train.values
n_windows = train_rows - window_length
train_samples = np.empty((n_windows, window_length * number_of_features), dtype=float)
train_labels = np.empty((n_windows, number_of_features), dtype=float)

for start in range(n_windows):
    stop = start + window_length
    train_samples[start] = raw_values[start:stop].ravel()
    train_labels[start] = raw_values[stop]

# Standardize each column using statistics computed over the whole history.
scaler = StandardScaler()
transformed_dataset = scaler.fit_transform(train.values)
scaled_train_samples = pd.DataFrame(data=transformed_dataset, index=train.index)

# Scaled sliding windows: input k is rows k .. k + window_length - 1 of the
# scaled data flattened row by row; target k is the scaled row right after.
sample_count = train_rows - window_length
x_train = np.empty((sample_count, window_length * number_of_features), dtype=float)
y_train = np.empty((sample_count, number_of_features), dtype=float)

for start in range(sample_count):
    stop = start + window_length
    x_train[start] = transformed_dataset[start:stop].ravel()
    y_train[start] = transformed_dataset[stop]

# Multi-output regression forest; the fixed seed makes the fit repeatable.
model = RandomForestRegressor(n_estimators=12000, random_state=1)
model.fit(x_train, y_train)

print('-' * 40)
# The header describes what is actually printed below: the truncated
# prediction plus one, which the forecast section of this script labels
# "with rounding up".
print('Prediction vs. GroundTruth with rounding up')

# NOTE: the model was fitted on windows built from every row of the history,
# including the draws replayed here, so this loop is an in-sample sanity
# check and not an out-of-sample evaluation.
for i in range(1, 10):
    # Frame of window_length input draws followed by the draw to predict.
    # Each iteration moves the frame one draw closer to the end of the data.
    test = df1.tail(window_length + 10 - i).head(window_length + 1)
    test1 = np.array(test.drop(['Date'], axis=1))

    # Only the leading window_length rows are model input; the final row is
    # the ground truth and is never fed to the model.
    x_test = scaler.transform(test1[:window_length]).flatten()
    y_test_pred = model.predict(x_test.reshape(1, -1))
    y_test_true = test1[-1]

    # astype(int) truncates toward zero; the +1 bumps every number up by one.
    predicted = scaler.inverse_transform(y_test_pred).astype(int)[0] + 1

    print('Drawing Date', test.iloc[-1]['Date'])
    print('Prediction:\t', predicted)
    print('GroundTruth:\t', y_test_true)
    print('-' * 40)

# Date label printed with the forecast (not used by the model).
next_Date = '06/02/2023'

# Most recent window_length draws ('Date' was already dropped from df),
# scaled with the scaler fitted on the training data and flattened into a
# single model input. The variable is deliberately not called `next`, which
# would shadow the builtin for the rest of the session.
last_window = np.array(df.tail(window_length))
x_next = scaler.transform(last_window).flatten()
y_next_pred = model.predict(x_next.reshape(1, -1))

# Undo the scaling once. astype(int) truncates toward zero, so the "rounding
# up" / "rounding down" variants below are simply that value plus/minus one.
base_prediction = scaler.inverse_transform(y_next_pred).astype(int)[0]

print('-' * 40)
print('Predict the Future Drawing')
print('Drawing Date', next_Date)
print('Prediction without rounding up or down:\t', base_prediction)
print('Prediction with rounding up           :\t', base_prediction + 1)
print('Prediction with rounding down         :\t', base_prediction - 1)
print('-' * 40)


----------------------------------------
Prediction vs. GroundTruth without rounding up or down
Drawing Date 06/14/2002
Prediction:	 [ 6 12 31 37 45 11]
GroundTruth:	 [ 4  8 32 37 43  2]
----------------------------------------
Drawing Date 2002-11-06 00:00:00
Prediction:	 [ 7 11 16 35 45 42]
GroundTruth:	 [ 5  6  9 33 44 52]
----------------------------------------
Drawing Date 2002-07-06 00:00:00
Prediction:	 [12 21 27 32 44 18]
GroundTruth:	 [14 22 27 28 42 13]
----------------------------------------
Drawing Date 2002-04-06 00:00:00
Prediction:	 [ 6 23 29 33 48 40]
GroundTruth:	 [ 3 25 29 30 48 48]
----------------------------------------
Drawing Date 05/31/2002
Prediction:	 [12 25 39 43 50 39]
GroundTruth:	 [12 28 45 46 52 47]
----------------------------------------
Drawing Date 05/28/2002
Prediction:	 [ 8 21 25 33 38 25]
GroundTruth:	 [ 6 21 22 29 32 24]
----------------------------------------
Drawing Date 05/24/2002
Prediction:	 [ 5 10 31 42 51 32]
GroundTruth:	 [ 2  4 32 44 5