In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

df = pd.read_excel(r'powerball.xlsx')

window_length = 7

df1 = df.copy()
df.drop(['Date'], axis=1, inplace=True)

number_of_features = df.shape[1]

train = df.copy()
train.head((window_length + 1))

train_rows = train.values.shape[0]
train_samples = np.empty([train_rows - window_length, window_length * number_of_features], dtype=float)
train_labels = np.empty([train_rows - window_length, number_of_features], dtype=float)

for i in range(0, train_rows - window_length):
    train_samples[i] = train.iloc[i: i + window_length].values.flatten()
    train_labels[i] = train.iloc[i + window_length: i + window_length + 1].values

scaler = StandardScaler()
transformed_dataset = scaler.fit_transform(train.values)
scaled_train_samples = pd.DataFrame(data=transformed_dataset, index=train.index)

scaled_train_samples.head(window_length + 1)

x_train = np.empty([train_rows - window_length, window_length * number_of_features], dtype=float)
y_train = np.empty([train_rows - window_length, number_of_features], dtype=float)

for i in range(0, train_rows - window_length):
    x_train[i] = scaled_train_samples.iloc[i: i + window_length].values.flatten()
    y_train[i] = scaled_train_samples.iloc[i + window_length: i + window_length + 1].values

model = RandomForestRegressor(n_estimators=12000, random_state=1)
model.fit(x_train, y_train)

print('-' * 40)
print('Prediction vs. GroundTruth without rounding up or down')

for i in range(1, 10):
    test = df1.copy()
    test = test.tail(window_length + 10 - i)
    test = test.head(window_length + 1)
    test1 = test.drop(['Date'], axis=1)
    test1 = np.array(test1)
    x_test = scaler.transform(test1)
    x_test = x_test.flatten()[:window_length * number_of_features]  # Ajuste o tamanho dos dados de teste
    y_test_pred = model.predict(np.array([x_test]))
    y_test_true = test.drop(['Date'], axis=1).tail(1)
    print('Drawing Date', test.iloc[-1]['Date'])
    print('Prediction:\t', scaler.inverse_transform(y_test_pred).astype(int)[0] + 1)
    print('GroundTruth:\t', np.array(y_test_true)[0])
    print('-' * 40)

next_Date = '06/02/2023'

next = df.copy()
next = next.tail(window_length)
next = np.array(next)
x_next = scaler.transform(next)
x_next = x_next.flatten()[:window_length * number_of_features]  # Ajuste o tamanho dos dados de teste
y_next_pred = model.predict(np.array([x_next]))

print('-' * 40)
print('Predict the Future Drawing')
print('Drawing Date', next_Date)
print('Prediction without rounding up or down:\t', scaler.inverse_transform(y_next_pred).astype(int)[0])
print('Prediction with rounding up           :\t', scaler.inverse_transform(y_next_pred).astype(int)[0] + 1)
print('Prediction with rounding down         :\t', scaler.inverse_transform(y_next_pred).astype(int)[0] - 1)
print('-' * 40)


----------------------------------------
Prediction vs. GroundTruth without rounding up or down
Drawing Date 2023-10-05 00:00:00
Prediction:	 [28 48 57 26 34  8]
GroundTruth:	 [24 55 69 21 33  3]
----------------------------------------
Drawing Date 5/13/2023
Prediction:	 [14 42 25 28 22 13]
GroundTruth:	 [ 3 46 20 23 15 11]
----------------------------------------
Drawing Date 5/15/2023
Prediction:	 [49 14 30 48 31 21]
GroundTruth:	 [58  1 26 55 28 25]
----------------------------------------
Drawing Date 5/17/2023
Prediction:	 [46 25 36 42 35 15]
GroundTruth:	 [51 18 37 45 34 14]
----------------------------------------
Drawing Date 5/20/2023
Prediction:	 [29 52 33 37 24 21]
GroundTruth:	 [23 63 32 38 17 23]
----------------------------------------
Drawing Date 5/22/2023
Prediction:	 [37 45 44 19 56 22]
GroundTruth:	 [38 52 48  9 68 25]
----------------------------------------
Drawing Date 5/24/2023
Prediction:	 [50 41 28 45 21 22]
GroundTruth:	 [58 44 21 50 12 26]
------------------