# Step 1: Load and preprocess the data


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [4]:
# Read the historical draw results and preview the first five rows
df = pd.read_csv(filepath_or_buffer='Lottery_Mega_Millions_Winning_Numbers.csv')
df.head(n=5)

Unnamed: 0,Draw Date,Winning Numbers,Mega Ball,Multiplier
0,09/25/2020,20 36 37 48 67,16,2.0
1,09/29/2020,14 39 43 44 67,19,3.0
2,10/02/2020,09 38 47 49 68,25,2.0
3,10/06/2020,15 16 18 39 59,17,3.0
4,10/09/2020,05 11 25 27 64,13,2.0


In [5]:
# Parse white balls into separate integer columns (one column per ball drawn).
# Splitting on any run of whitespace (no explicit separator) tolerates doubled
# or leading/trailing spaces, which a literal ' ' separator would turn into
# empty strings that cannot be cast to int.
white = (
    df['Winning Numbers']
    .str.split(expand=True)
    .astype(int)
)

In [11]:
# Derive the encoding widths from the data itself: the largest white-ball and
# Mega Ball numbers that appear anywhere in the file. The encodings are
# therefore only as wide as the largest value actually observed.
max_white = int(white.to_numpy().max())
max_mega = int(df['Mega Ball'].max())


In [13]:
# Build multi-hot encoding for whites:
# one row per draw, one column per possible white-ball number.
X = np.zeros((len(white), max_white), dtype=int)
# Index rows by position (not by DataFrame index label) so the encoding stays
# correct even if the frame is filtered or re-indexed. The column index is the
# ball number minus 1 because ball numbers start at 1 and arrays at 0.
X[np.arange(len(white))[:, None], white.to_numpy() - 1] = 1

In [14]:
# One-hot encode the Mega Ball.
# Ball numbers start at 1, so shift them to zero-based class indices first.
y_labels = df['Mega Ball'].astype(int).to_numpy() - 1
y = to_categorical(y_labels, num_classes=max_mega)

# Train/test split

In [17]:
# Hold out 15% of the draws for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.15,
)

# Build the Keras model

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable when this cell is re-run.
set_random_seed(42)

n_whites = X.shape[1]   # width of the multi-hot white-ball vector (= max_white)
n_mega   = y.shape[1]   # number of Mega Ball classes (= max_mega)

# Small fully-connected classifier: multi-hot whites in, a softmax
# distribution over Mega Ball classes out. The input shape is declared with an
# explicit Input layer rather than the deprecated `input_shape=` layer argument.
model = Sequential([
    Input(shape=(n_whites,)),
    Dense(128, activation='relu'),
    Dense(64,  activation='relu'),
    Dense(n_mega, activation='softmax')
])

# categorical_crossentropy matches the one-hot encoded targets in `y`.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

# Train 

In [22]:
# Fit for 30 epochs in mini-batches of 32, holding out 10% of the training
# rows for per-epoch validation metrics.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_split=0.1,
)

Epoch 1/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0230 - loss: 3.9215 - val_accuracy: 0.0146 - val_loss: 3.8482
Epoch 2/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 760us/step - accuracy: 0.0526 - loss: 3.7540 - val_accuracy: 0.0194 - val_loss: 3.7728
Epoch 3/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 708us/step - accuracy: 0.0702 - loss: 3.6816 - val_accuracy: 0.0146 - val_loss: 3.7462
Epoch 4/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717us/step - accuracy: 0.0923 - loss: 3.5737 - val_accuracy: 0.0291 - val_loss: 3.7301
Epoch 5/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 711us/step - accuracy: 0.1042 - loss: 3.5215 - val_accuracy: 0.0437 - val_loss: 3.7238
Epoch 6/30
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step - accuracy: 0.1415 - loss: 3.4303 - val_accuracy: 0.0194 - val_loss: 3.7304
Epoch 7/30
[1m58/58[0m [32m

# Evaluate 

In [23]:
# Score the held-out test split. Two values come back (loss, accuracy)
# because the model was compiled with metrics=['accuracy'].
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f'Test accuracy: {acc:.3f}')

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0348 - loss: 4.9050 
Test accuracy: 0.041


In [24]:
# Make a prediction

In [25]:
# Score the first test-set draw; slicing (rather than indexing) keeps the
# 2-D shape so predict() receives a batch of one.
sample_X = X_test[:1]
pred_probs = model.predict(sample_X)[0]
# argmax is a zero-based class index; +1 maps it back to a ball number
# in the range 1..max_mega.
pred_label = pred_probs.argmax() + 1

print(f'Predicted Mega Ball: {pred_label}')
print(f'Probabilities: {pred_probs.round(3)}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Predicted Mega Ball: 36
Probabilities: [0.005 0.001 0.006 0.011 0.002 0.001 0.049 0.004 0.001 0.004 0.017 0.003
 0.085 0.004 0.001 0.003 0.017 0.024 0.093 0.021 0.017 0.01  0.002 0.007
 0.056 0.    0.01  0.039 0.011 0.017 0.025 0.002 0.002 0.001 0.009 0.103
 0.021 0.045 0.026 0.045 0.06  0.051 0.018 0.021 0.002 0.014 0.005 0.008
 0.    0.    0.    0.019]
