In [368]:
def six_mark_reward(match_count):
    if match_count == 6:
        return 8 * 1000 * 1000 # Jackpot – usually a very large and variable amount
    elif match_count == 5.5:  # 5 numbers + special number
        return 40 * 1000  # Second Prize (variable, but using a typical value)
    elif match_count == 5:
        return 20 * 1000  # Third Prize (variable, but using a typical value)
    elif match_count == 4.5:  # 4 numbers + special number
        return 9600  # Fourth Prize (fixed)
    elif match_count == 4:
        return 640  # Fifth Prize (fixed)
    elif match_count == 3.5:  # 3 numbers + special number
        return 320  # Sixth Prize (fixed)
    elif match_count == 3:
        return 40  # Seventh Prize (fixed)
    else:
        return 0  # handles invalid input

### Expected Value

In [370]:
import math

def calculate_probability(n, k):
    """Calculates the combination C(n, k)."""
    return math.comb(n, k)  # More efficient than manual calculation

def calculate_expected_value():
    """Calculates the expected value of the Hong Kong Mark Six lottery."""
    total_combinations = calculate_probability(49, 6)
    expected_value = 0

    # Jackpot (6 numbers)
    prob_6 = 1 / total_combinations
    expected_value += prob_6 * six_mark_reward(6)

    # Second Prize (5 numbers + special number)
    prob_5_plus_special = calculate_probability(6, 5) * calculate_probability(1, 1) / total_combinations
    expected_value += prob_5_plus_special * six_mark_reward(5.5)

    # Third Prize (5 numbers)
    prob_5 = calculate_probability(6, 5) * calculate_probability(42, 1) / total_combinations #same as second prize
    expected_value += prob_5 * six_mark_reward(5)

    # Fourth Prize (4 numbers + special number)
    prob_4_plus_special = calculate_probability(6, 4) * calculate_probability(1, 1) / total_combinations
    expected_value += prob_4_plus_special * six_mark_reward(4.5)

    # Fifth Prize (4 numbers)
    prob_4 = calculate_probability(6, 4) * calculate_probability(42, 2) / total_combinations #same as fourth prize
    expected_value += prob_4 * six_mark_reward(4)

    # Sixth Prize (3 numbers + special number)
    prob_3_plus_special = calculate_probability(6, 3) * calculate_probability(1, 1) / total_combinations
    expected_value += prob_3_plus_special * six_mark_reward(3.5)

    # Seventh Prize (3 numbers)
    prob_3 = calculate_probability(6, 3) * calculate_probability(42, 3) / total_combinations #same as sixth prize
    expected_value += prob_3 * six_mark_reward(3)

    return expected_value

expected_value = calculate_expected_value()
print(f"Expected Value: {expected_value}")

Expected Value: 2.2082670424153177


# Simple LSTM Model

Click `Run All` button to see the result

## HYBER PARAMETER FOR LSTM MODEL HERE

In [371]:
TARGET_YEAR = 2000 # Count the year since TARGET_YEAR (please note that in 2019 there is 6 month missing data due to COVID and its dropped in this notebook)
SEQ_LENGTH = 10 # Default is 10 for LSTM model
TRAIN_SIZE = 0.9 # Default is 0.9 
LSTM_UNIT = 32 # Default is 50 // 

## HYBER PARAMETER FOR LSTM MODEL HERE

In [372]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [373]:
df = pd.read_json('data/data.json')

In [374]:
df.tail(5)

Unnamed: 0,YEAR,TIMES,N1,N2,N3,N4,N5,N6,S1
5827,2025,11.0,14.0,18.0,30.0,36.0,40.0,42.0,1.0
5828,2025,12.0,3.0,13.0,15.0,18.0,19.0,24.0,37.0
5829,2025,13.0,1.0,4.0,18.0,25.0,38.0,45.0,23.0
5830,2025,14.0,1.0,7.0,21.0,27.0,33.0,45.0,17.0
5831,2025,15.0,5.0,15.0,20.0,26.0,30.0,40.0,39.0


In [375]:
df.columns

Index(['YEAR', 'TIMES', 'N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'S1'], dtype='object')

### YEAR 2021, data has some issue

In [376]:
df['YEAR'] = pd.to_numeric(df['YEAR'], errors='coerce') 
df = df[df['YEAR'] >= TARGET_YEAR]

# clean
df = df.dropna(subset=['YEAR'])

df = df[['N1', 'N2','N3', 'N4', 'N5', 'N6', 'S1']]
data = df.values

In [377]:
# Scale the data (important for LSTMs)
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)

In [378]:
# Create sequences (e.g., use the last n draws [seq_length] to predict the next draw)
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data) - seq_length):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [379]:
seq_length = SEQ_LENGTH  # How many past draws to use for prediction
X, y = create_sequences(data, seq_length)

In [380]:
# Split data into training and testing sets
train_size = int(len(X) * TRAIN_SIZE)
X_train, X_test, y_train, y_test = X[:train_size], X[train_size:], y[:train_size], y[train_size:]

### Please note that there is the special num

In [381]:
special_num = [x[-1] for x in y_test]

In [382]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]))) # 50 LSTM units
model.add(Dense(y_train.shape[1])) # Output layer with the number of lottery numbers
model.compile(loss='mse', optimizer='adam') # Use mean squared error loss

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32) # Adjust epochs and batch size

Epoch 1/50


  super().__init__(**kwargs)


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1129
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0407
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0416
Epoch 4/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0413
Epoch 5/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0400
Epoch 6/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0401
Epoch 7/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0411
Epoch 8/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0414
Epoch 9/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0414
Epoch 10/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0414
Epoch 11/50
[1m96/9

<keras.src.callbacks.history.History at 0x344f57280>

In [383]:
predictions = model.predict(X_test)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


In [384]:
real_predictions = np.round(scaler.inverse_transform(predictions)).astype(int)

In [385]:
y_test = np.round(scaler.inverse_transform(y_test)).astype(int)

In [386]:
result = []
for nth in range(len(y_test)):
    num_correct = 0
    pred_list = real_predictions[nth]
    ground_truth_list = y_test[nth]
    special_number = special_num[nth]
    for x in pred_list:
        if x in ground_truth_list:
            print
            if x in [special_number]:
                print('中特別號碼')
                num_correct += 0.5
            else:
                num_correct += 1
    result.append(num_correct)

## For the result, 買1組 10蚊

In [387]:
result = [float(r) for r in result]

In [388]:
print(f'buying: {len(result)} tickets')

total_spend = len(result) * 10
print(f'Spending: ${len(result) * 10}')

total_reward = sum([six_mark_reward(x) for x in result])
print(f'Total Reward: ${total_reward}')

final_earn = total_reward - total_spend
print(f'Final earn: {final_earn}')

print(f'Earn Ratio: {final_earn/(len(result) * 10) * 100:.2f}%')

buying: 341 tickets
Spending: $3410
Total Reward: $3880
Final earn: 470
Earn Ratio: 13.78%
