In [None]:
# 0. Initial Setup
#===============================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os
from collections import Counter

# Mount Google Drive (if needed)
# Inside Colab the model is persisted on Drive so it survives between sessions.
# Outside Colab the `google.colab` package does not exist; instead of aborting
# the whole notebook on import, fall back to a file in the working directory.
try:
    from google.colab import drive
except ImportError:
    model_path = "loto6_model.keras"
else:
    drive.mount('/content/drive')
    model_path = "/content/drive/MyDrive/loto6_model.keras"

# Loto6 data automatic retrieval
# The CSV is decoded as Shift-JIS; its column headers are Japanese.
LOTO6_DATA_URL = "https://loto6.thekyo.jp/data/loto6.csv"
raw_df = pd.read_csv(LOTO6_DATA_URL, encoding="shift-jis")

# Extract only the number columns (1st to 6th numbers)
loto_number_cols = [f"第{i}数字" for i in range(1, 7)]

# Drop incomplete *draws* (rows). Dropping along axis=1 would remove an entire
# number position as soon as one cell is missing, leaving fewer than six
# numbers per draw and silently changing the feature layout downstream.
df = raw_df[loto_number_cols].dropna(axis=0, how='any').reset_index(drop=True)
numbers = df.astype(int).values

# Later cells index 43-slot arrays with `n - 1`; a value outside 1..43 would
# either wrap around (0 -> slot 42) or raise far from the real cause.
if numbers.size and (numbers.min() < 1 or numbers.max() > 43):
    raise ValueError("Loto6 numbers must be between 1 and 43")

print(f"Number of data acquired: {len(numbers)}")


In [None]:
# 1. Feature Engineering
#===============================
window_length = 7

# frequency (cumulative occurrence count)
# Tally the hits of each draw into its own row, then accumulate down the draw
# axis so that row i holds the counts up to and including draw i.
hits_per_draw = np.zeros((len(df), 43))
draw_rows = np.arange(len(numbers))[:, np.newaxis]
np.add.at(hits_per_draw, (draw_rows, numbers - 1), 1)
frequency = np.cumsum(hits_per_draw, axis=0)
counter = hits_per_draw.sum(axis=0)  # final totals per number

# gap (number of draws since last appearance)
# Stays 0 while a number has never appeared, and is 0 again on any draw that
# contains it.
gap = np.zeros((len(df), 43))
last_seen = [-1] * 43
for draw_idx, drawn in enumerate(numbers):
    drawn_set = set(drawn.tolist())
    for slot in range(43):
        if slot + 1 in drawn_set:
            last_seen[slot] = draw_idx
        if last_seen[slot] != -1:
            gap[draw_idx, slot] = draw_idx - last_seen[slot]

# Combine features
# Column layout: 6 drawn numbers scaled by 43, then 43 frequency columns and
# 43 gap columns, each scaled by its own global maximum.
scaled_numbers = numbers / 43
scaled_frequency = frequency / frequency.max()
scaled_gap = gap / gap.max()
full_features = np.hstack((scaled_numbers, scaled_frequency, scaled_gap))

# Generate sliding windows
# Window s covers feature rows s .. s+window_length-1; its target is the
# 43-slot multi-hot encoding of the draw that immediately follows the window.
def _multi_hot(drawn):
    """Return a length-43 vector with 1.0 in the slot of every drawn number."""
    label = np.zeros(43)
    label[drawn - 1] = 1
    return label


window_starts = range(len(full_features) - window_length)
X = np.array([full_features[s:s + window_length] for s in window_starts])
y = np.array([_multi_hot(numbers[s + window_length]) for s in window_starts])

# shuffle=False keeps the row order, so the held-out 20% is the tail of the data.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)


In [None]:
# 2. Model construction and reuse
#===============================
from tensorflow.keras import regularizers


def _build_model():
    """Assemble the untrained network.

    Four Bidirectional LSTM(128) stages, each followed by Dropout(0.3), and a
    43-unit linear output layer. Only the first LSTM receives `input_shape`;
    only the last one returns a single vector instead of the full sequence.
    """
    stack = []
    for depth in range(4):
        lstm_options = {
            "return_sequences": depth < 3,
            "kernel_regularizer": regularizers.l2(1e-4),
        }
        if depth == 0:
            lstm_options["input_shape"] = (window_length, X.shape[2])
        stack.append(Bidirectional(LSTM(128, **lstm_options)))
        stack.append(Dropout(0.3))
    stack.append(Dense(43, activation='linear'))  # Regression output
    return Sequential(stack)


# Pick the starting model and the matching training settings; the
# compile / fit / save sequence itself is shared by both paths.
if os.path.exists(model_path):
    model = load_model(model_path)
    print("Loaded existing model.")
    # Fine-tune on the latest data with a small learning rate, then save
    learning_rate, num_epochs = 1e-4, 50
    done_message = "Fine-tuned existing model and saved."
else:
    model = _build_model()
    learning_rate, num_epochs = 0.001, 1500
    done_message = "Trained and saved new model."

model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
history = model.fit(
    x_train, y_train,
    epochs=num_epochs, batch_size=32,
    validation_data=(x_test, y_test),
    verbose=1
)
model.save(model_path)
print(done_message)


In [None]:
# Plot learning curve
# Training loss and validation loss per epoch, drawn on one explicit axes.
fig, ax = plt.subplots(figsize=(8, 4))
for history_key, curve_label in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Learning Curve (MSE Loss)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# 3. Monte Carlo Dropout prediction
#===============================
@tf.function
def monte_carlo_predict(model, x, T=30):
    """Run ``T`` forward passes in training mode and stack the results.

    Every pass calls ``model(x, training=True)``, so layers that behave
    differently during training (such as the Dropout layers of the model
    built in this notebook) stay active, and keeps only the output for the
    first element of the batch.

    Args:
        model: Callable model that accepts a ``training`` keyword.
        x: Input batch; only the output for its first element is kept.
        T: Number of forward passes. It is consumed by Python ``range``, so a
            plain Python int is expected here.

    Returns:
        A tensor with the ``T`` per-pass outputs stacked along a new leading
        axis.
    """
    return tf.stack([model(x, training=True)[0] for _ in range(T)])

# Input for the *next*, not yet drawn, lottery: the most recent `window_length`
# feature rows. `X[-1]` is the wrong window here: its target is the last draw
# already present in the data, so it stops one draw short of the newest result.
x_future = full_features[-window_length:][np.newaxis, ...]
predictions = monte_carlo_predict(model, x_future, T=30).numpy()
mean_scores = predictions.mean(axis=0)
std_scores = predictions.std(axis=0)

# One pass over the Monte Carlo samples collects both the per-pass top-6
# numbers (for the frequency ranking) and the distinct 6-number sets.
all_numbers = []
unique_sets = []
seen_sets = set()  # O(1) membership test; `unique_sets` keeps first-seen order
for pred in predictions:
    top6 = (np.argsort(pred)[-6:] + 1).tolist()
    all_numbers.extend(top6)
    # Plain Python ints keep the printed tuples readable on every NumPy version.
    candidate = tuple(sorted(top6))
    if candidate not in seen_sets:
        seen_sets.add(candidate)
        unique_sets.append(candidate)

# Frequent top 15 numbers
top15 = [num for num, _ in Counter(all_numbers).most_common(15)]
print("Frequent TOP15:", sorted(top15))

# Representative 6-number prediction (confidence-based)
final_main_numbers = np.sort(np.argsort(mean_scores)[-6:]+1)
print("Representative prediction:", final_main_numbers)

# Monte Carlo variation candidates
print("Monte Carlo variation candidates:", unique_sets[:5])

In [None]:
# Generate up to `num_subsets` alternative tickets from the Monte Carlo candidates
# Representative prediction is stored as final_main_numbers
num_subsets = 3

# Extract unique top 6-number sets from Monte Carlo output, in first-seen order.
# Elements are converted to plain Python ints so the printed tuples stay
# readable regardless of the installed NumPy version.
all_candidate_sets = []
for pred in predictions:
    s = tuple(sorted((np.argsort(pred)[-6:] + 1).tolist()))
    if s not in all_candidate_sets:
        all_candidate_sets.append(s)

# Exclude the representative set, then keep at most `num_subsets` of the rest.
# Slicing (instead of append-then-check) also yields an empty list when
# `num_subsets` is 0.
representative_set = tuple(int(n) for n in final_main_numbers)
alternative_sets = [s for s in all_candidate_sets if s != representative_set]
subsets_for_purchase = alternative_sets[:max(num_subsets, 0)]

print("Representative prediction:", final_main_numbers)
# Report the number of tickets actually produced; fewer than `num_subsets`
# are available when the Monte Carlo passes agree on the same sets.
print(f"{len(subsets_for_purchase)} subset tickets:", subsets_for_purchase)

In [None]:
# 4. Visualization
#===============================
# Mean Monte Carlo score per lottery number, with the spread across passes
# shown as error bars.
fig, ax = plt.subplots(figsize=(12, 4))
lottery_numbers = np.arange(1, 44)
ax.bar(lottery_numbers, mean_scores, yerr=std_scores, capsize=3)
ax.set_title("Prediction score distribution (mean ± std)")
ax.set_xlabel("Number")
ax.set_ylabel("Score")
plt.show()