In [None]:
#!pip install pandas
!pip install scikit-learn
!pip install tensorflow-cpu
!pip install numpy pandas keras scikit-learn matplotlib scikeras

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import warnings
from datetime import datetime
from contextlib import closing


# 불필요한 경고 메시지를 숨깁니다.
warnings.filterwarnings('ignore')

print(f'completed to load library. [{datetime.now()}]')

completed to load library. [2025-06-12 23:20:53.496453]


In [2]:
def load_data_by_db(db_file_path, last_round, length, reverse=False):
    """Load draw results for rounds ``last_round - length`` .. ``last_round``.

    Both bounds are inclusive, so up to ``length + 1`` rows are returned.

    Args:
        db_file_path: Path to the SQLite database containing a ``results`` table.
        last_round: Highest round number to load.
        length: How many rounds before ``last_round`` to include.
        reverse: When True rows are ordered by round descending, otherwise
            ascending.

    Returns:
        A ``(results, first_bonus)`` tuple. ``results`` is a list of
        ``(row[1], numbers, row[3])`` tuples where ``numbers`` is the
        comma-separated text in column 2 parsed into a list of ints.
        ``first_bonus`` is column 2 of the first row returned, or ``0`` when
        no row matched.
    """
    import sqlite3
    from contextlib import closing

    # The round bounds are bound as parameters instead of being formatted into
    # the SQL text. Only the sort direction is interpolated, and it can only
    # be one of the two literals below.
    direction = "desc" if reverse else "asc"
    query = ('select * from results where round>=? and round <=? '
             f'order by round {direction}')
    bounds = (int(last_round - length), int(last_round))

    with closing(sqlite3.connect(db_file_path)) as conn:
        with closing(conn.cursor()) as cur:
            rows = cur.execute(query, bounds).fetchall()

    results = []
    first_bonus = 0
    for row in rows:
        if first_bonus == 0:
            # NOTE(review): column 2 is the same comma-separated string that is
            # parsed as the numbers below, while column 3 is what is returned
            # as the third tuple item. If column 3 is the bonus number this
            # should probably read row[3] -- confirm against the table schema.
            first_bonus = row[2]
        results.append((row[1], [int(i) for i in row[2].split(',')], row[3]))
    return results, first_bonus


def create_randomforest_db(db_file_path, verbose=1):
    """Create the ``rndforest`` metrics table if it does not exist yet.

    Each row stores one evaluated combination in the shape
    (n_estimator, data_length, random_state, rounds, matched counts, sum_val)
    plus a free-form version tag.

    Args:
        db_file_path: Path to the SQLite database file.
        verbose: When greater than 0 the generated DDL statement is printed.
    """
    import sqlite3
    from contextlib import closing

    column_defs = (
        "id INTEGER PRIMARY KEY AUTOINCREMENT",
        "n_estimator INTEGER",
        "data_length INTEGER",
        "random_state INTEGER",
        "rounds TEXT",
        "matched_cnts TEXT",
        "sum_val INTEGER",
        "version TEXT",
    )
    query = 'CREATE TABLE IF NOT EXISTS rndforest (' + ",".join(column_defs) + ')'
    if verbose > 0:
        print('query=', query)

    with closing(sqlite3.connect(db_file_path)) as conn:
        conn.execute(query)


"""
(25, 400, 2400, [[1176, 1175, 1174, 1173], [1, 1, 3, 2]], 7)
"""
def insert_randomforest_db(db_file_path, version, db_datas, auto_commit=True, verbose=0):
    """Insert one evaluation record into the ``rndforest`` table.

    Args:
        db_file_path: Path to the SQLite database file.
        version: Tag stored in the ``version`` column. Passing ``0`` replaces
            it with the current timestamp rendered as a string.
        db_datas: Sequence shaped like
            ``(n_estimator, data_length, random_state, [rounds, matched_cnts], sum_val)``,
            e.g. ``(25, 400, 2400, [[1176, 1175], [1, 3]], 4)``. The two inner
            lists are stored as comma-joined text.
        auto_commit: Commit after the insert. The connection is opened and
            closed inside this call, so an uncommitted insert is not kept.
        verbose: When greater than 0 the SQL statement is printed.
    """
    import sqlite3
    from contextlib import closing
    from datetime import datetime

    if version == 0:
        version = str(datetime.now().timestamp())

    with closing(sqlite3.connect(db_file_path)) as conn:
        # (column name, value) pairs in insertion order.
        record = (
            ("n_estimator", db_datas[0]),
            ("data_length", db_datas[1]),
            ("random_state", db_datas[2]),
            ("rounds", ",".join(str(item) for item in db_datas[3][0])),
            ("matched_cnts", ",".join(str(item) for item in db_datas[3][1])),
            ("sum_val", db_datas[4]),
            ("version", version),
        )
        names = ",".join(name for name, _ in record)
        params = [value for _, value in record]
        placeholders = ",".join("?" * len(record))
        query = "INSERT INTO rndforest (" + names + ") VALUES (" + placeholders + ")"

        with closing(conn.cursor()) as cursor:
            if verbose > 0:
                print(f'query={query}')
                print('-'*30)
            cursor.execute(query, params)

        if auto_commit:
            conn.commit()

In [3]:
# One-time setup guard: flip `created_db` to False to create the `rndforest`
# table in the metrics database on the next run of this cell.
created_db = True
if not created_db:
    create_randomforest_db('../db/metrics.db')

In [25]:
def predict(train_X, n_estimators=100, random_state=350, trial=5, verbose=0):
    """Fit a random forest on consecutive draws and predict the next draw.

    Each draw's six numbers are the features and the following draw's six
    numbers are the targets. The most recent draw in ``train_X`` is then fed
    to the fitted model to predict the next one.

    Args:
        train_X: Rows of seven values: the round number followed by the six
            drawn numbers.
        n_estimators: Number of trees in the forest.
        random_state: Seed handed to the forest so a run is reproducible.
        trial: Number of prediction entries to return.
        verbose: When greater than 0 the intermediate frames are printed.

    Returns:
        A list with ``trial`` entries, each a sorted list of up to six ints
        in the range 1..45. A fitted forest predicts deterministically, so
        every entry holds the same numbers; the prediction is therefore
        computed once and copied instead of being recomputed per trial.
    """
    number_cols = ['번호1', '번호2', '번호3', '번호4', '번호5', '번호6']

    # Convert the raw rows into a DataFrame ordered by round.
    df = pd.DataFrame(train_X, columns=['회차'] + number_cols)
    df = df.sort_values(by='회차').reset_index(drop=True)

    if verbose > 0:
        print("--- 로또 당첨 번호 이력 ---")
        print(df)

    # Features are each round's numbers; targets are the next round's numbers.
    # The last round has no successor, so it is dropped from both.
    X = df[number_cols][:-1]
    y = df[number_cols].shift(-1).dropna()

    if verbose > 0:
        print("\n--- 훈련 데이터 (X) ---")
        print(X.head())
        print("\n--- 정답 데이터 (y) ---")
        print(y.head())

    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X, y)

    # Keep the most recent draw as a one-row DataFrame so the column names
    # match the ones used for fitting (a bare ndarray loses them, which
    # scikit-learn reports as a feature-name warning).
    last_draw = df[number_cols].iloc[[-1]]

    if verbose > 0:
        print(f"\n--- {df.iloc[-1]['회차']}회차 데이터로 다음 회차 예측 ---")
        print(last_draw.values)

    predicted_numbers_float = model.predict(last_draw)

    # Round to integers, clamp into 1..45 and drop duplicates.
    predicted_numbers = set()
    for num in predicted_numbers_float[0]:
        predicted_numbers.add(min(max(int(round(num)), 1), 45))

    # If de-duplication left fewer than six numbers, top up with the most
    # frequently drawn numbers that were not predicted.
    if len(predicted_numbers) < 6:
        all_numbers = df[number_cols].values.flatten()
        counts = pd.Series(all_numbers).value_counts()
        for num in counts.index:
            if len(predicted_numbers) >= 6:
                break
            # value_counts() yields numpy integers; store plain ints so the
            # result holds a single numeric type.
            predicted_numbers.add(int(num))

    final_prediction = sorted(predicted_numbers)[:6]
    # One independent copy per requested trial.
    return [list(final_prediction) for _ in range(trial)]


def test_prediction(round_nums,
                    actual_numbers,
                    bonus,
                    predicted_numbers_set,
                    n_estimators=500,
                    random_state=500,
                    only_one=True,
                    verbose=0):
    if verbose>0:
        print("======================================")
        print(f"\n--- 참고: {round_nums}회 실제 번호 ---")
        print(f"번호: {actual_numbers}, 보너스: {bonus}")

    matched_len = 0

    for final_prediction in predicted_numbers_set:
        # 예측 결과와 실제 결과 비교
        matching_numbers = set(final_prediction).intersection(set(actual_numbers))
        if verbose > 0:
            print(f"\n--- 예측과 실제 결과 비교 ---")
            print(f"일치하는 번호: {sorted(list(matching_numbers))}")
            print(f"일치 개수: {len(matching_numbers)}")
            print(F"예측 번호: {final_prediction}")
            print(f"n_estimators: {n_estimators}")
            print(f"random_state: {random_state}")
        matched_len = len(matching_numbers)
        if only_one:
            break
    return matched_len


def list_to_dict(item_lists):
    """Index 3-item records by their first element.

    Args:
        item_lists: Iterable of indexable records with at least three items.

    Returns:
        A dict mapping ``item[0]`` to the tuple ``(item[1], item[2])``. When
        a key repeats, the later record wins.
    """
    return {item[0]: (item[1], item[2]) for item in item_lists}


print(f'completed to define. #1 [{datetime.now()}]')

completed to define. #1 [2025-06-13 00:51:55.787233]


In [5]:
verbose = 1

# Actual winning numbers: rounds 1166..1176 from the metrics DB, newest first.
actual_numbers_or, _ = load_data_by_db(
    db_file_path='../db/metrics.db',
    last_round=1176,
    length=10,
    reverse=True,
)
# Keep only the parsed number list of every loaded row.
actual_numbers = [row[1] for row in actual_numbers_or]

if verbose > 0:
    print('actual_numbers', type(actual_numbers), actual_numbers)

data_list_cnt = 3
data_length = 25
results_set = []

print(f'completed to set env. [{datetime.now()}]')

actual_numbers <class 'list'> [[1, 2, 3, 4, 5, 6], [3, 4, 6, 8, 32, 42], [8, 11, 14, 17, 36, 39], [1, 5, 18, 20, 30, 35], [7, 9, 24, 40, 42, 44], [3, 6, 7, 11, 12, 17], [3, 13, 28, 34, 38, 42], [5, 12, 24, 26, 39, 42], [9, 21, 24, 30, 33, 37], [8, 23, 31, 35, 39, 40], [14, 23, 25, 27, 29, 42]]
completed to set env. [2025-06-12 23:21:07.943367]


**Notice**
- The `random_state` parameter seeds the randomness of the algorithm, so a run with the same seed and data reproduces the same result.
- The `n_estimators` parameter specifies the number of decision trees in the forest. This parameter is crucial because it directly affects both the model's performance and its computational cost.

In [26]:
def predit_and_test_parts(round, bonus, h_data, n_estimators, random_state, trial=5, verbose=0):
    """Train on all rows but the last, predict, and score against the last row.

    Args:
        round: Round identifier passed through to the verbose report.
        bonus: Bonus value passed through to the verbose report.
        h_data: Sequence of records whose item ``[1]`` is the row handed to
            ``predict`` (for training) or ``test_prediction`` (last record).
        n_estimators: Number of trees for the forest.
        random_state: Seed for the forest.
        trial: Number of prediction entries requested from ``predict``.
        verbose: Verbosity forwarded to ``predict`` and ``test_prediction``.

    Returns:
        ``(predicted_numbers_set, matched_len)``: the predictions and the
        number of predicted values found in the last record's row.
    """
    # Everything except the final record is training data; the final record
    # is the draw the prediction is compared with.
    train_X = [record[1] for record in h_data[:-1]]
    test_X = h_data[-1][1]

    predicted_numbers_set = predict(train_X=train_X,
                                    n_estimators=n_estimators,
                                    random_state=random_state,
                                    trial=trial,
                                    verbose=verbose)
    # Pass the trailing options by keyword. Positionally, `verbose` used to
    # land in test_prediction's `only_one` slot, so the verbosity was never
    # applied and `only_one` was driven by the verbosity level.
    predict_len1 = test_prediction(round,
                                   test_X,
                                   bonus,
                                   predicted_numbers_set,
                                   n_estimators=n_estimators,
                                   random_state=random_state,
                                   verbose=verbose)
    return predicted_numbers_set, predict_len1


def predict_and_test(round, bonus, h_data, n_estimators, random_state, trial, verbose=0):
    """Run ``predit_and_test_parts`` and return only the match count.

    Args:
        round: Round identifier passed through for reporting.
        bonus: Bonus value passed through for reporting.
        h_data: History records; the last one is the draw to score against.
        n_estimators: Number of trees for the forest.
        random_state: Seed for the forest.
        trial: Number of prediction entries to generate.
        verbose: Verbosity forwarded to the underlying call (it used to be
            replaced by a hard-coded 0, so the argument had no effect).

    Returns:
        The number of predicted values found in the last history record.
    """
    _, predict_len = predit_and_test_parts(round, bonus, h_data, n_estimators, random_state, trial, verbose)
    return predict_len


print(f'completed to define. #2 [{datetime.now()}]')

completed to define. #2 [2025-06-13 00:51:58.633636]


In [7]:
results_set = []

print(f'completed to reset results_set. #2 [{datetime.now()}]')

completed to reset results_set. #2 [2025-06-12 23:21:12.981096]


In [27]:
def main_predict(n_estimator,
                 last_round,
                 data_length,
                 random_state,
                 db_file_path,
                 trial,
                 verbose=0):
    """Load history from the database and return the predicted number sets.

    Args:
        n_estimator: Number of trees for the forest.
        last_round: Highest round number to load.
        data_length: How many rounds before ``last_round`` to load.
        random_state: Seed for the forest.
        db_file_path: Path to the SQLite database with the ``results`` table.
        trial: Number of prediction entries to generate.
        verbose: Verbosity forwarded to the prediction step.

    Returns:
        The list of predicted number lists produced by
        ``predit_and_test_parts``; the match count it also returns is
        discarded.
    """
    hist_data, first_bonus = load_data_by_db(db_file_path=db_file_path,
                                             last_round=last_round,
                                             length=data_length)
    # Prepend the round number to each number list so every row has the
    # seven values (round + six numbers) that predict() builds its frame from.
    for record in hist_data:
        record[1].insert(0, record[0])

    predicted_numbers_set, _ = predit_and_test_parts(round=last_round,
                                                     bonus=first_bonus,
                                                     h_data=hist_data,
                                                     n_estimators=n_estimator,
                                                     random_state=random_state,
                                                     trial=trial,
                                                     verbose=verbose)
    return predicted_numbers_set

print(f'completed to define. #3 [{datetime.now()}]')

completed to define. #3 [2025-06-13 00:52:03.179575]


In [30]:
### Prepare and train, predict
def main_process(version,
                 n_estimators,
                 last_rounds,
                 data_lengths,
                 random_state_gap=100,
                 random_state_begin=0,
                 random_state_end=300000,
                 db_file_path='../db/metrics.db',
                 write_to_db=False,
                 write_db_file_path='../db/metrics.db',
                 trial=5,
                 verbose=0):
    """Evaluate every (n_estimator, data_length, random_state) combination.

    For each combination and each round in ``last_rounds`` the history is
    loaded, a forest is trained and the match count against the last loaded
    round is recorded.

    Args:
        version: Accepted for interface compatibility; not used here.
        n_estimators: Tree counts to try. Values not greater than 1 get an
            empty entry in the result and are otherwise skipped.
        last_rounds: Round numbers to evaluate against.
        data_lengths: History lengths to try.
        random_state_gap: Step of the seed range.
        random_state_begin: First seed (inclusive).
        random_state_end: End of the seed range (exclusive).
        db_file_path: SQLite database the history is read from.
        write_to_db: Accepted for interface compatibility; not used here.
        write_db_file_path: Accepted for interface compatibility; not used here.
        trial: Number of prediction entries generated per evaluation.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        Nested dict ``{n_estimator: {data_length: {random_state:
        [rounds, match_counts]}}}`` where the two inner lists are aligned.
    """
    predict_lens = {}
    for n_estimator in n_estimators:
        by_length = predict_lens[n_estimator] = {}
        if not n_estimator > 1:
            continue
        for last_round in last_rounds:
            # Progress counter, restarted for every round.
            cnt = 0
            for data_length in data_lengths:
                hist_data, first_bonus = load_data_by_db(db_file_path=db_file_path,
                                                         last_round=last_round,
                                                         length=data_length)
                # Prepend the round number to each row's number list.
                for record in hist_data:
                    record[1].insert(0, record[0])

                by_state = by_length.setdefault(data_length, {})
                seeds = range(random_state_begin, random_state_end, random_state_gap)
                for random_state in seeds:
                    predict_len = predict_and_test(last_round, first_bonus, hist_data,
                                                   n_estimator, random_state, trial, 0)
                    rounds_seen, match_counts = by_state.setdefault(random_state, [[], []])
                    rounds_seen.append(last_round)
                    match_counts.append(predict_len)

                    cnt += 1
                    if cnt % 1000 == 0:
                        print(f'proceed [now={datetime.now()}, n_estimator={n_estimator}, cnt={cnt}, last_round={last_round}, random_state={random_state}, data_length={data_length}]')
    return predict_lens

print(f'completed to define main_process. #7 [{datetime.now()}]')

completed to define main_process. #7 [2025-06-13 00:52:48.544781]


In [29]:
def print_predicts(predict_lens, sum_min=-1, sum_max=-1, verbose=0, write_to_file=False, write_to_db=False,
                   version=None, file_path="metric_temp.txt", db_file_path='../db/metrics.db'):
    """Flatten the nested results, filter them by total matches and export them.

    Args:
        predict_lens: Nested dict ``{n_estimator: {data_length:
            {random_state: [rounds, match_counts]}}}``.
        sum_min: Inclusive lower bound on the total match count; a negative
            value (default ``-1``) means no lower bound.
        sum_max: Inclusive upper bound on the total match count; a negative
            value (default ``-1``) means no upper bound.
        verbose: When greater than 0 every selected row is printed; also
            forwarded to the database insert.
        write_to_file: Append every row (selected or not) to ``file_path`` as
            ``n_estimator,data_length,random_state,rounds,matched,sum`` with
            the two lists joined by ``/``.
        write_to_db: Insert every row (selected or not) into the ``rndforest``
            table of ``db_file_path``.
        version: Version tag stored with database rows. When omitted, the
            module-level ``version`` variable is used if one exists,
            otherwise ``0`` is passed to the insert helper.
        file_path: Text file used when ``write_to_file`` is set.
        db_file_path: Database used when ``write_to_db`` is set.

    Returns:
        List of ``(n_estimator, data_length, random_state,
        [rounds, match_counts], sum_val)`` tuples that satisfy the bounds.
    """
    if write_to_db and version is None:
        # Backward compatibility: this value used to be read implicitly from
        # a notebook-level `version` variable.
        version = globals().get("version", 0)

    result_set = []
    # Open the export file once instead of re-opening it for every row.
    out_file = open(file_path, "at") if write_to_file else None
    try:
        for n_estimator, by_length in predict_lens.items():
            for data_length, by_state in by_length.items():
                for random_state, entry in by_state.items():
                    rounds, matched = entry[0], entry[1]
                    sum_val = sum(matched)
                    row = (n_estimator, data_length, random_state, entry, sum_val)

                    # A negative bound is "unbounded"; 0 is a real bound.
                    # Both bounds must hold when both are given.
                    lower_ok = sum_min < 0 or sum_val >= sum_min
                    upper_ok = sum_max < 0 or sum_val <= sum_max
                    if lower_ok and upper_ok:
                        if verbose > 0:
                            print(n_estimator, data_length, random_state, entry)
                        result_set.append(row)

                    if out_file is not None:
                        rounds_text = "/".join(str(i) for i in rounds)
                        # The matched counts go in the fifth field (the rounds
                        # list used to be written here a second time).
                        matched_text = "/".join(str(i) for i in matched)
                        out_file.write(f'{n_estimator},{data_length},{random_state},')
                        out_file.write(f'{rounds_text},')
                        out_file.write(f'{matched_text},')
                        out_file.write(f'{sum_val}\n')

                    if write_to_db:
                        insert_randomforest_db(db_file_path,
                                               version=version,
                                               db_datas=row,
                                               verbose=verbose)
    finally:
        if out_file is not None:
            out_file.close()

    return result_set


print(f'completed to define print_predicts. #8 [{datetime.now()}]')

completed to define print_predicts. #8 [2025-06-13 00:52:05.861763]


In [33]:
def main(parameters, version, sum_min=-1, sum_max=-1, write_to_file=False, write_to_db=False, trial=5, verbose=0):
    """Run the parameter sweep and then filter / export its results.

    Args:
        parameters: Dict with the keys ``n_estimators``, ``last_rounds``,
            ``data_lengths``, ``random_state_gap``, ``random_state_begin``
            and ``random_state_end``.
        version: Version tag handed to ``main_process``.
        sum_min: Lower bound on the total match count, forwarded to
            ``print_predicts``.
        sum_max: Upper bound on the total match count, forwarded to
            ``print_predicts``.
        write_to_file: Forwarded to ``print_predicts``.
        write_to_db: Forwarded to ``print_predicts``.
        trial: Number of prediction entries generated per evaluation.
        verbose: Verbosity forwarded to both stages (it used to be dropped
            for the reporting stage).

    Returns:
        The filtered result list produced by ``print_predicts``.
    """
    print(f'start   [now={datetime.now()}]')
    predict_lens = main_process(version=version,
                                n_estimators=parameters["n_estimators"],
                                last_rounds=parameters["last_rounds"],
                                data_lengths=parameters["data_lengths"],
                                random_state_gap=parameters["random_state_gap"],
                                random_state_begin=parameters["random_state_begin"],
                                random_state_end=parameters["random_state_end"],
                                trial=trial,
                                verbose=verbose)
    print(f'completed [now={datetime.now()}]')
    print(f'start to read and write data: [now={datetime.now()}]')
    # NOTE: when writing to the database, print_predicts picks up the
    # notebook-level `version` variable rather than the argument of this
    # function; keep the two in sync when calling main().
    result_set = print_predicts(predict_lens=predict_lens,
                                sum_min=sum_min,
                                sum_max=sum_max,
                                verbose=verbose,
                                write_to_file=write_to_file,
                                write_to_db=write_to_db)
    print(f'complete to read and data: [now={datetime.now()}]')
    return result_set

print(f'completed to define main. #9 [{datetime.now()}]')

completed to define main. #9 [2025-06-13 00:59:42.920392]


In [None]:
# Sweep configuration consumed by main():
#   n_estimators        - tree counts to try (one forest per value)
#   data_lengths        - how many rounds before each last_round are loaded
#   random_state_begin/ - seeds are range(begin, end, gap); `end` is exclusive
#   random_state_end/
#   random_state_gap
#   last_rounds         - rounds to evaluate against
parameters = {
    "n_estimators": [25, 50, 10],
    "data_lengths": [40],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 1000,
    "last_rounds": [1175, 1174, 1173],
}
# Tag for this experiment run.
version="T_01_02"
# sum_min=2 keeps only combinations whose total match count is at least 2.
result_sets = main(parameters=parameters, sum_min=2, version=version)

In [28]:
parameters = {
    "n_estimators": [25, 10],
    "data_lengths": [40],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 1000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_03"
result_sets = main(parameters=parameters, sum_min=2, version=version)

start   [now=2025-06-11 23:05:14.736151]
proceed [now=2025-06-11 23:05:37.338800, cnt=1000, last_round=1176, random_state=999, data_length=40]
proceed [now=2025-06-11 23:06:03.904516, cnt=1000, last_round=1175, random_state=999, data_length=40]
proceed [now=2025-06-11 23:06:29.714422, cnt=1000, last_round=1174, random_state=999, data_length=40]
proceed [now=2025-06-11 23:06:55.629254, cnt=1000, last_round=1173, random_state=999, data_length=40]
proceed [now=2025-06-11 23:07:18.650509, cnt=1000, last_round=1172, random_state=999, data_length=40]
proceed [now=2025-06-11 23:07:44.495763, cnt=1000, last_round=1171, random_state=999, data_length=40]
proceed [now=2025-06-11 23:08:10.484503, cnt=1000, last_round=1170, random_state=999, data_length=40]
proceed [now=2025-06-11 23:08:33.761643, cnt=1000, last_round=1169, random_state=999, data_length=40]
proceed [now=2025-06-11 23:08:59.960513, cnt=1000, last_round=1168, random_state=999, data_length=40]
proceed [now=2025-06-11 23:09:14.003625, 

In [36]:
parameters = {
    "n_estimators": [25, 10],
    "data_lengths": [40],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 1000,
    "last_rounds": [1176, 1175, 1174],
}
version="T_01_04"
result_sets = main(parameters=parameters, sum_min=2, version=version)

start   [now=2025-06-11 23:20:22.639013]
proceed [now=2025-06-11 23:20:48.674119, n_estimator=25, cnt=1000, last_round=1176, random_state=999, data_length=40]
proceed [now=2025-06-11 23:21:14.995542, n_estimator=25, cnt=1000, last_round=1175, random_state=999, data_length=40]
proceed [now=2025-06-11 23:21:37.827796, n_estimator=25, cnt=1000, last_round=1174, random_state=999, data_length=40]
proceed [now=2025-06-11 23:21:52.514248, n_estimator=10, cnt=1000, last_round=1176, random_state=999, data_length=40]
proceed [now=2025-06-11 23:22:03.630452, n_estimator=10, cnt=1000, last_round=1175, random_state=999, data_length=40]
proceed [now=2025-06-11 23:22:14.851538, n_estimator=10, cnt=1000, last_round=1174, random_state=999, data_length=40]
completed [now=2025-06-11 23:22:14.851758]


In [42]:
parameters = {
    "n_estimators": [25, 10],
    "data_lengths": [40],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 1000,
    "last_rounds": [1175, 1175],
}
version="T_01_05"
result_sets = main(parameters=parameters, sum_min=2, version=version)

start   [now=2025-06-11 23:33:51.682807]
proceed [now=2025-06-11 23:34:18.700060, n_estimator=25, cnt=1000, last_round=1176, random_state=999, data_length=40]
proceed [now=2025-06-11 23:34:42.910211, n_estimator=25, cnt=1000, last_round=1175, random_state=999, data_length=40]
proceed [now=2025-06-11 23:34:58.457405, n_estimator=10, cnt=1000, last_round=1176, random_state=999, data_length=40]
proceed [now=2025-06-11 23:35:11.968236, n_estimator=10, cnt=1000, last_round=1175, random_state=999, data_length=40]
completed [now=2025-06-11 23:35:11.968854]
start to read data: [now=2025-06-11 23:35:11.968929]
complete to read data: [now=2025-06-11 23:35:11.970196]
start to write db: [now=2025-06-11 23:35:11.970231]
complete to write db: [now=2025-06-11 23:35:34.319251]


In [None]:
parameters = {
    "n_estimators": [25, 10],
    "data_lengths": [40],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 1000,
    "last_rounds": [1175],
}
version="T_01_06"
result_sets = main(parameters=parameters, sum_min=2, version=version)

In [62]:
parameters = {
    "n_estimators": [25],
    "data_lengths": [400],
    "random_state_gap": 1,
    "random_state_begin": 0,
    "random_state_end": 3000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_07"
trial=1
result_sets = main(parameters=parameters, sum_min=2, trial=trial, version=version)

start   [now=2025-06-12 00:18:32.976156]
proceed [now=2025-06-12 00:19:31.586459, n_estimator=25, cnt=1000, last_round=1176, random_state=999, data_length=400]
proceed [now=2025-06-12 00:20:31.094794, n_estimator=25, cnt=2000, last_round=1176, random_state=1999, data_length=400]
proceed [now=2025-06-12 00:21:30.204987, n_estimator=25, cnt=3000, last_round=1176, random_state=2999, data_length=400]
proceed [now=2025-06-12 00:22:28.500770, n_estimator=25, cnt=1000, last_round=1175, random_state=999, data_length=400]
proceed [now=2025-06-12 00:23:29.680448, n_estimator=25, cnt=2000, last_round=1175, random_state=1999, data_length=400]
proceed [now=2025-06-12 00:24:30.779908, n_estimator=25, cnt=3000, last_round=1175, random_state=2999, data_length=400]
proceed [now=2025-06-12 00:25:27.806711, n_estimator=25, cnt=1000, last_round=1174, random_state=999, data_length=400]
proceed [now=2025-06-12 00:26:23.534800, n_estimator=25, cnt=2000, last_round=1174, random_state=1999, data_length=400]
pr

In [40]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 300000,
    "random_state_end": 400000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 16:01:48.633986]
proceed [now=2025-06-12 16:02:52.170550, n_estimator=25, cnt=1000, last_round=1175, random_state=309990, data_length=400]
proceed [now=2025-06-12 16:03:54.539848, n_estimator=25, cnt=2000, last_round=1175, random_state=319990, data_length=400]
proceed [now=2025-06-12 16:04:56.995352, n_estimator=25, cnt=3000, last_round=1175, random_state=329990, data_length=400]
proceed [now=2025-06-12 16:05:59.742052, n_estimator=25, cnt=4000, last_round=1175, random_state=339990, data_length=400]
proceed [now=2025-06-12 16:07:02.568713, n_estimator=25, cnt=5000, last_round=1175, random_state=349990, data_length=400]
proceed [now=2025-06-12 16:08:05.782773, n_estimator=25, cnt=6000, last_round=1175, random_state=359990, data_length=400]
proceed [now=2025-06-12 16:09:08.845926, n_estimator=25, cnt=7000, last_round=1175, random_state=369990, data_length=400]
proceed [now=2025-06-12 16:10:11.810140, n_estimator=25, cnt=8000, last_round=1175, random_state=379990, 

In [36]:
for result in result_sets:
    print(result)

In [37]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 100000,
    "random_state_end": 150000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 14:40:37.246073]
proceed [now=2025-06-12 14:41:42.718049, n_estimator=25, cnt=1000, last_round=1175, random_state=109990, data_length=400]
proceed [now=2025-06-12 14:42:47.658780, n_estimator=25, cnt=2000, last_round=1175, random_state=119990, data_length=400]
proceed [now=2025-06-12 14:43:50.521634, n_estimator=25, cnt=3000, last_round=1175, random_state=129990, data_length=400]
proceed [now=2025-06-12 14:44:52.960051, n_estimator=25, cnt=4000, last_round=1175, random_state=139990, data_length=400]
proceed [now=2025-06-12 14:45:55.507010, n_estimator=25, cnt=5000, last_round=1175, random_state=149990, data_length=400]
proceed [now=2025-06-12 14:46:58.081716, n_estimator=25, cnt=1000, last_round=1174, random_state=109990, data_length=400]
proceed [now=2025-06-12 14:48:00.428461, n_estimator=25, cnt=2000, last_round=1174, random_state=119990, data_length=400]
proceed [now=2025-06-12 14:49:02.731608, n_estimator=25, cnt=3000, last_round=1174, random_state=129990, 

In [41]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 150000,
    "random_state_end": 200000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 19:17:33.396619]
proceed [now=2025-06-12 19:18:36.171596, n_estimator=25, cnt=1000, last_round=1175, random_state=159990, data_length=400]
proceed [now=2025-06-12 19:19:38.722645, n_estimator=25, cnt=2000, last_round=1175, random_state=169990, data_length=400]
proceed [now=2025-06-12 19:20:41.331990, n_estimator=25, cnt=3000, last_round=1175, random_state=179990, data_length=400]
proceed [now=2025-06-12 19:21:43.808543, n_estimator=25, cnt=4000, last_round=1175, random_state=189990, data_length=400]
proceed [now=2025-06-12 19:22:46.226923, n_estimator=25, cnt=5000, last_round=1175, random_state=199990, data_length=400]
proceed [now=2025-06-12 19:23:49.239496, n_estimator=25, cnt=1000, last_round=1174, random_state=159990, data_length=400]
proceed [now=2025-06-12 19:24:52.270011, n_estimator=25, cnt=2000, last_round=1174, random_state=169990, data_length=400]
proceed [now=2025-06-12 19:25:55.311937, n_estimator=25, cnt=3000, last_round=1174, random_state=179990, 

In [11]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 50000,
    "random_state_end": 100000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 21:14:33.995039]
proceed [now=2025-06-12 21:15:19.547135, n_estimator=25, cnt=1000, last_round=1175, random_state=59990, data_length=400]
proceed [now=2025-06-12 21:16:08.253959, n_estimator=25, cnt=2000, last_round=1175, random_state=69990, data_length=400]
proceed [now=2025-06-12 21:16:54.371636, n_estimator=25, cnt=3000, last_round=1175, random_state=79990, data_length=400]
proceed [now=2025-06-12 21:17:40.349232, n_estimator=25, cnt=4000, last_round=1175, random_state=89990, data_length=400]
proceed [now=2025-06-12 21:18:29.254669, n_estimator=25, cnt=5000, last_round=1175, random_state=99990, data_length=400]
proceed [now=2025-06-12 21:19:15.281548, n_estimator=25, cnt=1000, last_round=1174, random_state=59990, data_length=400]
proceed [now=2025-06-12 21:20:01.278225, n_estimator=25, cnt=2000, last_round=1174, random_state=69990, data_length=400]
proceed [now=2025-06-12 21:20:50.293894, n_estimator=25, cnt=3000, last_round=1174, random_state=79990, data_len

In [None]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 0,
    "random_state_end": 50000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 22:20:51.525562]
proceed [now=2025-06-12 22:21:37.785743, n_estimator=25, cnt=1000, last_round=1175, random_state=9990, data_length=400]
proceed [now=2025-06-12 22:22:27.019867, n_estimator=25, cnt=2000, last_round=1175, random_state=19990, data_length=400]
proceed [now=2025-06-12 22:23:13.264175, n_estimator=25, cnt=3000, last_round=1175, random_state=29990, data_length=400]
proceed [now=2025-06-12 22:23:59.746643, n_estimator=25, cnt=4000, last_round=1175, random_state=39990, data_length=400]
proceed [now=2025-06-12 22:24:49.414732, n_estimator=25, cnt=5000, last_round=1175, random_state=49990, data_length=400]
proceed [now=2025-06-12 22:25:36.239075, n_estimator=25, cnt=1000, last_round=1174, random_state=9990, data_length=400]
proceed [now=2025-06-12 22:26:22.980424, n_estimator=25, cnt=2000, last_round=1174, random_state=19990, data_length=400]
proceed [now=2025-06-12 22:27:12.514231, n_estimator=25, cnt=3000, last_round=1174, random_state=29990, data_lengt

In [11]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 400000,
    "random_state_end": 450000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-12 23:22:16.667982]
proceed [now=2025-06-12 23:23:03.381821, n_estimator=25, cnt=1000, last_round=1175, random_state=409990, data_length=400]
proceed [now=2025-06-12 23:23:51.569630, n_estimator=25, cnt=2000, last_round=1175, random_state=419990, data_length=400]
proceed [now=2025-06-12 23:24:38.801259, n_estimator=25, cnt=3000, last_round=1175, random_state=429990, data_length=400]
proceed [now=2025-06-12 23:25:28.975733, n_estimator=25, cnt=4000, last_round=1175, random_state=439990, data_length=400]
proceed [now=2025-06-12 23:26:16.313665, n_estimator=25, cnt=5000, last_round=1175, random_state=449990, data_length=400]
proceed [now=2025-06-12 23:27:02.797789, n_estimator=25, cnt=1000, last_round=1174, random_state=409990, data_length=400]
proceed [now=2025-06-12 23:27:52.023784, n_estimator=25, cnt=2000, last_round=1174, random_state=419990, data_length=400]
proceed [now=2025-06-12 23:28:38.219156, n_estimator=25, cnt=3000, last_round=1174, random_state=429990, 

In [34]:
parameters = {
    "n_estimators": [25,10],
    "data_lengths": [400],
    "random_state_gap": 10,
    "random_state_begin": 450000,
    "random_state_end": 460000,
    "last_rounds": [1175, 1174, 1173, 1172, 1171, 1170, 1169, 1168, 1167, 1166],
}
version="T_01_08"
result_sets = main(parameters=parameters, version=version, sum_min=0, sum_max=-1, trial=1, write_to_db=True)

start   [now=2025-06-13 00:59:50.486471]
proceed [now=2025-06-13 01:00:46.825518, n_estimator=25, cnt=1000, last_round=1175, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:01:34.498249, n_estimator=25, cnt=1000, last_round=1174, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:02:31.797651, n_estimator=25, cnt=1000, last_round=1173, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:03:33.416927, n_estimator=25, cnt=1000, last_round=1172, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:04:29.439487, n_estimator=25, cnt=1000, last_round=1171, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:05:26.612134, n_estimator=25, cnt=1000, last_round=1170, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:06:23.117099, n_estimator=25, cnt=1000, last_round=1169, random_state=459990, data_length=400]
proceed [now=2025-06-13 01:07:16.167696, n_estimator=25, cnt=1000, last_round=1168, random_state=459990, 

In [31]:
# Small run under its own version tag ("T_01_99"): a single round (1175),
# random_state_gap of 1, and random_state_begin/end of 300000-301000.
parameters = {
    "n_estimators": [25, 10],
    "data_lengths": [400],
    "random_state_gap": 1,
    "random_state_begin": 300000,
    "random_state_end": 301000,
    "last_rounds": [1175],
}
version = "T_01_99"
result_sets = main(
    parameters=parameters, version=version,
    sum_min=0, sum_max=-1,
    trial=1, write_to_db=True,
)

start   [now=2025-06-12 11:24:56.202755]
proceed [now=2025-06-12 11:25:59.665769, n_estimator=25, cnt=1000, last_round=1175, random_state=300999, data_length=400]
proceed [now=2025-06-12 11:26:29.527030, n_estimator=10, cnt=1000, last_round=1175, random_state=300999, data_length=400]
completed [now=2025-06-12 11:26:29.527225]
completed [now=2025-06-12 11:26:29.527241]
start to read data: [now=2025-06-12 11:26:29.527249]
complete to read data: [now=2025-06-12 11:26:54.347066]
start to write db: [now=2025-06-12 11:26:54.347180]
complete to write db: [now=2025-06-12 11:26:54.347199]


In [51]:
## SQL Query

In [None]:
# Print only the entries whose element at index 4 is greater than 6.
# NOTE(review): `result_set` is not assigned in the cells visible here (the
# sweep cells assign `result_sets`) -- confirm which name is intended.
for result in result_set:
    if not (result[4] > 6):
        continue
    print(result)

In [13]:
# Timestamped banner, then every entry of `results_set` whose first or second
# value inside result[2] is above 3.
print(f'check answer : [{datetime.now()}]')
for result in results_set:
    # any() stops at the first match, like the original `or` chain.
    if any(result[2][idx] > 3 for idx in (0, 1)):
        print(result)

check answer : [2025-06-09 11:35:00.484062]


In [None]:
# Seed scan with 45 estimators: for each minus_value, try every 100th
# random_state in [begin_pos, end_pos) and keep the combinations whose first
# two predict_lens values are both above 2.
# NOTE(review): the meaning of predict_and_test's arguments (including the
# trailing 0) is defined elsewhere in the notebook -- confirm there.
n_estimators = 45
begin_pos = 0
end_pos = 600000
print(f'start   [now={datetime.now()}]')
for minus_value in [2, 1, 0]:
    cnt = 0
    for cnt, random_state in enumerate(range(begin_pos, end_pos, 100), start=1):
        predict_lens = predict_and_test(minus_value, n_estimators, random_state, 0)
        if all(predict_lens[idx] > 2 for idx in (0, 1)):
            hit_record = (n_estimators, random_state, predict_lens, minus_value)
            results_set.append(hit_record)
            print('found = ', *hit_record)
        # Progress line every 1000 seeds.
        if not cnt % 1000:
            print(f'proceed [now={datetime.now()}, {cnt}]')
print(f'completed [now={datetime.now()}]')

In [None]:
# Seed scan with 6 estimators over minus_value 8..1: every 100th random_state
# in [begin_pos, end_pos) is tried, and a combination is recorded when the
# first three predict_lens values are all above 1.
n_estimators = 6
begin_pos = 100000
end_pos = 500000
print(f'start   [now={datetime.now()}]')
for minus_value in [8, 7, 6, 5, 4, 3, 2, 1]:
    cnt = 0
    for random_state in range(begin_pos, end_pos, 100):
        predict_lens = predict_and_test(minus_value, n_estimators, random_state, 0)
        # Indexes are checked in order 0, 1, 2 and evaluation stops at the first miss.
        if all(predict_lens[idx] > 1 for idx in (0, 1, 2)):
            results_set.append((n_estimators, random_state, predict_lens, minus_value))
            print('found = ', n_estimators, random_state, predict_lens, minus_value)
        cnt = cnt + 1
        # Progress line every 1000 seeds.
        if not cnt % 1000:
            print(f'proceed [now={datetime.now()}, {cnt}]')
print(f'completed [now={datetime.now()}]')

```
(25, 219100, [3, 4], 0)
(25, 560100, [3, 4], 0)
(25, 322300, [3, 4], 5)
(25, 520500, [3, 4], 5)
(25, 148700, [3, 4], 6)
(25, 283500, [3, 4], 6)
(25, 358100, [3, 4], 6)
(25, 451600, [4, 3], 6)
(25, 489000, [4, 3], 6)
(25, 295500, [3, 4], 7)
(25, 241500, [3, 4], 8)
(45, 52100, [3, 4], 6)
(45, 587300, [3, 4], 4)
(10, 102500, [3, 4], 7)
(10, 113700, [4, 4], 6)
(10, 164200, [3, 4], 1)
(10, 225300, [3, 4], 8)
(10, 371400, [3, 4], 8)
(10, 255700, [3, 4], 7)
(10, 15200, [3, 4], 7)
(10, 63300, [3, 4], 6)
(6, 131400, [3, 4], 5)
(6, 195200, [4, 3], 5)
(6, 406100, [4, 3], 8)
(6, 325200, [4, 3], 7)
(6, 353200, [4, 3], 7)
(6, 473900, [4, 4], 6)
(6, 363000, [4, 3], 5)
[
(25, 451600, [4, 3], 6),
(25, 489000, [4, 3], 6),
(6, 473900, [4, 4], 6),
(10, 113700, [4, 4], 6),
(10, 15200, [3, 4], 7),
]

```

In [None]:
# List the recorded candidates where either of the first two values in
# result[2] exceeds 3; everything else is skipped.
for result in results_set:
    if not (result[2][0] > 3 or result[2][1] > 3):
        continue
    print(result)

In [None]:
# Re-check each shortlisted candidate against the data at data_pos 1 and 2.
#
# A previous shortlist used to be assigned to `datas` here and was overwritten
# by the very next statement, so it never took part in the run. It is kept as
# a comment for reference (note that its rows use a different layout):
#     [45,208000,4,3,5],
#     [45,101700,4,3,2],
#     [45,101700,4,3,2],
#     [45,185400,4,3,2],
#     [45,96800,4,3,1],
#     [45,58600,3,3,0],
#
# Active rows use the layout the scan cells append to `results_set`:
# (n_estimators, random_state, predict_lens, minus_value).
datas = [
    (25, 451600, [4, 3], 6),
    (25, 489000, [4, 3], 6),
    (6, 473900, [4, 4], 6),
    (10, 113700, [4, 4], 6),
    (10, 15200, [3, 4], 7),
    ]
for data in datas:
    n_estimators = data[0]
    random_state = data[1]
    minus_value = data[3]
    # Reset per candidate and never appended to in this cell; kept so the
    # notebook-global `results` ends up with the same value as before.
    results = []
    print(f'start [now={datetime.now()}]')
    per_round_lens = []
    # The same predict/test sequence runs for data_pos 1, then data_pos 2.
    for data_pos in (1, 2):
        # History window for this position, shortened at its end by minus_value.
        h_data = lotto_history_data[r_pos[data_pos][0]:r_pos[data_pos][1]-minus_value]
        predicted_numbers_set = predict(lotto_history_data=h_data,
                                        n_estimators=n_estimators,
                                        random_state=random_state,
                                        verbose=0)
        per_round_lens.append(test_prediction(round_nums[data_pos],
                                              actual_numbers[data_pos],
                                              bonus[data_pos],
                                              predicted_numbers_set,
                                              n_estimators,
                                              random_state,
                                              verbose=1))
    # Keep the original result names available to later cells.
    predict_len1, predict_len2 = per_round_lens
print(f'completed [now={datetime.now()}]')

In [None]:
# Run `predict` for every shortlisted candidate at data_pos 0 and collect the
# first predicted set of each, then print the collected sets.
#
# Previous shortlist, left disabled:
# datas = [
#     [45,208000,4,3,5],
#     [45,101700,4,3,2],
#     [45,185400,4,3,2],
#     [45,96800,4,3,1],
#     [45,58600,3,3,0],
#     ]
#
# Active rows: (n_estimators, random_state, predict_lens, minus_value).
datas = [
    (25, 451600, [4, 3], 6),
    (25, 489000, [4, 3], 6),
    (6, 473900, [4, 4], 6),
    (10, 113700, [4, 4], 6),
    (10, 15200, [3, 4], 7),
    ]
print(f'start [now={datetime.now()}]')
results = []
data_pos = 0
for data in datas:
    n_estimators, random_state, minus_value = data[0], data[1], data[3]
    # History window for data_pos, shortened at its end by minus_value.
    h_data = lotto_history_data[slice(r_pos[data_pos][0], r_pos[data_pos][1] - minus_value)]
    predicted_numbers_set = predict(
        lotto_history_data=h_data,
        n_estimators=n_estimators,
        random_state=random_state,
        verbose=0,
    )
    results.append(predicted_numbers_set[0])
    # NOTE(review): the actual numbers come from index 1 while the round and
    # bonus come from data_pos (0) -- confirm this mismatch is intentional.
    predict_len2 = test_prediction(
        round_nums[data_pos],
        actual_numbers[1],
        bonus[data_pos],
        predicted_numbers_set,
        n_estimators,
        random_state,
        verbose=1,
    )
print(f'completed [now={datetime.now()}]')
print('Predicted Numbers.')
for result in results:
    print(result)
print('Predicted Numbers.')
for result in results:
    print(result)

In [None]:
# Dump the whole `results` list (as left by whichever cell last assigned it) on one line.
print(results)

```
[
[45,208000,4,3,5],
[45,101700,4,3,2],
[45,101700,4,3,2],
[45,185400,4,3,2],
[45,96800,4,3,1],
    [45,58600,3,3,0],
]
```

In [None]:
# One-off check of a single (n_estimators, random_state) pair against the data
# at data_pos 1 and then data_pos 2, using the full history window each time.
n_estimators = 10
random_state = 5000
for data_pos in (1, 2):
    h_data = lotto_history_data[r_pos[data_pos][0]:r_pos[data_pos][1]]
    predicted_numbers_set = predict(
        lotto_history_data=h_data,
        n_estimators=n_estimators,
        random_state=random_state,
        verbose=0,
    )
    last_check = test_prediction(
        round_nums[data_pos],
        actual_numbers[data_pos],
        bonus[data_pos],
        predicted_numbers_set,
        n_estimators,
        random_state,
    )
# Bare trailing expression so the notebook still displays the value returned
# by the final test_prediction call, as the original cell did.
last_check

In [None]:
# Hand-recorded notes. The first and third tuples were missing their closing
# parenthesis, which made the whole cell a SyntaxError.
# NOTE(review): fields look like (predicted numbers, matched numbers,
# (n_estimators, random_state), (round range)) -- confirm with the author.
[
    ([14, 17, 23, 27, 34, 36], [14, 17, 36], (45, 1000), (1160, 1173)),
    ([13, 16, 23, 27, 34, 36], [36], (45, 1300), (1160, 1173)),
    ([11, 15, 21, 30, 36, 39], [11, 36, 39], (45, 1000), (1160, 1173)),
]

In [31]:
# Produce predictions for round 1176 from selected parameter rows.
# Each row mirrors the rndforest table columns:
# n_estimator, data_length, random_state, rounds, matched_cnts, sum_val, version.
# Only the first three columns are passed to main_predict.
params = [
    ['25', '400', '168060', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,1,2,0,1,4,4,0,1', '17', 'T_01_08'],
    ['25', '400', '680', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,1,1,1,2,1,4,4,1,1', '17', 'T_01_08'],
    ['10', '400', '338000', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '3,3,1,0,3,0,1,3,3,2', '19', 'T_01_08'],
    ['25', '400', '116690', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,2,2,3,0,3,2,1,1', '18', 'T_01_08'],
    ['25', '400', '143050', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,3,3,2,0,3,1,1,1', '18', 'T_01_08'],
    ['10', '400', '129920', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '2,0,2,3,2,3,1,3,1,1', '18', 'T_01_08'],
    ['25', '400', '319350', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,1,3,1,1,3,3,2,0', '18', 'T_01_08'],
    ['10', '400', '374630', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '3,3,1,2,3,0,2,2,2,0', '18', 'T_01_08'],
    ['25', '400', '381150', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,1,1,2,0,3,3,2,1', '17', 'T_01_08'],
    ['10', '400', '373380', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,1,2,0,3,2,3,1,1', '17', 'T_01_08'],
    ['10', '400', '397710', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '1,3,2,1,1,0,2,3,3,1', '17', 'T_01_08'],
    ['10', '400', '73700', '1175,1174,1173,1172,1171,1170,1169,1168,1167,1166', '2,3,1,1,1,2,3,0,3,1', '17', 'T_01_08'],
]

db_file_path = '../db/metrics.db'
predicted_numbers_set = []

for param in params:
    # The row values are stored as text, so the numeric ones are converted here.
    predicted_numbers = main_predict(
        n_estimator=int(param[0]),
        last_round=1176,
        data_length=int(param[1]),
        random_state=int(param[2]),
        db_file_path=db_file_path,
        trial=1,
        verbose=0,
    )
    predicted_numbers_set += [predicted_numbers]

# One line of output per parameter row, in the same order as `params`.
for numbers in predicted_numbers_set:
    print(numbers)

[[9, 14, 20, 26, 29, 39]]
[[9, 14, 22, 27, 30, 37]]
[[10, 17, 22, 29, 36, 40]]
[[7, 13, 17, 24, 29, 36]]
[[6, 12, 18, 24, 28, 38]]
[[10, 17, 22, 29, 34, 40]]
[[8, 13, 22, 26, 29, 37]]
[[9, 13, 23, 30, 34, 40]]
[[8, 13, 20, 26, 30, 38]]
[[8, 12, 19, 23, 28, 38]]
[[10, 16, 23, 29, 33, 38]]
[[8, 17, 21, 28, 35, 41]]


In [35]:
#test