In [8]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import warnings
from datetime import datetime

# Hide warning messages that are not needed.
# NOTE(review): with no category/module argument this suppresses every warning
# raised later in the kernel session, not only the ones from this notebook's code.
warnings.filterwarnings('ignore')

# Marker so the cell output shows when the libraries finished loading.
print(f'completed to load library. [{datetime.now()}]')

completed to load library. [2025-06-09 13:18:55.806273]


In [9]:
def predict(lotto_history_data, n_estimators=100, random_state=350, verbose=0, trial=5):
    """Fit a random forest on consecutive draws and predict the draw after the newest one.

    Each training sample maps the six numbers of one round (X) to the six
    numbers of the following round (y). The newest round in the history is
    then fed to the fitted model to obtain the prediction.

    Args:
        lotto_history_data: Rows of ``[round, n1, n2, n3, n4, n5, n6]`` in any
            order; they are sorted by round before use.
        n_estimators: Number of trees in the forest.
        random_state: Seed passed to ``RandomForestRegressor``.
        verbose: Values greater than 0 print the intermediate frames.
        trial: Number of entries in the returned list.

    Returns:
        A list with ``trial`` entries, each a sorted list of up to six ints in
        1..45. A fitted forest is deterministic, so every entry holds the same
        numbers; the list shape is kept for callers that index ``[0]`` or
        iterate over it.

    Raises:
        ValueError: If the history has fewer than two rounds, because no
            (round, next round) training pair can be built.
    """
    number_cols = ['번호1', '번호2', '번호3', '번호4', '번호5', '번호6']

    # Convert the raw rows into a DataFrame ordered from oldest to newest round.
    df = pd.DataFrame(lotto_history_data, columns=['회차'] + number_cols)
    df = df.sort_values(by='회차').reset_index(drop=True)

    if verbose > 0:
        print("--- 로또 당첨 번호 이력 ---")
        print(df)

    if len(df) < 2:
        raise ValueError("predict() needs at least two rounds of history to build a training pair")

    # X is every round except the newest; y is the round that followed each X row.
    # The newest round has no successor, so shift(-1) leaves NaN there and dropna removes it.
    X = df[number_cols][:-1]
    y = df[number_cols].shift(-1).dropna()

    if verbose > 0:
        print("\n--- 훈련 데이터 (X) ---")
        print(X.head())
        print("\n--- 정답 데이터 (y) ---")
        print(y.head())

    # random_state makes the forest reproducible; n_estimators is the number of trees.
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X, y)

    # Keep the newest draw as a one-row DataFrame so its column names match the
    # ones the model was fitted with (a bare ndarray triggers a feature-name warning).
    last_draw = df[number_cols].iloc[[-1]]

    if verbose > 0:
        print(f"\n--- {df.iloc[-1]['회차']}회차 데이터로 다음 회차 예측 ---")
        print(last_draw.to_numpy())

    # The prediction does not change between calls, so compute it once.
    predicted_numbers_float = model.predict(last_draw)[0]

    # Round to the nearest integer, clamp into 1..45 and drop duplicates.
    predicted_numbers = set()
    for num in predicted_numbers_float:
        int_num = int(round(float(num)))
        predicted_numbers.add(min(45, max(1, int_num)))

    # If rounding/clamping collapsed some values, top up to six numbers with the
    # historically most frequent numbers that are not already selected.
    if len(predicted_numbers) < 6:
        counts = pd.Series(df[number_cols].to_numpy().ravel()).value_counts()
        for num in counts.index:
            if len(predicted_numbers) >= 6:
                break
            if num not in predicted_numbers:
                predicted_numbers.add(int(num))

    final_prediction = sorted(predicted_numbers)[:6]

    # Return independent list objects so a caller mutating one entry does not affect the others.
    return [list(final_prediction) for _ in range(trial)]


def test_prediction(round_nums,
                    actual_numbers,
                    bonus,
                    predicted_numbers_set,
                    n_estimators=500,
                    random_state=500,
                    only_one=True,
                    verbose=0):
    if verbose>0:
        print("======================================")
        print(f"\n--- 참고: {round_nums}회 실제 당첨 번호 ---")
        print(f"당첨번호: {actual_numbers}, 보너스: {bonus}")

    matched_len = 0

    for final_prediction in predicted_numbers_set:
        # 예측 결과와 실제 결과 비교
        matching_numbers = set(final_prediction).intersection(set(actual_numbers))
        if verbose > 0:
            print(f"\n--- 예측과 실제 결과 비교 ---")
            print(f"일치하는 번호: {sorted(list(matching_numbers))}")
            print(f"일치 개수: {len(matching_numbers)}")
            print(F"예측 번호: {final_prediction}")
            print(f"n_estimators: {n_estimators}")
            print(f"random_state: {random_state}")
        matched_len = len(matching_numbers)
        if only_one:
            break
    return matched_len

# Marker so the cell output shows when predict/test_prediction were (re)defined.
print(f'completed to define. #1 [{datetime.now()}]')

completed to define. #1 [2025-06-09 13:18:56.462516]


In [17]:
# Actual winning numbers, one row per entry of round_nums (same index).
# Row 0 (round 1176) is an all-zero placeholder; rows 1-4 repeat the draws
# listed for rounds 1175-1172 in lotto_history_data.
actual_numbers = [
    [0, 0, 0, 0, 0, 0],
    [3, 4, 6, 8, 32, 42],
    [8, 11, 14, 17, 36, 39],
    [1, 5, 18, 20, 30, 35],
    [7, 9, 24, 40, 42, 44],
]
# Bonus number per round, index-aligned with round_nums.
# NOTE(review): the same value (22) is entered for four consecutive rounds - confirm these are real.
bonus = [0, 22, 22, 22, 22]
# Round identifiers, newest first; data_pos values index into this list.
round_nums = [1176, 1175, 1174, 1173, 1172]
# Historical lotto winning numbers, newest round first.
# Each row is [round, number1, number2, number3, number4, number5, number6].
# In practice more data would be needed; only a subset is used here as an example.
# Data source: https://data.soledot.com/lottowinnumber/fo/lottowinnumberlist.sd
lotto_history_data = [
    [1175, 3, 4, 6, 8, 32, 42],
    [1174, 8, 11, 14, 17, 36, 39],
    [1173, 1, 5, 18, 20, 30, 35],
    [1172, 7, 9, 24, 40, 42, 44],
    [1171, 3, 6, 7, 11, 12, 17],
    [1170, 3, 13, 28, 34, 38, 42],
    [1169, 5, 12, 24, 26, 39, 42],
    [1168, 9, 21, 24, 30, 33, 37],
    [1167, 8, 23, 31, 35, 39, 40],
    [1166, 14, 23, 25, 27, 29, 42],
    [1165, 6, 7, 27, 29, 38, 45],
    [1164, 17, 18, 23, 25, 38, 39],
    [1163, 2, 13, 15, 16, 33, 43],
    [1162, 20, 21, 22, 25, 28, 29],
    [1161, 2, 12, 20, 24, 34, 42],
    [1160, 7, 13, 18, 36, 39, 45],
    [1159, 3, 9, 27, 28, 38, 39],
    [1158, 21, 25, 27, 32, 37,38],
    [1157, 5, 7, 12, 20, 25, 26],
    [1156, 30, 31, 34, 39, 41, 45],
    [1155, 10, 16, 19, 27, 37, 38],
    [1154, 4, 8, 22, 26, 32, 38],
    [1153, 1, 9, 10, 13, 35, 44],
    [1152, 30, 31, 32, 35, 36, 37],
    [1151, 2, 3, 9, 15, 27, 29],
    [1150, 8, 9, 18, 35, 39, 45],
    [1149, 8, 15, 19, 21, 32, 36],
    [1148, 3, 6, 13, 15, 16, 22],
    [1147, 7, 11, 24, 26, 27, 37],
    [1146, 6, 11, 17, 19, 40, 43],
    [1145, 2, 11, 31, 33, 37, 44],
    [1144, 3, 4, 12, 15, 26, 34],
    [1143, 10, 16, 17, 27, 28, 36],
    [1142, 2, 8, 28, 30, 37, 41],
    [1141, 7, 11, 12, 21, 26, 35],
    [1140, 7, 10, 22, 29, 31, 38],
    [1139, 5, 12, 15, 30, 37, 40],
    [1138, 14, 16, 19, 20, 29, 34],
    [1137, 4, 9, 12, 15, 33, 45],
    [1136, 21, 33, 35, 38, 42, 44],
    [1135, 1, 6, 13, 19, 21, 33],
    [1134, 3, 7, 9, 13, 19, 24],
    [1133, 13, 14, 20, 28, 29, 34],
    [1132, 6, 7, 19, 28, 34, 41],
    [1131, 1, 2, 6, 14, 27, 38],
    [1130, 15, 19, 21, 25, 27, 28],
    # [1129, 5, 10, 11, 17, 28, 34],
]

# Collects (n_estimators, random_state, match counts, minus_value) tuples found by the search cells.
results_set = []
# Positions (indices into round_nums / actual_numbers / r_pos) that are back-tested.
data_poses = [1, 2]

# How many of the newest rows to use; a value outside (0, len(history)) means "use everything".
data_length = 25

end_pod = data_length if 0 < data_length < len(lotto_history_data) else len(lotto_history_data)

# (start, stop) slice bounds into lotto_history_data, one pair per data_pos.
# Each later position starts one round further back and ends one row later.
r_pos = [(shift, end_pod - 2 + shift) for shift in range(3)]

print(f'completed to set env. [{datetime.now()}]')

completed to set env. [2025-06-09 13:59:01.807174]


**Notice**
- The `random_state` parameter controls the randomness of the algorithm, which makes the results reproducible.
- The `n_estimators` parameter specifies the number of decision trees in the forest. It directly affects both the model's performance and its computational cost.

In [11]:
def predit_and_test_parts(data_pos, h_data, minus_value, n_estimators, random_state, verbose=0):
    """Predict from ``h_data`` and score the result against the round at ``data_pos``.

    Args:
        data_pos: Index into the module-level ``round_nums``, ``actual_numbers``
            and ``bonus`` lists that selects the round to score against.
        h_data: History rows passed to ``predict``.
        minus_value: Not used here; kept so existing callers keep working.
        n_estimators: Forwarded to ``predict`` and ``test_prediction``.
        random_state: Forwarded to ``predict`` and ``test_prediction``.
        verbose: Forwarded to ``predict`` and ``test_prediction``.

    Returns:
        ``(predicted_numbers_set, match_count)``.
    """
    predicted_numbers_set = predict(lotto_history_data=h_data,
                                    n_estimators=n_estimators,
                                    random_state=random_state,
                                    verbose=verbose)
    # Pass the trailing arguments by keyword: the 7th positional parameter of
    # test_prediction is only_one, so a positional verbose would land there.
    predict_len1 = test_prediction(round_nums[data_pos],
                                   actual_numbers[data_pos],
                                   bonus[data_pos],
                                   predicted_numbers_set,
                                   n_estimators=n_estimators,
                                   random_state=random_state,
                                   verbose=verbose)
    return predicted_numbers_set, predict_len1


def predict_and_test(minus_value, n_estimators, random_state, verbose=0):
    """Run ``predit_and_test_parts`` for every position in ``data_poses``.

    Reads the module-level ``data_poses``, ``r_pos`` and ``lotto_history_data``.
    For each position the history slice is ``r_pos[pos][0]`` up to
    ``r_pos[pos][1] - minus_value``, i.e. ``minus_value`` rows are cut from the
    end of the slice.

    Args:
        minus_value: Number of rows removed from the end of each history slice.
        n_estimators: Number of trees for the forest.
        random_state: Seed for the forest.
        verbose: Forwarded to the prediction and scoring helpers.

    Returns:
        A list of match counts, one per entry of ``data_poses``, in the same order.
    """
    predict_lens = []
    for data_pos in data_poses:
        start, stop = r_pos[data_pos]
        h_data = lotto_history_data[start:stop - minus_value]
        # Forward the caller's verbosity rather than a literal 0.
        _, predict_len = predit_and_test_parts(data_pos, h_data, minus_value,
                                               n_estimators, random_state,
                                               verbose=verbose)
        predict_lens.append(predict_len)
    return predict_lens

# Marker so the cell output shows when the back-test helpers were (re)defined.
print(f'completed to define. #2 [{datetime.now()}]')

completed to define. #2 [2025-06-09 13:18:58.362147]


In [12]:
# Clear previously collected search hits; the search cells append to this list.
results_set = []

print(f'completed to reset results_set. #2 [{datetime.now()}]')

completed to reset results_set. #2 [2025-06-09 13:18:59.404910]


In [19]:
# Seed search with 25 trees: try every 100th random_state in [begin_pos, end_pos)
# for each minus_value and keep the seeds where both back-tested rounds match more than 2 numbers.
n_estimators = 25
begin_pos = 100000
end_pos = 200000
print(f'start   [now={datetime.now()}]')
for minus_value in (3, 2, 1):
    cnt = 0
    for random_state in range(begin_pos, end_pos, 100):
        predict_lens = predict_and_test(minus_value, n_estimators, random_state, 0)
        if min(predict_lens[0], predict_lens[1]) > 2:
            results_set.append((n_estimators, random_state, predict_lens, minus_value))
            print('found = ', n_estimators, random_state, predict_lens, minus_value)
        cnt = cnt + 1
        # Progress line every 1000 seeds; the counter restarts for each minus_value.
        if not cnt % 1000:
            print(f'proceed [now={datetime.now()}, {cnt}]')
print(f'completed [now={datetime.now()}]')

start   [now=2025-06-09 14:01:54.675545]
proceed [now=2025-06-09 14:02:44.435974, 1000]
proceed [now=2025-06-09 14:03:34.668991, 1000]
proceed [now=2025-06-09 14:04:23.861116, 1000]
completed [now=2025-06-09 14:04:23.861301]


In [13]:
# List the collected hits where at least one of the two back-tested rounds matched more than 3 numbers.
print(f'check answer : [{datetime.now()}]')
for result in results_set:
    if max(result[2][0], result[2][1]) > 3:
        print(result)

check answer : [2025-06-09 11:35:00.484062]


In [None]:
# Seed search with 45 trees: try every 100th random_state in [begin_pos, end_pos)
# for each minus_value and keep the seeds where both back-tested rounds match more than 2 numbers.
n_estimators = 45
begin_pos = 0
end_pos = 600000
print(f'start   [now={datetime.now()}]')
for minus_value in (8, 7, 6, 5, 4, 3, 2, 1):
    cnt = 0
    for random_state in range(begin_pos, end_pos, 100):
        predict_lens = predict_and_test(minus_value, n_estimators, random_state, 0)
        if min(predict_lens[0], predict_lens[1]) > 2:
            results_set.append((n_estimators, random_state, predict_lens, minus_value))
            print('found = ', n_estimators, random_state, predict_lens, minus_value)
        cnt = cnt + 1
        # Progress line every 1000 seeds; the counter restarts for each minus_value.
        if not cnt % 1000:
            print(f'proceed [now={datetime.now()}, {cnt}]')
print(f'completed [now={datetime.now()}]')

In [None]:
# Seed search with 6 trees: try every 100th random_state in [begin_pos, end_pos)
# for each minus_value and keep the seeds where every back-tested round matches more than 1 number.
n_estimators = 6
begin_pos = 100000
end_pos = 500000
print(f'start   [now={datetime.now()}]')
for minus_value in [8, 7, 6, 5, 4, 3, 2, 1]:
    cnt = 0
    for random_state in range(begin_pos, end_pos, 100):
        predict_lens = predict_and_test(minus_value, n_estimators, random_state, 0)
        # predict_lens has one entry per data_poses element, so test all of them
        # instead of indexing fixed positions (index 2 does not exist when
        # data_poses has two entries and would raise IndexError).
        if all(matched > 1 for matched in predict_lens):
            results_set.append((n_estimators, random_state, predict_lens, minus_value))
            print('found = ', n_estimators, random_state, predict_lens, minus_value)
        cnt += 1
        # Progress line every 1000 seeds; the counter restarts for each minus_value.
        if cnt % 1000 == 0:
            print(f'proceed [now={datetime.now()}, {cnt}]')
print(f'completed [now={datetime.now()}]')

```
(25, 219100, [3, 4], 0)
(25, 560100, [3, 4], 0)
(25, 322300, [3, 4], 5)
(25, 520500, [3, 4], 5)
(25, 148700, [3, 4], 6)
(25, 283500, [3, 4], 6)
(25, 358100, [3, 4], 6)
(25, 451600, [4, 3], 6)
(25, 489000, [4, 3], 6)
(25, 295500, [3, 4], 7)
(25, 241500, [3, 4], 8)
(45, 52100, [3, 4], 6)
(45, 587300, [3, 4], 4)
(10, 102500, [3, 4], 7)
(10, 113700, [4, 4], 6)
(10, 164200, [3, 4], 1)
(10, 225300, [3, 4], 8)
(10, 371400, [3, 4], 8)
(10, 255700, [3, 4], 7)
(10, 15200, [3, 4], 7)
(10, 63300, [3, 4], 6)
(6, 131400, [3, 4], 5)
(6, 195200, [4, 3], 5)
(6, 406100, [4, 3], 8)
(6, 325200, [4, 3], 7)
(6, 353200, [4, 3], 7)
(6, 473900, [4, 4], 6)
(6, 363000, [4, 3], 5)
[
(25, 451600, [4, 3], 6),
(25, 489000, [4, 3], 6),
(6, 473900, [4, 4], 6),
(10, 113700, [4, 4], 6),
(10, 15200, [3, 4], 7),
]

```

In [None]:
# Show only the hits where at least one of the two back-tested rounds matched more than 3 numbers.
for result in results_set:
    if result[2][0] <= 3 and result[2][1] <= 3:
        continue
    print(result)

In [None]:
# Candidate configurations taken from the search results:
# (n_estimators, random_state, [match counts], minus_value).
# An earlier flat 5-value list that was immediately overwritten has been
# dropped; it used a different layout, so data[3] would not have been a minus_value.
datas = [
    (25, 451600, [4, 3], 6),
    (25, 489000, [4, 3], 6),
    (6, 473900, [4, 4], 6),
    (10, 113700, [4, 4], 6),
    (10, 15200, [3, 4], 7),
    ]
for data in datas:
    n_estimators = data[0]
    random_state = data[1]
    minus_value = data[3]
    print(f'start [now={datetime.now()}]')
    # Replay the candidate against both back-test positions with verbose scoring output.
    for data_pos in (1, 2):
        h_data = lotto_history_data[r_pos[data_pos][0]:r_pos[data_pos][1]-minus_value]
        predicted_numbers_set = predict(lotto_history_data=h_data,
                                        n_estimators=n_estimators,
                                        random_state=random_state,
                                        verbose=0)
        test_prediction(round_nums[data_pos],
                        actual_numbers[data_pos],
                        bonus[data_pos],
                        predicted_numbers_set,
                        n_estimators,
                        random_state,
                        verbose=1)
print(f'completed [now={datetime.now()}]')

In [None]:
# Candidate configurations taken from the search results:
# (n_estimators, random_state, [match counts], minus_value).
datas = [
    (25, 451600, [4, 3], 6),
    (25, 489000, [4, 3], 6),
    (6, 473900, [4, 4], 6),
    (10, 113700, [4, 4], 6),
    (10, 15200, [3, 4], 7),
    ]
print(f'start [now={datetime.now()}]')
# First prediction of every candidate for the round at data_pos 0.
results = []
data_pos = 0
for data in datas:
    n_estimators = data[0]
    random_state = data[1]
    minus_value = data[3]
    h_data = lotto_history_data[r_pos[data_pos][0]:r_pos[data_pos][1]-minus_value]
    predicted_numbers_set = predict(lotto_history_data=h_data,
                                    n_estimators=n_estimators,
                                    random_state=random_state,
                                    verbose=0)
    results.append(predicted_numbers_set[0])
    # Score against the same position that supplies the round label and bonus;
    # previously actual_numbers[1] was used here, so the printed round and the
    # compared numbers referred to different rounds.
    predict_len2 = test_prediction(round_nums[data_pos],
                                   actual_numbers[data_pos],
                                   bonus[data_pos],
                                   predicted_numbers_set,
                                   n_estimators,
                                   random_state,
                                   verbose=1)
print(f'completed [now={datetime.now()}]')
print('Predicted Numbers.')
for result in results:
    print(result)

In [None]:
# Dump the whole results list on one line (it must already exist from an earlier cell).
print(results)

```
[
[45,208000,4,3,5],
[45,101700,4,3,2],
[45,101700,4,3,2],
[45,185400,4,3,2],
[45,96800,4,3,1],
    [45,58600,3,3,0],
]
```

In [None]:
# Back-test one fixed configuration against both positions, using the full
# history slice (no rows cut from the end).
n_estimators = 10
random_state = 5000
match_counts = []
for data_pos in (1, 2):
    h_data = lotto_history_data[r_pos[data_pos][0]:r_pos[data_pos][1]]
    predicted_numbers_set = predict(lotto_history_data=h_data,
                                    n_estimators=n_estimators,
                                    random_state=random_state,
                                    verbose=0)
    match_counts.append(test_prediction(round_nums[data_pos],
                                        actual_numbers[data_pos],
                                        bonus[data_pos],
                                        predicted_numbers_set,
                                        n_estimators,
                                        random_state))
# Display both match counts; before, only the last call's value was shown and
# the first one was silently discarded.
match_counts

In [None]:
# Hand-recorded trial results. The original literal had unbalanced parentheses
# and did not parse; each record is now closed as its own 4-tuple.
# NOTE(review): fields look like (predicted numbers, matched numbers,
# (n_estimators, random_state), (first round, last round) of the history used) - confirm.
manual_trials = [
    ([14, 17, 23, 27, 34, 36], [14, 17, 36], (45, 1000), (1160, 1173)),
    ([13, 16, 23, 27, 34, 36], [36], (45, 1300), (1160, 1173)),
    ([11, 15, 21, 30, 36, 39], [11, 36, 39], (45, 1000), (1160, 1173)),
]
manual_trials