In [None]:
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm
import json
import gc

def getLotto(minDate, maxDate):
  lotto_dict = {}
  empty_df = pd.DataFrame()
  for i in tqdm(range(minDate, maxDate+1)):
    request_url = 'https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(i)
    request_lotto = requests.get(request_url)
    lotto_info = request_lotto.json()
    lotto_dict['drwtNo1'] = lotto_info['drwtNo1']
    lotto_dict['drwtNo2'] = lotto_info['drwtNo2']
    lotto_dict['drwtNo3'] = lotto_info['drwtNo3']
    lotto_dict['drwtNo4'] = lotto_info['drwtNo4']
    lotto_dict['drwtNo5'] = lotto_info['drwtNo5']
    lotto_dict['drwtNo6'] = lotto_info['drwtNo6']
    lotto_dict['bnusNo'] = lotto_info['bnusNo']

    lotto_df = pd.DataFrame(lotto_dict, index = [i])
    empty_df = pd.concat([empty_df, lotto_df], axis = 0)
  
  result_df = empty_df.copy()
  del empty_df, lotto_df, lotto_dict
  gc.collect()
  return result_df

In [None]:
firstDate = 1
lastDate = 100
lotto_df = getLotto(firstDate, lastDate)

100%|██████████| 100/100 [01:23<00:00,  1.20it/s]


In [None]:
def numbers2onbin(numbers):
  onbin = np.zeros(45)
  for i in range(6):
    onbin[int(numbers[i])-1] = 1
  return onbin

def onbin2numbers(onbin):
  numbers = []
  for i in range(len(onbin)):
    if onbin[i] == 1.0:
      numbers.append(i+1)
  return numbers

numbers = lotto_df[lotto_df.columns[:-1]].values
onbins = list(map(numbers2onbin, numbers))

In [None]:
onbins

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
        0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.,
        0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 

In [103]:
X_samples = onbins[0:lastDate-1]
y_samples = onbins[1:lastDate]

train_idx = (firstDate, int(lastDate*0.8))
valid_idx = (int(lastDate*0.8) + 1, int(lastDate * 0.9))
test_idx = (int(lastDate * 0.9) + 1, lastDate)

In [104]:
train_idx

(1, 80)

In [105]:
valid_idx

(81, 90)

In [106]:
test_idx

(91, 100)

In [111]:
import torch
import torch.nn as nn
import torch.optim as optim

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dense = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.dense(out[:, -1, :])
        out = self.sigmoid(out)
        return out

num_epochs = 10
input_size = 45
hidden_size = 128
output_size = 45
model = LSTMModel(input_size, hidden_size, output_size)
optimizer = optim.Adam(model.parameters(), lr = 0.01)
criterion = nn.CrossEntropyLoss()

train_loss = []
train_acc = []
val_loss = []
val_acc = []

for epoch in range(num_epochs):
    model.train()
    batch_train_loss = []
    batch_train_acc = []
    
    for i in range(train_idx[0], train_idx[1]):
        xs = torch.Tensor(X_samples[i]).unsqueeze(0).unsqueeze(0)
        ys = torch.Tensor(y_samples[i]).unsqueeze(0)

        optimizer.zero_grad()
        outputs = model(xs)
        
        loss = criterion(outputs, ys)
        loss.backward()
        optimizer.step()
        
        batch_train_loss.append(loss.item())
        acc = ((outputs > 0.5).float() == ys).sum().item() / ys.numel()
        batch_train_acc.append(acc)
        
    train_loss.append(np.mean(batch_train_loss))
    train_acc.append(np.mean(batch_train_acc))
    
    model.eval()
    batch_val_loss = []
    batch_val_acc = []
    
    for i in range(train_idx[0], train_idx[1]):
        xs = torch.Tensor(X_samples[i]).unsqueeze(0).unsqueeze(0)
        ys = torch.Tensor(y_samples[i]).unsqueeze(0)
        
        with torch.no_grad():
            outputs = model(xs)
            loss = criterion(outputs, ys)
        
        batch_val_loss.append(loss.item())
        acc = ((outputs > 0.5).float() == ys).sum().item() / ys.numel()
        batch_val_acc.append(acc)
    
    val_loss.append(np.mean(batch_val_loss))
    val_acc.append(np.mean(batch_val_acc))
    
    print('epoch {0:4d} train acc {1:0.3f} train loss {2:0.3f} val acc {3:0.3f} val loss {4:0.3f}'.format(
        epoch, np.mean(train_acc), np.mean(train_loss), np.mean(val_acc), np.mean(val_loss))
    )

epoch    0 train acc 0.601 train loss 22.866 val acc 0.671 val loss 22.365
epoch    1 train acc 0.627 train loss 22.572 val acc 0.655 val loss 21.867
epoch    2 train acc 0.640 train loss 22.189 val acc 0.671 val loss 21.457
epoch    3 train acc 0.661 train loss 21.811 val acc 0.699 val loss 21.106
epoch    4 train acc 0.688 train loss 21.463 val acc 0.726 val loss 20.808
epoch    5 train acc 0.716 train loss 21.157 val acc 0.755 val loss 20.554
epoch    6 train acc 0.741 train loss 20.896 val acc 0.779 val loss 20.335
epoch    7 train acc 0.763 train loss 20.670 val acc 0.799 val loss 20.147
epoch    8 train acc 0.783 train loss 20.469 val acc 0.815 val loss 19.995
epoch    9 train acc 0.800 train loss 20.295 val acc 0.829 val loss 19.862


In [115]:
def gen_numbers_from_probability(nums_prob):
  ball_box = []
  for n in range(45):
    ball_count = int(nums_prob[n] * 100 + 1)
    ball = np.full((ball_count), n + 1)
    ball_box += list(ball)
  
  selected_balls = []

  while True:
    if len(selected_balls) == 6:
      break

    ball_index = index = np.random.randint(len(ball_box), size = 1)[0]
    ball = ball_box[ball_index]

    if ball not in selected_balls:
      selected_balls.append(ball)

    return selected_balls

In [151]:
xs = X_samples[-1].reshape(1, 1, 45)
xs = torch.Tensor(xs)

ys_pred = model(xs)
list_numbers = []

print('번호 추첨')
for n in range(5):
  numbers = gen_numbers_from_probability(ys_pred[0])
  numbers.sort()
  print(f'{numbers[0]}', end = ' ')

번호 추첨
15 29 10 32 40 