In [2]:
import sys
# Put the parent directory on the import path; presumably that is where the
# local `lib` package (imported below as `lib.progress_bar`) lives — confirm.
sys.path.append('..')

In [3]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
from lib.progress_bar import progress_bar
import json
import sqlite3

In [4]:
# Endpoints on the dhlottery site that the scraping functions request.

# Draw-result page as fetched by recentDraw() (no draw number appended).
lotto_result_url = "https://www.dhlottery.co.kr/gameResult.do?method=byWin"

# Same page for a specific draw: append the draw number to this prefix.
lotto_result_draw = f"{lotto_result_url}&drwNo="

# Winning-store listing for a specific draw: append the draw number.
store_url = (
    "https://www.dhlottery.co.kr/store.do"
    "?method=topStore&pageGubun=L645&drwNo="
)

In [5]:
def recentDraw():
    """Return the draw number advertised on the results page.

    Requests `lotto_result_url`, reads the `content` attribute of the
    `<meta id="desc" name="description">` tag and returns, as an int, the
    first run of digits that is immediately followed by the character '회'.

    Raises:
        TypeError: if the meta tag is not found (the lookup result is None).
        AttributeError: if the description contains no '<digits>회' token.
    """
    page = requests.get(lotto_result_url)
    description = BeautifulSoup(page.text, "lxml").find(
        "meta", {"id" : "desc", "name" : "description"}
    )['content']
    found = re.search(r'\d+회', description)
    # The match looks like "975회"; drop the trailing character, keep the digits.
    return int(found.group()[:-1])

In [6]:
# Look up the latest draw number once. collectDraws() and collectLocations()
# capture this value as the default for their `end` parameter when they are defined.
mostRecent = recentDraw()
print(f'Latest Draw : {mostRecent}')

Latest Draw : 975


In [223]:
def collectDraws(lst, start=1, end=mostRecent):
    """Scrape the result page of every draw in ``start``..``end`` and append to ``lst``.

    One dict per draw is appended, with these keys:

    * ``'draw'``    – the draw number.
    * ``'nums'``    – seven ints taken from the page's meta description: the five
      comma-separated numbers matched first, followed by the two numbers of the
      ``a+b`` token (in the sample output, six main numbers then the bonus).
    * ``'price'``   – the integers parsed from the even-positioned ``td.tar`` cells
      of the first table on the page.
    * ``'winners'`` – five ints; entry ``k`` is ``price[k]`` floor-divided by the
      value of the odd-positioned cell that follows it, or 0 when that cell is 0.
      (Presumably total prize money divided by per-winner payout — confirm
      against the page layout.)

    Args:
        lst: list that receives the per-draw dicts (mutated in place).
        start: first draw number to fetch (inclusive).
        end: last draw number to fetch (inclusive). The default is the value
            ``mostRecent`` had when this function was defined, not when it is called.

    Raises:
        AttributeError / TypeError: if a page does not contain the expected
            meta tag, table, or number patterns (a lookup returns None).
    """
    print("Collecting Draws...")

    # Compiled once instead of on every iteration. All patterns are raw strings:
    # the previous non-raw '\d+\+\d+' relied on invalid string escapes, which
    # newer Python versions warn about.
    first = re.compile(r'(\d+,){4}\d+')
    last = re.compile(r'\d+\+\d+')
    regex = re.compile(r'>\d+.*원')

    for idx in range(start, end + 1):
        draw = {'draw': idx}

        req = requests.get(lotto_result_draw + str(idx))
        soup = BeautifulSoup(req.text, 'lxml')

        meta = soup.find("meta", {"id" : "desc", "name" : "description"})['content']
        first_mo, last_mo = first.search(meta), last.search(meta)
        draw['nums'] = list(map(int, first_mo.group().split(',') + last_mo.group().split('+')))

        total, winners = [], [0] * 5
        table = soup.find("table").find_all("td", {"class":"tar"})
        for i, c in enumerate(table):
            mo = regex.search(str(c))
            # Strip the leading '>' and trailing '원', then the thousands separators.
            num = int(mo.group()[1:-1].replace(",", ""))
            if i % 2 == 0:
                total.append(num)
            elif num > 0:
                # Odd cell: pair it with the even cell that was just recorded.
                winners[i // 2] = total[-1] // num

        draw['winners'] = winners
        draw['price'] = total
        progress_bar(idx - start, end - start)
        lst.append(draw)
    progress_bar(end, end)
    print("Done Collecting Draws!")

In [188]:
# Accumulator list: collectDraws() appends one dict per draw to it.
draws = []
collectDraws(lst=draws, start=1)



In [190]:
# Quick sanity check of the scrape: record count plus the first and last records.
print(f"We have collected {len(draws)} draws.")
print(f"First draw : {draws[0]}")
print(f"Last draw : {draws[-1]}")

We have collected 975 draws.
First draw : {'draw': 1, 'nums': [10, 23, 29, 33, 37, 40, 16], 'winners': [0, 1, 28, 2537, 40155], 'price': [0, 143934100, 143934000, 287695800, 401550000]}
Last draw : {'draw': 975, 'nums': [7, 8, 9, 17, 22, 24, 5], 'winners': [9, 61, 2828, 135604, 2212347], 'price': [21963693375, 3660615612, 3660616932, 6780200000, 11061735000]}


In [191]:
# Save the collected draws to draws.json as indented JSON.
# json.dump writes straight to the file; the earlier json.dumps call built the
# same document as a string and discarded it, so it has been removed.
with open("draws.json", "w") as outfile:
    json.dump(draws, outfile, indent=4)

In [216]:
def parseStores(content, win = 1):
    """Turn the rows of a store table into lists of cleaned cell strings.

    For every ``tr`` found in ``content`` the ``td`` cells at positions 1..3 are
    kept; when ``win == 2`` the last of those is dropped as well. Each kept cell
    is converted to its markup string and reduced to text:

    * when ``win == 1``, the second kept cell yields the first run of Hangul
      syllables found in its markup;
    * every other cell yields the greedy ``>.*<`` match with the two delimiter
      characters removed (``.`` does not cross newlines).

    The text is whitespace-stripped. A row with no cells in that range
    contributes an empty list.

    Args:
        content: object exposing ``find_all("tr")`` whose rows expose
            ``find_all('td')`` (here: a BeautifulSoup ``tbody``).
        win: 1 or 2, selecting the cell layout described above.

    Returns:
        list[list[str]]: one inner list per table row, in document order.

    Raises:
        AttributeError: if a cell's markup matches neither pattern.
    """
    between_tags = re.compile(r'>.*<')
    hangul_run = re.compile(r'[가-힣]+')

    def cell_text(position, cell):
        # Work on the cell's markup string, exactly as the patterns expect.
        markup = str(cell)
        if win == 1 and position == 1:
            text = str(hangul_run.search(markup).group())
        else:
            text = str(between_tags.search(markup).group()[1:-1])
        return text.strip()

    parsed_rows = []
    for row in content.find_all("tr"):
        cells = row.find_all('td')[1:4]
        if win == 2:
            cells = cells[:-1]
        parsed_rows.append([cell_text(pos, cell) for pos, cell in enumerate(cells)])
    return parsed_rows

In [221]:
##### Store listings are provided from draw 924 onward
def collectLocations(lst, start=924, end=mostRecent):
    """Scrape the winning-store listing of every draw in ``start``..``end`` into ``lst``.

    For each draw the listing page is fetched and a dict is appended to ``lst``:

    * ``'draw'``   – the draw number;
    * ``'first'``  – ``parseStores`` output for the first matching table of page 1;
    * ``'second'`` – ``parseStores(..., win=2)`` output for the second matching
      table, concatenated over every page. The page count is taken to be the
      number of ``<a>`` elements inside ``div#page_box`` on page 1.

    If ``start`` is below 924 a notice is printed and nothing is collected.

    Args:
        lst: list that receives the per-draw dicts (mutated in place).
        start: first draw number to fetch (inclusive); must be >= 924.
        end: last draw number to fetch (inclusive). The default is the value
            ``mostRecent`` had when this function was defined.
    """
    if start < 924:
        print("Stores information is available since 924th draw.")
        return
    print("Collecting Locations...")

    table_class = "tbl_data tbl_data_col"

    def load_page(url):
        # Download one listing page and parse it with the lxml parser.
        return BeautifulSoup(requests.get(url).text, 'lxml')

    def store_tables(page):
        # Every table on the page carrying the store-listing CSS classes.
        return page.find_all("table", {"class":table_class})

    for idx in range(start, end + 1):
        base_url = store_url + str(idx)

        first_page = load_page(base_url)
        bodies = [tbl.find('tbody') for tbl in store_tables(first_page)]

        first_prize = parseStores(content = bodies[0])
        second_prize = parseStores(content = bodies[1], win = 2)

        # Pages 2..N only contribute rows to the second table.
        page_total = len(first_page.find('div', {"id": "page_box"}).find_all('a'))
        for page_no in range(2, page_total + 1):
            next_page = load_page(base_url + "&nowPage=" + str(page_no))
            second_body = store_tables(next_page)[1].find('tbody')
            second_prize.extend(parseStores(content = second_body, win = 2))

        progress_bar(idx - start, end - start)
        lst.append({'draw': idx, 'first': first_prize, 'second': second_prize})
    progress_bar(end, end)
    print("")
    print("Done Collecting Locations!")

In [222]:
# Accumulator list: collectLocations() appends one dict per draw to it
# (called with its defaults: from draw 924 up to the default `end`).
locations = []
collectLocations(lst=locations)

Collecting Locations...


In [224]:
# Quick sanity check of the scrape: record count plus the most recent record.
print(f"We have collected {len(locations)} set of locations.")
print(f"Last Set : {locations[-1]}")

We have collected 52 set of locations.
Last Set : {'draw': 975, 'first': [['로또구포점', '자동', '부산 북구 덕천2길 23-3 1층'], ['노다지복권방', '자동', '대전 서구 조달청길 62'], ['금강복권', '자동', '경기 김포시 율생로 3'], ['행운복권', '자동', '경기 수원시 권선구 권선로 774-1'], ['오복 복권', '자동', '강원 강릉시 율곡로 2807'], ['춘향로또', '자동', '전북 남원시 동림로 102-1 산들애 김밥'], ['로또행운마트', '수동', '경남 김해시 능동로 177 복권판매점'], ['황금돼지 마트(복권방)', '수동', '경남 김해시 금관대로599번길 29 부영9단지상가 108호'], ['본스튜디오', '자동', '제주 제주시 하귀로 111']], 'second': [['가로판매소', '서울 구로구 새말로 97 서울 구로구 새말로 117-24(신도림역 2출구)(구로동)'], ['대박복권방', '서울 금천구 독산로64길 18'], ['인터넷 복권판매사이트', '동행복권(dhlottery.co.kr)'], ['인터넷 복권판매사이트', '동행복권(dhlottery.co.kr)'], ['버스충전소', '서울 서초구 동작대로 126 가판'], ['연초2호 쇼케이스7호', '서울 서초구 신반포로 194 강남고속버스터미널 쇼케이스7호'], ['인터넷 복권판매사이트', '동행복권(dhlottery.co.kr)'], ['인터넷 복권판매사이트', '동행복권(dhlottery.co.kr)'], ['인터넷 복권판매사이트', '동행복권(dhlottery.co.kr)'], ['노다지복권방', '서울 양천구 월정로 137'], ['로또복권', '서울 영등포구 영중로  22 120호(영등포동3가)'], ['복권마을', '서울 영등포구 가마산로 421-2'], ['럭키슈퍼', '서울 용산구 후암로 52'], ['월드마켓', '서울 은평구 불광로 28 시장내 18호']

In [227]:
# Save the collected store listings to stores.json as indented JSON.
# json.dump writes straight to the file; the earlier json.dumps call built the
# same document as a string and discarded it, so it has been removed.
with open("stores.json", "w") as outfile:
    json.dump(locations, outfile, indent=4)

In [29]:
# Reload both datasets from the JSON files written above — presumably so the
# database step can run without repeating the scrape.
draws = []
stores = []
with open("stores.json", "r") as file:
    stores = json.load(file)
with open("draws.json", "r") as file:
    draws = json.load(file)

In [57]:
# SQL used by rawToDB(): one CREATE TABLE and one parameterised INSERT per table.

# DRAWS: the seven numbers of each draw.
sql_draw_create = (
    "CREATE TABLE IF NOT EXISTS `DRAWS`("
    "turn int, num_1 int, num_2 int, num_3 int, num_4 int, num_5 int, num_6 int, num_bonus int)"
)
sql_draw = (
    "INSERT INTO `DRAWS`(`turn`, `num_1`, `num_2`, `num_3`, `num_4`, `num_5`, `num_6`, `num_bonus`) "
    "VALUES(?, ?, ?, ?, ?, ?, ?, ?)"
)

# WINNERS: five winner counts per draw.
sql_winner_create = (
    "CREATE TABLE IF NOT EXISTS `WINNERS`("
    "turn int, winner_1 int, winner_2 int, winner_3 int, winner_4 int, winner_5 int)"
)
sql_winner = (
    "INSERT INTO `WINNERS`(`turn`, `winner_1`, `winner_2`, `winner_3`, `winner_4`, `winner_5`) "
    "VALUES(?, ?, ?, ?, ?, ?)"
)

# PRIZES: five prize amounts per draw.
sql_prize_create = (
    "CREATE TABLE IF NOT EXISTS `PRIZES`("
    "turn int, prize_1 int, prize_2 int, prize_3 int, prize_4 int, prize_5 int)"
)
sql_prize = (
    "INSERT INTO `PRIZES`(`turn`, `prize_1`, `prize_2`, `prize_3`, `prize_4`, `prize_5`) "
    "VALUES(?, ?, ?, ?, ?, ?)"
)

# STORES: one row per listed store, flagged by pick method and prize tier.
sql_store_create = (
    "CREATE TABLE IF NOT EXISTS `STORES`("
    "idx int, turn int, name varchar(255), auto BOOLEAN, firstPrize BOOLEAN, address varchar(255))"
)
sql_store = (
    "INSERT INTO `STORES`(`idx`, `turn`, `name`, `auto`, `firstPrize`, `address`) "
    "VALUES(?, ?, ?, ?, ?, ?)"
)

In [58]:
def rawToDB(draws, stores, db_path):
    """Load the scraped draw and store records into a SQLite database.

    Creates the DRAWS, WINNERS, PRIZES and STORES tables if they are missing
    (using the module-level ``sql_*_create`` statements), then inserts one row
    per draw into each of the first three tables and one STORES row per store.

    Args:
        draws: iterable of dicts with keys ``"draw"``, ``"nums"``, ``"winners"``
            and ``"price"``; each of the three sequences is converted
            element-wise with ``int()`` and stored after the draw number.
        stores: iterable of dicts with keys ``"draw"``, ``"first"`` and
            ``"second"``. Entries of ``"first"`` are stored with
            ``firstPrize = 1`` and ``auto`` set to 1 when element 1 (stripped)
            equals ``"자동"``, else 0. Entries of ``"second"`` are stored with
            ``firstPrize = 0`` and ``auto = NULL``. In both cases element 0 is
            the name and the last element the address.
        db_path: path passed to ``sqlite3.connect()``.

    Notes:
        * The inserts are committed together at the very end. If any statement
          raises, the commit is skipped, the connection is still closed and
          the exception propagates.
        * Rows are only ever inserted and ``idx`` restarts at 0 on every call,
          so running this twice against the same file duplicates the data.
    """
    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()

        for create_stmt in (sql_draw_create, sql_winner_create,
                            sql_prize_create, sql_store_create):
            cur.execute(create_stmt)

        for draw_info in draws:
            draw = draw_info["draw"]
            nums = draw_info["nums"]
            winners = draw_info["winners"]
            prizes = draw_info["price"]

            cur.execute(sql_draw, tuple([draw] + [int(n) for n in nums]))
            cur.execute(sql_winner, tuple([draw] + [int(n) for n in winners]))
            cur.execute(sql_prize, tuple([draw] + [int(n) for n in prizes]))

        # Running row number across all draws; written to STORES.idx.
        row_idx = 0
        for stores_info in stores:
            draw = stores_info["draw"]
            for store in stores_info["first"]:
                is_auto = 1 if str(store[1]).strip() == "자동" else 0
                cur.execute(
                    sql_store,
                    (row_idx, draw, str(store[0]), is_auto, 1, str(store[-1])),
                )
                row_idx += 1
            for store in stores_info["second"]:
                # These entries carry no pick-method field, hence auto = NULL.
                cur.execute(
                    sql_store,
                    (row_idx, draw, str(store[0]), None, 0, str(store[-1])),
                )
                row_idx += 1
        con.commit()
    finally:
        # Always release the database handle, even when an insert fails.
        con.close()

In [61]:
# SQLite file (relative path) that rawToDB() creates or appends to.
db_path = "lottery.db"
rawToDB(draws, stores, db_path)