In [None]:
import re
import pandas as pd
import numpy as np

from argparse import ArgumentParser
from tqdm.autonotebook import tqdm

from lib import request as req
from lib import database as db

In [None]:
def get_loto_numbers(url):
    """Collect the draws listed on one back-number page.

    Every table returned by ``req.get_dataframe(url)`` is read as one draw
    (presumably a list of pandas DataFrames -- confirm in ``lib.request``):

    * the header of the second column holds the draw number wrapped in one
      leading and one trailing character, which are stripped;
    * row 0, second cell, is taken as the draw date;
    * row 1, cells after the first, hold the main numbers (decimal strings)
      and possibly the bonus numbers written in parentheses, e.g. ``(07)``;
    * when row 1 has no parenthesised cell, the bonus numbers are read from
      the string cells of row 2 instead.

    Parameters
    ----------
    url : str
        Address handed unchanged to ``req.get_dataframe``.

    Returns
    -------
    dict
        ``{times: {'times': int, 'date': ..., 'numbers': [int, ...],
        'bonus': [int, ...]}}`` keyed by draw number.

    Raises
    ------
    ValueError
        If the draw number or a bonus cell is not numeric once its first and
        last characters are removed.
    KeyError
        If a table lacks row 0 or row 1, or lacks row 2 when the fallback
        bonus row is needed.
    """
    ret = {}
    for table in req.get_dataframe(url):
        times = int(table.columns[1][1:-1])
        date = table.loc[0, :].values[1]

        # Keep string cells only: a NaN (float) cell used to crash the filters
        # below with "'float' object is not subscriptable", while the fallback
        # bonus row was already guarded by a type check.
        cells = [c for c in table.loc[1, :].values[1:] if isinstance(c, str)]

        # startswith/endswith also tolerate an empty string, unlike c[0]/c[-1].
        bonus = [c for c in cells if c.startswith('(') and c.endswith(')')]
        numbers = [c for c in cells if c.isdecimal()]

        if not bonus:
            # Bonus numbers are not inline; take them from the next row.
            bonus = [c for c in table.loc[2, :].values[1:] if isinstance(c, str)]

        ret[times] = {
            'times'   : times,
            'date'    : date,
            'numbers' : [int(n) for n in numbers],
            # Drop the surrounding characters (parentheses) before converting.
            'bonus'   : [int(b[1:-1]) for b in bonus],
        }
    return ret

In [None]:
def get_loto_detail_numbers(kind, url):
    """Read the draws from the first table of a "detail" back-number page.

    Each row of the table is one draw: cell 0 is the draw number wrapped in
    one leading and one trailing character, cell 1 is the date, the next
    ``n`` cells are the main numbers and all remaining cells are bonus
    numbers, where ``n`` is 7 for ``'loto7'``, 6 for ``'loto6'`` and 5 for
    any other ``kind``.

    Parameters
    ----------
    kind : str
        Lottery kind; only decides how many cells count as main numbers.
    url : str
        Address handed unchanged to ``req.get_dataframe``.

    Returns
    -------
    dict
        ``{times: {'times': int, 'date': ..., 'numbers': list,
        'bonus': list}}`` keyed by draw number. Number cells are returned as
        found in the table, without conversion.
    """
    if kind == 'loto7':
        main_count = 7
    elif kind == 'loto6':
        main_count = 6
    else:
        main_count = 5
    # Index of the first bonus cell: two leading cells plus the main numbers.
    split_at = 2 + main_count

    # Only the first table of the page is used.
    table = req.get_dataframe(url)[0]

    draws = {}
    for row_idx in range(len(table)):
        cells = table.loc[row_idx, :].values
        draw_no = int(cells[0][1:-1])
        entry = {
            'times'   : draw_no,
            'date'    : cells[1],
            'numbers' : list(cells[2:split_at]),
            'bonus'   : list(cells[split_at:]),
        }
        draws[draw_no] = entry
    return draws

In [None]:
def get_option(args=None):
    """Parse the scraper's command-line options.

    Parameters
    ----------
    args : list of str, optional
        Argument strings to parse. With the default ``None`` argparse reads
        ``sys.argv[1:]``, exactly as before. Pass an explicit list (for
        example ``['-k', 'loto6']``) when ``sys.argv`` does not belong to this
        program, such as inside a live Jupyter kernel, or in tests.

    Returns
    -------
    argparse.Namespace
        Namespace whose ``kind`` attribute is one of ``'mini'``, ``'loto6'``
        or ``'loto7'``.

    Raises
    ------
    SystemExit
        Raised by argparse when ``-k/--kind`` is missing or is not one of the
        allowed choices.
    """
    parser = ArgumentParser()
    parser.add_argument('-k', '--kind', choices = ['mini', 'loto6', 'loto7'], required = True)
    return parser.parse_args(args)

In [None]:
# Read the command-line options; `kind` selects which lottery the rest of the
# notebook scrapes and is used by the following cells.
a = get_option()
kind = a.kind
# Presumably removes the rows already stored for this kind so the fresh scrape
# replaces them -- confirm in lib.database.
# NOTE(review): this runs before any page is fetched; if that assumption holds,
# a scrape that fails later leaves the store without data for this kind.
db.delete_loto(kind)

In [None]:
# Site root, prepended to the relative links found on the index page.
domein = 'https://takarakuji.rakuten.co.jp'

# Index page listing the past results for the selected lottery kind.
url  = domein + "/backnumber/" + kind + "_past/"
soup = req.get_page(url)

# Links to the regular back-number pages.
# The pattern is a raw string so `\d` stays a regex token instead of an invalid
# string escape (a SyntaxWarning on Python 3.12+). The former `[\d+]` was a
# character class matching one digit or a literal '+'; `\d+` matches one or
# more digits, which is presumably what was intended.
monthly_pattern = re.compile(r"^/backnumber/" + kind + r"/\d+")
href = [domein + h['href'] for h in soup.find_all(href = monthly_pattern)]

# Links to the "detail" pages; the site spells the mini lottery "miniloto" there.
detail_kind    = "miniloto" if kind == "mini" else kind
detail_pattern = re.compile(r"^/backnumber/" + detail_kind + r"_detail/\d+")
d_href = soup.find_all(href = detail_pattern)
href  += [domein + h['href'] for h in d_href]

# Scrape every page and merge the draws into one dict keyed by draw number;
# a draw found on more than one page keeps the entry scraped last.
loto = {}
for link in tqdm(href):
    if '_detail/' in link:
        loto.update(get_loto_detail_numbers(kind, link))
    else:
        loto.update(get_loto_numbers(link))

In [None]:
# One row per draw; the number lists are stored as comma-separated strings.
# All rows are collected first and the frame is built once: DataFrame.append
# was removed in pandas 2.0, and appending row by row copied the whole frame
# on every iteration.
loto_columns = ['kind', 'times', 'date', 'numbers', 'bonus']
loto_records = [
    [
        kind,
        draw['times'],
        draw['date'],
        ','.join(map(str, draw['numbers'])),
        ','.join(map(str, draw['bonus'])),
    ]
    for draw in loto.values()
]
df = pd.DataFrame(loto_records, columns = loto_columns)

# Order by draw number and renumber the index from zero.
df = df.sort_values('times').reset_index(drop = True)

db.add_loto(df)