Automatic Update/Collect Draws

1. Request 50 draws per request (the maximum permitted)
2. Check the response
3. Save it in json()
4. Parsing and store the data
5. Check the Update Interval (date?) -> Request the new draw at the right time.
6. ...

### Why drawrange over daterange?
drawrange allows up to 50 and 24 draws at once, while daterange allows a window of up to 90 days.

Since the lottery is drawn weekly, a 90-day daterange returns a maximum of 12 draws per request.

### About MonWedLotto?
MonWedLotto uses the same machine and draws twice weekly: on Monday and Wednesday.

Since the purpose of this project is to analyze the tendency of a "machine", I did not separate the two draws.

Even draw numbers are Monday draws.

Odd draw numbers are Wednesday draws.

### Why 2012 Jan?
1. The API supports draws starting from different dates.
2. To reflect fairly "recent" tendencies.

In [260]:
import requests
import pandas as pd
from enum import Enum
import time
from datetime import date
from requests.exceptions import Timeout

In [261]:
# Classes - Different Lottery has diff rules

class Lotto:
    """Crawl state for one lottery product.

    Attributes:
        game_name: Product name sent to the API (e.g. 'MonWedLotto').
        draw_num: Draw number the next request should start from.
        interval: Number of days to wait after the last known draw before
            crawling again.
        last_draw: Date of the latest known draw as a pandas Timestamp, or
            0 until ``update_last_drawn`` has been called.
    """

    def __init__(self, game_name, draw_num):
        self.game_name = game_name
        self.draw_num = draw_num
        self.interval = 7
        # 0 is the "never crawled" sentinel; kept for backward compatibility.
        self.last_draw = 0

    def update_last_drawn(self, string_date: str):
        """Record the date of the latest known draw.

        Args:
            string_date: Any date string accepted by ``pd.to_datetime``.
        """
        self.last_draw = pd.to_datetime(string_date)
        print("Updated the Lotto Last Drawn Date to: ", self.last_draw)

    def is_time_to_crawl(self):
        """Return True when at least ``interval`` days have passed since the last draw.

        A game whose last draw date was never recorded is always due.
        """
        # Before the first update last_draw is the int sentinel, which cannot
        # take part in date arithmetic.
        if not isinstance(self.last_draw, pd.Timestamp):
            return True
        next_due = self.last_draw + pd.Timedelta(days=self.interval)
        return bool(pd.to_datetime(date.today()) >= next_due)

In [262]:
# Shared request settings.
# Both lotteries are queried through the same data.api drawrange endpoint.
url = 'https://data.api.thelott.com/sales/vmax/web/data/lotto/results/search/drawrange'

# Skeleton of the request body; the None entries are placeholders.
payload_template = {
    'MinDrawNo': None,
    'MaxDrawNo': None,
    'Product': None,
    'CompanyFilter': ["NSWLotteries"],
}

In [263]:
# Create request payload in json format (dict)
# Request 50 draws at one time (api request limit)
def create_payload(game: "Lotto", batch_size: int = 50) -> dict:
    """Build the JSON body for one drawrange request.

    Args:
        game: Object providing ``draw_num`` (first draw to request) and
            ``game_name`` (product name).
        batch_size: Number of consecutive draws to ask for. Defaults to 50.

    Returns:
        A fresh dict covering draws ``draw_num`` through
        ``draw_num + batch_size - 1`` inclusive.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    first_draw = int(game.draw_num)
    # A new dict (and a new CompanyFilter list) is built on every call so
    # callers can never mutate state shared between requests.
    return {
        'MinDrawNo': first_draw,
        'MaxDrawNo': first_draw + batch_size - 1,
        'Product': game.game_name,
        'CompanyFilter': ["NSWLotteries"],
    }

In [264]:
# Request setup: one (initially empty) result frame per lottery
wed_data, sat_data = pd.DataFrame(), pd.DataFrame()

def do_request(url, payload, timeout=(2, 5)):
    """POST one payload to the API and return the decoded JSON body.

    Args:
        url: Endpoint to post to.
        payload: Request body; its 'MinDrawNo' entry is used in log output.
        timeout: ``(connect, read)`` timeout in seconds passed to requests.

    Returns:
        The decoded response dict, or None when the request timed out,
        failed at the network level, returned a non-200 status, carried a
        body that is not a JSON object, or reported an error via 'ErrorInfo'.
    """
    print(payload)
    try:
        r = requests.post(url, json=payload, timeout=timeout)
    except Timeout:
        print('Request', payload['MinDrawNo'], 'TimeOut')
        return None
    except requests.RequestException as err:
        # Connection errors etc. must not abort the whole crawl.
        print('Request ', payload['MinDrawNo'], 'Failed - ', err)
        return None

    if r.status_code != 200:
        print('Request ', payload['MinDrawNo'], 'Failed - ', r.status_code)
        return None
    print('Request', payload['MinDrawNo'], 'Success')

    try:
        response = r.json()
    except ValueError as err:
        # requests raises a ValueError subclass when the body is not JSON.
        print('Request ', payload['MinDrawNo'], 'Failed - ', err)
        return None

    if not isinstance(response, dict):
        print('Request ', payload['MinDrawNo'], 'Failed - ', 'unexpected response body')
        return None

    # A missing 'ErrorInfo' key is treated the same as an explicit null.
    if response.get('ErrorInfo') is not None:
        print("Error - May have issues with parameter of payload")
        print(payload)
        return None

    return response

def interpret_response(data, response):
    """Prepend the draws of one API response to the accumulated frame.

    Args:
        data: DataFrame holding the draws collected so far.
        response: Decoded API response containing a 'Draws' list.

    Returns:
        A tuple ``(combined, latest)`` where ``combined`` has the new draws
        placed before the existing rows and ``latest`` is the highest
        'DrawNumber' in this response; or None when the response holds no
        draws or the frames cannot be concatenated.
    """
    draws = pd.DataFrame.from_dict(response['Draws'])
    if draws.empty:
        # No rows means no 'DrawNumber' column to read from.
        print("parse_response: Data Append Failed")
        return None

    try:
        combined = pd.concat([draws, data])
    except (TypeError, ValueError):
        print("parse_response: Data Append Failed")
        return None

    print("parse_response: Data Append Success")
    # Use the maximum rather than the first row so the result does not
    # depend on the order in which the API lists the draws.
    return combined, int(draws['DrawNumber'].max())

In [265]:
# First draw numbers to crawl from.
# The website has no earlier draws for MonWedLotto.
startNo_monWed = 3108  # 2012 Jan 30
startNo_sat = 3185  # 2012 Jan 28

# One state object per lottery product
wed_game = Lotto(game_name='MonWedLotto', draw_num=startNo_monWed)
sat_game = Lotto(game_name='TattsLotto', draw_num=startNo_sat)

In [266]:
# Script Function

def _crawl_game(data, game, label):
    """Fetch consecutive batches for one game until no more draws are returned.

    Returns the updated frame and the highest draw number fetched (0 if none).
    """
    latest = 0
    while True:
        response = do_request(url, create_payload(game))
        # Check for None first: a failed request has nothing to subscript.
        if response is None or not response.get('Draws'):
            break
        parsed = interpret_response(data, response)
        if parsed is None:
            # Parsing failed; stop instead of unpacking None.
            break
        data, latest = parsed
        print(f"{label} Draws from: ", game.draw_num, "to", latest)
        game.draw_num = latest + 1
        # Pause between requests to avoid hammering the API.
        time.sleep(2)
    return data, latest


def crawl_all(wed_data, wed_game: Lotto, sat_data, sat_game: Lotto):
    """Crawl every available draw for both lotteries.

    Args:
        wed_data: Frame of MonWedLotto draws collected so far.
        wed_game: State object for MonWedLotto; its ``draw_num`` is advanced.
        sat_data: Frame of Saturday draws collected so far.
        sat_game: State object for the Saturday lottery; its ``draw_num`` is
            advanced.

    Returns:
        ``(wed_data, wed_game, sat_data, sat_game)`` with the frames extended
        and each game's last draw date updated when its frame is non-empty.
    """
    wed_data, recent_draws = _crawl_game(wed_data, wed_game, "MonWed")
    print("monWed Draws Completed upto", recent_draws)
    print("Now Moving on to Sat Draws")

    sat_data, recent_draws = _crawl_game(sat_data, sat_game, "Sat")
    print("sat Draws Completed upto", recent_draws)
    print("Now Wrapping up crawlling")

    # New draws are prepended, so the first row is taken as the latest draw.
    for frame, game in ((wed_data, wed_game), (sat_data, sat_game)):
        if len(frame) != 0:
            latest_date = frame['DrawDate'].head(1).values[0]
            game.update_last_drawn(latest_date)

    return wed_data, wed_game, sat_data, sat_game

In [267]:
# Run the full crawl and keep the updated frames and game states
wed_data, wed_game, sat_data, sat_game = crawl_all(
    wed_data=wed_data,
    wed_game=wed_game,
    sat_data=sat_data,
    sat_game=sat_game,
)

{'MinDrawNo': 3108, 'MaxDrawNo': 3157, 'Product': 'MonWedLotto', 'CompanyFilter': ['NSWLotteries']}
Request 3108 Success
parse_response: Data Append Success
MonWed Draws from:  3108 to 3157
{'MinDrawNo': 3158, 'MaxDrawNo': 3207, 'Product': 'MonWedLotto', 'CompanyFilter': ['NSWLotteries']}
Request 3158 Success
parse_response: Data Append Success
MonWed Draws from:  3158 to 3207
{'MinDrawNo': 3208, 'MaxDrawNo': 3257, 'Product': 'MonWedLotto', 'CompanyFilter': ['NSWLotteries']}
Request 3208 Success
parse_response: Data Append Success
MonWed Draws from:  3208 to 3257
{'MinDrawNo': 3258, 'MaxDrawNo': 3307, 'Product': 'MonWedLotto', 'CompanyFilter': ['NSWLotteries']}
Request 3258 Success
parse_response: Data Append Success
MonWed Draws from:  3258 to 3307
{'MinDrawNo': 3308, 'MaxDrawNo': 3357, 'Product': 'MonWedLotto', 'CompanyFilter': ['NSWLotteries']}
Request 3308 Success
parse_response: Data Append Success
MonWed Draws from:  3308 to 3357
{'MinDrawNo': 3358, 'MaxDrawNo': 3407, 'Product': 

In [270]:
# Write each lottery's crawled draws to its own UTF-8 CSV file (no index column)
for frame, csv_path in ((wed_data, 'Crawled_MonWed.csv'), (sat_data, 'Crawled_Sat.csv')):
    frame.to_csv(csv_path, encoding='utf-8', index=False)