In [10]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from typing import Dict, List
import pyprind
from math import cos, pi, floor
from datetime import datetime

In [11]:
def parse_challenge(page):
    """
    Extract the anti-bot challenge embedded in a page and solve it.

    The text following the first ``<script>`` tag is split into lines; the
    second and third of those lines are each expected to hold an assignment
    of the form ``name=value;``. The value on the second line is the
    challenge, the value on the third line is the challenge id.
    Logic adapted from
    https://github.com/R3dy/jigsaw-rails/blob/master/lib/breakbot.rb

    Returns a dict with the keys ``challenge``, ``challenge_id`` and
    ``challenge_result`` (the answer computed by ``get_challenge_answer``).
    Raises IndexError when the page does not have the expected layout.
    """
    script_lines = page.split('<script>')[1].split('\n')

    def _assigned_value(line):
        # Keep what sits between the first '=' and the first ';'.
        statement = line.split(';')[0]
        return statement.split('=')[1]

    challenge = _assigned_value(script_lines[1])
    challenge_id = _assigned_value(script_lines[2])
    result = get_challenge_answer(challenge)
    return {
        'challenge': challenge,
        'challenge_id': challenge_id,
        'challenge_result': result,
    }

def telegram_bot_sendtext(bot_message: str) -> None:
    """
    Send ``bot_message`` to a fixed Telegram chat through the Bot API.

    The message is sent with ``parse_mode=Markdown`` via the ``sendMessage``
    endpoint. The query parameters are handed to ``requests`` as a dict so
    that they are URL-encoded; concatenating the raw message into the URL
    would truncate or corrupt any text containing ``&``, ``#`` or ``+``.

    The HTTP response is not inspected. Network errors raised by
    ``requests`` propagate to the caller.
    """
    # NOTE(review): the bot token and chat id are hard-coded in source;
    # consider moving them to configuration and rotating the token.
    bot_token = '1172952527:AAGoM74Rx25DPBpmQhEwacs_AQ9GWI8Oybk'
    chat_id = "839266998"
    endpoint = 'https://api.telegram.org/bot' + bot_token + '/sendMessage'
    query = {
        'chat_id': chat_id,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    requests.get(endpoint, params=query)

def get_challenge_answer(challenge):
    """
    Compute the answer string for a numeric challenge.

    ``challenge`` is a string of decimal digits (at least three characters).
    The answer is built from the challenge value, its last character and the
    three smallest characters after sorting; the result is the floored
    numeric part followed by a textual suffix.
    """
    last_digit = int(challenge[-1])
    ordered = sorted(challenge)
    min_digit = int(ordered[0])

    doubled_third = 2 * int(ordered[2])
    weighted_sum = doubled_third + int(ordered[1])
    # Suffix is a string concatenation, not an arithmetic sum.
    suffix = f"{doubled_third}{ordered[1]}"
    power = (min_digit + 2) ** int(ordered[1])

    scaled = int(challenge) * 3 + weighted_sum
    # cos(pi * k) flips the sign of the scaled value for odd k.
    total = scaled * cos(pi * weighted_sum) - power + (min_digit - last_digit)
    return str(int(floor(total))) + suffix

def make_hkjc_request(url: str) -> str:
    """
    Fetch ``url`` and return the body of the final response as text.

    A GET is issued first with a browser-like User-agent. If the response
    body mentions ``X-AA-Challenge``, the challenge is parsed and solved and
    a second GET is sent carrying the challenge headers; the cookies of that
    response are used. Otherwise the cookies of the first response are used.
    The page is then requested with a POST carrying those cookies, and the
    text of that POST response is returned.
    """
    browser_headers = {'User-agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'}
    with requests.Session() as session:
        first_response = session.get(url, headers=browser_headers)
        if 'X-AA-Challenge' not in first_response.text:
            cookie_jar = first_response.cookies
        else:
            solved = parse_challenge(first_response.text)
            challenge_headers = {
                'X-AA-Challenge': solved['challenge'],
                'X-AA-Challenge-ID': solved['challenge_id'],
                'X-AA-Challenge-Result': solved['challenge_result'],
            }
            cookie_jar = session.get(url, headers=challenge_headers).cookies

        final_response = session.post(url, cookies=cookie_jar)

    return final_response.text

def evaluate_course(title):
    """
    Map a title to a racecourse code.

    Spaces are removed and the text is lower-cased before matching.
    Returns "ST" when the text contains "shatin", "HV" when it contains
    "happyvalley" (checked in that order), and None otherwise.
    """
    squashed = title.replace(" ", "").lower()
    known_courses = (("shatin", "ST"), ("happyvalley", "HV"))
    for needle, code in known_courses:
        if needle in squashed:
            return code
    return None


def scrape_prize_pool(date) -> pd.DataFrame:
    """
    Scrape the incident report of every race held on ``date``.

    The results page for ``date`` is fetched first; the number of links in
    its race-card table and the racecourse code are read from it, and the
    pages of the remaining races are then fetched one by one.

    Returns a DataFrame with one row per fetched page and the string columns
    ``race_no``, ``date`` and ``incident_report`` (empty string when a page
    has no incident-report element).

    Raises AttributeError when an expected element (race-card table, race
    header row, or the "(n)" race number in the header) is missing, so the
    caller can record the date as failed.
    """
    columns = ["race_no", "date", "incident_report"]
    race_no_pattern = re.compile(r"\((\d+)\)")
    header_class = re.compile("bg_blue.+font_wb$")

    # first race
    url = f"https://racing.hkjc.com/racing/information/english/Racing/LocalResults.aspx?RaceDate={date}"
    first_soup = BeautifulSoup(make_hkjc_request(url), "lxml")
    html_list = [first_soup]

    # extract info
    race_card = first_soup.find("table", attrs={"class": re.compile("f_fs12.+racecard$")})
    num_of_races = len(race_card.find_all("a"))
    course = evaluate_course(str(first_soup.find("span", attrs={"class": "f_fl f_fs13"})))

    # extract info for all other races and append to html_list
    # NOTE(review): the range stops at num_of_races - 1; whether that covers
    # the last race depends on how many links the race-card table holds —
    # confirm against a live page.
    for i in range(2, num_of_races):
        url = f"https://racing.hkjc.com/racing/information/english/Racing/LocalResults.aspx?RaceDate={date}&Racecourse={course}&RaceNo={str(i)}"
        html_list.append(BeautifulSoup(make_hkjc_request(url), "lxml"))

    # Collect one record per page and build the frame once. Assigning scalars
    # to the columns of a zero-row DataFrame leaves it with zero rows, and
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    for soup in html_list:
        overhead_text = soup.find("tr", attrs={"class": header_class}).get_text()
        race_no = race_no_pattern.search(overhead_text).group(1)

        report_div = soup.find("div", attrs={"class": "race_incident_report"})
        incident_report = "" if report_div is None else report_div.get_text()

        rows.append({
            "race_no": str(race_no),
            "date": str(date),
            "incident_report": str(incident_report),
        })

    return pd.DataFrame(rows, columns=columns)

def remove_scraped_date(date_list: list) -> list:
    """
    Drop the dates that were already scraped successfully.

    Reads ``status_incident_report.txt`` (one date per line) from the current
    working directory and returns the entries of ``date_list`` that are not
    listed there, in their original order. When the status file does not
    exist yet (first run), nothing has been scraped and ``date_list`` is
    returned unfiltered.
    """
    try:
        with open("status_incident_report.txt", "r", encoding='utf-8') as txt:
            scraped = {line.replace('\n', '') for line in txt}
    except FileNotFoundError:
        # The file is only created by the first successful update_status call.
        return list(date_list)
    return [x for x in date_list if x not in scraped]

def update_status(date: str) -> None:
    """
    Record ``date`` as scraped by appending it, followed by a newline, to
    ``status_incident_report.txt`` (created if it does not exist).
    """
    with open("status_incident_report.txt", "a+", encoding='utf-8') as status_file:
        status_file.writelines((date + "\n",))



In [None]:
if __name__ == "__main__":
    # Driver: scrape every not-yet-processed race date, append the result to
    # an HDF5 table, and record each finished date in the status file so an
    # interrupted run can be resumed.
    telegram_bot_sendtext(f"Prize pool scraper program started at {str(datetime.now())}.")
    error_list = []
    # fetch date list
    date_list = list(pd.read_pickle("1_horse_id_data.pkl")["date"])
    date_list = remove_scraped_date(date_list)

    print(f"Total {str(len(date_list))} race days.")
    bar = pyprind.ProgBar(len(date_list))

    # The counter has to live outside the loop; resetting it on every
    # iteration meant the progress notification below could never fire.
    status_int = 0

    for date in date_list:
        try:
            df = scrape_prize_pool(date)

            # write to hd5 -> hd5 defaults to append mode
            # NOTE(review): table-format appends may reject strings longer
            # than the column width fixed by the first write; consider
            # passing min_itemsize — confirm against the pandas HDF5 docs.
            df.to_hdf(r"data_20200822\incident_report.h5", key = "incident_report", append = True, format = "table")

            # Mark the date as done immediately after the write so that a
            # later failure cannot cause the same rows to be appended again
            # on the next run.
            update_status(date)

        except Exception as e:
            print(f"{date}: {str(e)}")
            error_list.append((date, str(e)))

        status_int += 1
        bar.update()

        # report status to telegram
        if status_int % 500 == 0:
            try:
                telegram_bot_sendtext(f"Incident report scraper - completed {str(status_int)} out of {str(len(date_list))} at {str(datetime.now())} with {len(error_list)} errors.")
            except requests.RequestException as e:
                # Notifications are best-effort and must not stop the scrape.
                print(f"telegram: {str(e)}")

Total 1611 race days.
2002/09/01: 'NoneType' object has no attribute 'get_text'


0% [###                           ] 100% | ETA: 14:27:31

2008/03/30: 'NoneType' object has no attribute 'get_text'
2008/12/28: 'NoneType' object has no attribute 'get_text'


0% [####                          ] 100% | ETA: 17:09:21

2009/05/17: 'NoneType' object has no attribute 'get_text'
2009/10/04: 'NoneType' object has no attribute 'get_text'
2010/02/07: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2010/02/07&Racecourse=ST&RaceNo=2 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835520A0>: Failed to establish a new connection: [WinError 10053] An established connection was aborted by the software in your host machine'))
2010/02/21: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2010/02/21 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835528E0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2010/02/28: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/informatio

0% [#####                         ] 100% | ETA: 15:22:50

2010/10/17: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2010/10/17 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835528B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2010/10/24: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2010/10/24 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835526A0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2010/10/31: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2010/10/31 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835527C0>: Failed to establish a new connection: [Errno 

0% [######                        ] 100% | ETA: 12:17:40


2012/05/27: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2012/05/27 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835529D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2012/06/03: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2012/06/03 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835525B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2012/06/17: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2012/06/17 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835526A0>: Failed to establish a new connection: [Errno

0% [#######                       ] 100% | ETA: 54:17:56

2014/04/13: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2014/04/13 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835390D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2014/04/27: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2014/04/27 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298353B0D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2014/05/04: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2014/05/04 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835390D0>: Failed to establish a new connection: [Errno 

0% [########                      ] 100% | ETA: 45:24:22

2016/01/24: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2016/01/24 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835590D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2016/02/14: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2016/02/14 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835360D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2016/02/21: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2016/02/21 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835590D0>: Failed to establish a new connection: [Errno 

0% [#########                     ] 100% | ETA: 38:29:54

2017/05/28: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2017/05/28 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022989C6FBE0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2017/06/04: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2017/06/04 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835580D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2017/06/11: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2017/06/11 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835570D0>: Failed to establish a new connection: [Errno 

0% [##########                    ] 100% | ETA: 32:58:41


2019/03/10: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2019/03/10 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983550100>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2019/03/17: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2019/03/17 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298355A100>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2019/03/24: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2019/03/24 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983550100>: Failed to establish a new connection: [Errno

0% [###########                   ] 100% | ETA: 28:32:20

2020/06/21: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2020/06/21 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298362E130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2020/07/05: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2020/07/05 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983607130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2020/07/12: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2020/07/12 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298362E130>: Failed to establish a new connection: [Errno 

0% [############                  ] 100% | ETA: 24:45:57

2000/10/14: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2000/10/14 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983550130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2000/10/18: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2000/10/18 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298360E130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2000/10/28: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2000/10/28 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298360E130>: Failed to establish a new connection: [Errno 

0% [#############                 ] 100% | ETA: 21:34:32

2001/10/13: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2001/10/13 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361C160>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2001/10/17: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2001/10/17 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361C160>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2001/10/27: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2001/10/27 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361C160>: Failed to establish a new connection: [Errno 

0% [##############                ] 100% | ETA: 18:53:24

2002/09/14: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2002/09/14 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983624160>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2002/09/18: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2002/09/18 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298362A130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2002/09/25: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2002/09/25 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298362A130>: Failed to establish a new connection: [Errno 

0% [###############               ] 100% | ETA: 16:31:00


2003/06/18: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2003/06/18 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361D040>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2003/09/06: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2003/09/06 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361D040>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2003/09/10: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2003/09/10 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298361D040>: Failed to establish a new connection: [Errno

0% [################              ] 100% | ETA: 14:26:30

2004/09/25: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2004/09/25 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298360C130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2004/09/29: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2004/09/29 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298360F130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2004/10/06: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2004/10/06 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002298360C130>: Failed to establish a new connection: [Errno 

0% [#################             ] 100% | ETA: 12:38:38


2005/06/22: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2005/06/22 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983600130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2005/06/29: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2005/06/29 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835F6130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2005/09/07: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2005/09/07 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983600130>: Failed to establish a new connection: [Errno

0% [##################            ] 100% | ETA: 11:00:53


2006/11/22: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2006/11/22 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022989B8D130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2006/11/29: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2006/11/29 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022989B8D130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2006/12/02: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2006/12/02 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022989B8D130>: Failed to establish a new connection: [Errno

0% [###################           ] 100% | ETA: 09:33:28

2007/12/26: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2007/12/26 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022989B6A130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2008/01/09: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2008/01/09 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000229835F6130>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
2008/01/12: HTTPSConnectionPool(host='racing.hkjc.com', port=443): Max retries exceeded with url: /racing/information/english/Racing/LocalResults.aspx?RaceDate=2008/01/12 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000022983629130>: Failed to establish a new connection: [Errno 