In [7]:
import json
from pathlib import Path, PurePath # To define open and save locations that are cross-compatible between Windows/Linux

import betfairlightweight
from betfairlightweight import filters

import datetime

import pandas as pd
import numpy as np

from bz2 import BZ2File # To unzip the Betfair data from its downloaded format

from betfairlightweight import StreamListener
from betfairlightweight.streaming.stream import MarketStream

In [8]:
# Sign in to Betfair. Credentials are read from `api_logins.json`, which sits in
# the directory two levels above the current working directory.
project_dir = Path.cwd().parents[1]
logins_dir = project_dir.joinpath('api_logins.json')

with logins_dir.open() as credentials_file:
    login_dict = json.load(credentials_file)

trading = betfairlightweight.APIClient(
    username=login_dict['my_username'],
    password=login_dict['my_password'],
    app_key=login_dict['my_app_key'],
    certs=login_dict['certs_path'],
)

# Kept as the final expression so the cell displays the login response.
trading.login()

<LoginResource>

In [9]:
# Ask the historic-data endpoint for this account's data. The result is a list
# of dictionaries; the cells below read their 'forDate' and 'plan' keys.
data_dicts = trading.historic.get_my_data()

In [10]:
# Collect the 'forDate' stamp of every entry that belongs to the Advanced Plan.
adv_range = [
    entry['forDate']
    for entry in data_dicts
    if entry['plan'] == 'Advanced Plan'
]

In [11]:
# Earliest advanced-data stamp. The stamps are compared as raw strings; with the
# zero-padded '%Y-%m-%dT%H:%M:%S' layout parsed below, that ordering is chronological.
earliest_stamp = min(adv_range)
adv_min_date = datetime.datetime.strptime(earliest_stamp, '%Y-%m-%dT%H:%M:%S')

# find max date for adv data
def last_day_of_month(any_day):
    """Return ``any_day`` moved to the last day of its own month.

    Works for any object offering ``replace(day=...)`` and timedelta
    arithmetic (``datetime.date`` / ``datetime.datetime``); any time-of-day
    component is carried through unchanged.
    """
    # Day 28 exists in every month, and 28 + 4 days always falls in the next month.
    into_next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the final day of the original month.
    overshoot = datetime.timedelta(days=into_next_month.day)
    return into_next_month - overshoot

# Latest advanced-data stamp, then pushed out to the last day of its month.
latest_stamp = max(adv_range)
adv_max_temp = datetime.datetime.strptime(latest_stamp, '%Y-%m-%dT%H:%M:%S')
adv_max_date = last_day_of_month(adv_max_temp)

In [12]:
# List the downloadable files inside the advanced-data date range
# (Horse Racing, Advanced Plan, GB, WIN markets, file type "M").
adv_file_list = trading.historic.get_file_list(
    "Horse Racing",
    "Advanced Plan",
    from_day=adv_min_date.day,
    # Fixed: this used to pass `adv_min_date.day`, i.e. the day-of-month was
    # sent as the start month, shifting (or invalidating) the start of the range.
    from_month=adv_min_date.month,
    from_year=adv_min_date.year,
    to_day=adv_max_date.day,
    to_month=adv_max_date.month,
    to_year=adv_max_date.year,
    market_types_collection=["WIN"],
    countries_collection=["GB"],
    file_type_collection=["M"]
)
print("No. items :", len(adv_file_list))

No. items : 1858


In [13]:
# Folder that receives the advanced data: <project_dir>/data/raw/api/advanced
adv_dir = project_dir.joinpath('data', 'raw', 'api', 'advanced')

In [None]:
# Download the advanced data to disk (original estimate: ~2 hours for the full
# list, extrapolated from %%time on one download). Only the first five entries
# of adv_file_list are fetched here.
adv_file_dirs = []

# The download writes into adv_dir, so make sure the folder exists first.
adv_dir.mkdir(parents=True, exist_ok=True)

for file in adv_file_list[0:5]:
    # `file` is the remote path string; `file not in adv_dir` was a membership
    # test against a Path object, which raises TypeError instead of checking the disk.
    # NOTE(review): assumes download_file saves under the remote path's last
    # component inside store_directory - confirm against the installed betfairlightweight.
    local_path = adv_dir / file.split('/')[-1]
    if local_path.exists():
        # Already on disk from an earlier run: skip the network round-trip.
        adv_file_dirs.append(str(local_path))
    else:
        download = trading.historic.download_file(file_path=file, store_directory=adv_dir)
        # Record where the file ended up so the decompression cell has something to process.
        adv_file_dirs.append(str(download) if download else str(local_path))

In [None]:
# Decompress every downloaded archive next to itself and remember the output paths.

adv_extfile_dirs = []

for file in adv_file_dirs:
    archive_path = str(file)  # accept plain strings as well as Path objects

    # Strip only a trailing '.bz2' (splitting on the first occurrence could cut
    # the path short); the decompressed copy is saved without a file type.
    if archive_path.endswith('.bz2'):
        newfilepath = archive_path[:-len('.bz2')]
    else:
        newfilepath = archive_path

    # Context managers close both handles even if reading or writing fails.
    with BZ2File(archive_path) as zipfile:
        data = zipfile.read()  # get the decompressed data
    with open(newfilepath, 'wb') as extracted:
        extracted.write(data)  # write an uncompressed file

    adv_extfile_dirs.append(newfilepath)

# TODO: wrap the per-file work in try/except so one unreadable archive does not stop the run

In [15]:
# NOTE(review): `df` is first assigned in the cell labelled In [14] further down,
# so on a fresh Restart & Run All this cell raises NameError. Move it below that cell.
df

Unnamed: 0,Age,Age_units,AgeRange
0,99,Y,65+
1,53,Y,35-65
2,71,Y,65+
3,84,Y,65+
4,84,Y,65+


In [14]:
# Toy example: tag each age with the bracket it falls into.
bins = [0, 2, 18, 35, 65]
names = ['<2', '2-18', '18-35', '35-65', '65+']

# np.digitize numbers the brackets from 1 for these bins, so key the labels the same way.
d = {position: label for position, label in enumerate(names, start=1)}

df = pd.DataFrame({
    'Age': [99, 53, 71, 84, 84],
    'Age_units': ['Y'] * 5,
})

bracket_index = np.digitize(df['Age'], bins)
lookup_label = np.vectorize(d.get)
df['AgeRange'] = lookup_label(bracket_index)

In [None]:
# creating dataframe from processed data