In [4]:
import bz2

# Path to the bz2-compressed market file, relative to the notebook's working directory.
file = "Data/1.225457316.bz2"

# Number of leading bytes echoed as a sanity check; the full payload stays in `content`.
PREVIEW_BYTES = 1000

# Decompress the whole file into memory as raw bytes (it is decoded to text in a later cell).
with bz2.open(file, 'rb') as fp:
    content = fp.read()

# Show only a short prefix: printing the entire payload floods the cell output
# and bloats the saved notebook.
print(content[:PREVIEW_BYTES])


b'{"op":"mcm","clk":"10791826256","pt":1709183865565,"mc":[{"id":"1.225457316","marketDefinition":{"bspMarket":true,"turnInPlayEnabled":true,"persistenceEnabled":true,"marketBaseRate":10.0,"eventId":"33063008","eventTypeId":"7","numberOfWinners":1,"bettingType":"ODDS","marketType":"WIN","marketTime":"2024-03-01T02:00:00.000Z","suspendTime":"2024-03-01T02:00:00.000Z","bspReconciled":false,"complete":true,"inPlay":false,"crossMatching":false,"runnersVoidable":false,"numberOfActiveRunners":7,"betDelay":0,"status":"OPEN","runners":[{"adjustmentFactor":2.34,"status":"ACTIVE","sortPriority":1,"id":66543008,"name":"1. Bonnyville"},{"adjustmentFactor":8.29,"status":"ACTIVE","sortPriority":2,"id":66543009,"name":"2. King Lane"},{"adjustmentFactor":43.73,"status":"ACTIVE","sortPriority":3,"id":66543010,"name":"3. Love The Cube"},{"adjustmentFactor":22.53,"status":"ACTIVE","sortPriority":4,"id":2161990,"name":"4. Reluctance"},{"adjustmentFactor":19.45,"status":"ACTIVE","sortPriority":5,"id":50610

In [7]:
import pandas as pd
import json

# The decompressed payload is treated as newline-delimited JSON: one message per line.
decoded_data = content.decode('utf-8')

# Drop blank lines so that an empty file or a stray empty line between records
# does not make json.loads raise on an empty string.
json_strings = [line for line in decoded_data.strip().split('\n') if line.strip()]

# One parsed dict per message, in file order.
parsed_data = [json.loads(jstr) for jstr in json_strings]

In [15]:
# Flatten every 'marketDefinition' found in the parsed messages into one record
# per runner. Market changes without a (non-empty) marketDefinition are skipped,
# and a runner appears once for each definition snapshot that lists it.
#
# Fields deliberately left out of the table (add a key below to include them):
#   market level: eventTypeId, eventId, countryCode, timezone, marketType,
#                 bettingType, numberOfWinners
#   runner level: sortPriority
market_definitions = [
    {
        'Market ID': market_change['id'],
        'Market Status': definition.get('status', 'N/A'),
        'Market Time': definition.get('marketTime', 'N/A'),
        'Suspend Time': definition.get('suspendTime', 'N/A'),
        'Venue': definition.get('venue', 'N/A'),
        'Runner Name': runner.get('name', 'N/A'),
        'Runner ID': runner.get('id', 'N/A'),
        'Runner Status': runner.get('status', 'N/A'),
        'Adjustment Factor': runner.get('adjustmentFactor', 'N/A'),
    }
    for message in parsed_data
    for market_change in message.get('mc', [])
    # Single-item loop: binds the definition once so it can be filtered and reused.
    for definition in [market_change.get('marketDefinition')]
    if definition
    for runner in definition.get('runners', [])
]

# One row per (definition snapshot, runner).
df = pd.DataFrame(market_definitions)

print(df.head())


     Market ID Market Status               Market Time  \
0  1.225457316          OPEN  2024-03-01T02:00:00.000Z   
1  1.225457316          OPEN  2024-03-01T02:00:00.000Z   
2  1.225457316          OPEN  2024-03-01T02:00:00.000Z   
3  1.225457316          OPEN  2024-03-01T02:00:00.000Z   
4  1.225457316          OPEN  2024-03-01T02:00:00.000Z   

               Suspend Time     Venue       Runner Name  Runner ID  \
0  2024-03-01T02:00:00.000Z  Goulburn     1. Bonnyville   66543008   
1  2024-03-01T02:00:00.000Z  Goulburn      2. King Lane   66543009   
2  2024-03-01T02:00:00.000Z  Goulburn  3. Love The Cube   66543010   
3  2024-03-01T02:00:00.000Z  Goulburn     4. Reluctance    2161990   
4  2024-03-01T02:00:00.000Z  Goulburn        5. Zhivago    5061015   

  Runner Status  Adjustment Factor  
0        ACTIVE               2.34  
1        ACTIVE               8.29  
2        ACTIVE              43.73  
3        ACTIVE              22.53  
4        ACTIVE              19.45  


In [19]:
# Collect, for every runner change ('rc') in every message, the first entry of
# its 'atb' list together with the message-level 'pt' value.
# NOTE(review): 'pt' looks like an epoch-milliseconds publish time and 'atb'
# like [odds, size] price levels - confirm against the feed specification.
# NOTE(review): only atb[0] is kept; confirm the first entry is the level
# wanted rather than the full ladder.
trading_data = []

# Reuse the messages already parsed into `parsed_data` rather than running
# json.loads over every line a second time.
for message in parsed_data:
    timestamp = message.get('pt')

    for market_change in message.get('mc', []):
        for runner_change in market_change.get('rc', []):
            atb = runner_change.get('atb', [])
            if not atb:
                # This runner change carries no 'atb' update.
                continue

            # Unpacking enforces that the entry is exactly a two-item pair.
            odds, size = atb[0]
            trading_data.append({
                'timestamp': timestamp,
                'runner_id': runner_change.get('id'),
                'atb': [odds, size],
            })

In [16]:
from datetime import datetime


      Market ID Market Status               Market Time  \
0   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
1   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
2   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
3   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
4   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
5   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
6   1.225457316          OPEN  2024-03-01T02:00:00.000Z   
7   1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
8   1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
9   1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
10  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
11  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
12  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
13  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
14  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z   
15  1.225457316     SUSPENDED  2024-03-01T02:00:00.000Z 