In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
from dotenv import load_dotenv
import os

load_dotenv()

# API settings
API_KEY = os.environ.get('HISTORICAL_API_KEY')
BASE_URL = "https://api.the-odds-api.com/v4/historical/sports/basketball_nba"

# Function to get formatted date string
def get_date_string(date):
    return date.strftime('%Y-%m-%dT21:00:00Z')  # 5:00 PM ET

def get_csv_date(date):
    return date.strftime('%m_%d_%Y')

# Start with 30 days of data
start_date = datetime(2024, 12, 16)  # Adjust start date as needed
days_to_collect = 15

all_days_props = []  # Store all days' data

for day in range(days_to_collect):
    current_date = start_date + timedelta(days=day)
    DATE = get_date_string(current_date)
    event_ids = []
    
    print(f"\nFetching data for {current_date.date()}")
    
    # 1) List events for the day
    events_resp = requests.get(
        f"{BASE_URL}/events",
        params={
            "apiKey": API_KEY,
            "date": DATE
        }
    )
    
    if events_resp.status_code != 200:
        print(f"Error fetching events: HTTP {events_resp.status_code}")
        print(events_resp.text)
        continue
        
    # Parse JSON and get events
    resp_json = events_resp.json()
    events = resp_json if isinstance(resp_json, list) else resp_json.get("data", [])
    
    # Get event IDs
    for ev in events:
        ev_id = ev.get("id") or ev.get("event_id")
        event_ids.append(ev_id)
        print(f"Found game: {ev.get('away_team')} at {ev.get('home_team')}")
    
    # 2) Get odds for each event
    day_props = []  # Store this day's data
    
    for id in event_ids:
        url = f'{BASE_URL}/events/{id}/odds'
        
        params = {
            'apiKey': API_KEY,
            'date': DATE,
            'regions': 'us_dfs',
            'markets': 'player_points,player_rebounds,player_assists',
            'oddsFormat': 'american',
            'dateFormat': 'iso'
        }
        
        response = requests.get(url, params=params)
        
        if response.status_code == 200:
            data = response.json()
            props = []
            
            game_info = {
                'home_team': data['data']['home_team'],
                'away_team': data['data']['away_team'],
                'game_id': id,
                'commence_time': data['data']['commence_time']
            }
            
            for bookmaker in data['data']['bookmakers']:
                for market in bookmaker['markets']:
                    if market['key'] in ['player_points', 'player_rebounds', 'player_assists']:
                        for outcome in market['outcomes']:
                            prop_dict = {
                                'player': outcome['description'],
                                'market': market['key'],
                                'bookmaker': bookmaker['title'],
                                'side': outcome['name'],
                                'line': outcome['point'],
                                'price': outcome['price'],
                                **game_info
                            }
                            props.append(prop_dict)
            
            if props:
                day_props.extend(props)
        else:
            print(f'Failed to get odds for game {id}: status_code {response.status_code}')
        
        # Add delay to avoid hitting API rate limits
        time.sleep(0.1)  # 100ms delay between requests
    
    # Create DataFrame for this day
    if day_props:
        df = pd.DataFrame(day_props)
        
        # Save individual day's data
        csv_date = get_csv_date(current_date)
        df.to_csv(f'CSV_FILES/HISTORICAL_ODDS/{csv_date}.csv', index=False)
        
        all_days_props.extend(day_props)
        print(f"Saved data for {current_date.date()} - {len(day_props)} props collected")
    
    # Add delay between days
    time.sleep(1)  # 1 second delay between days

# Create final combined DataFrame
if all_days_props:
    final_df = pd.DataFrame(all_days_props)
    
    # Save combined data
    final_df.to_csv('CSV_FILES/HISTORICAL_ODDS/ALL_HISTORICAL_ODDS.csv', index=False)
    print(f"\nCompleted! Total props collected: {len(all_days_props)}")
else:
    print("No data was collected")

In [2]:
import pandas as pd
import glob
import os

# Path to your historical odds directory
historical_odds_path = 'CSV_FILES/HISTORICAL_ODDS/'

# Get all CSV files in the directory
csv_files = glob.glob(os.path.join(historical_odds_path, '*.csv'))

# Filter out the ALL_HISTORICAL_ODDS.csv if it exists
csv_files = [f for f in csv_files if 'ALL_HISTORICAL_ODDS.csv' not in f]

# List to store all dataframes
all_dfs = []

# Read each CSV file and append to the list
for file in csv_files:
    print(f"Reading {os.path.basename(file)}")
    df = pd.read_csv(file)
    all_dfs.append(df)

# Concatenate all dataframes
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
    print(f"\nTotal rows in combined dataset: {len(final_df)}")
    
    # Save the combined dataset
    output_path = os.path.join(historical_odds_path, 'ALL_HISTORICAL_ODDS.csv')
    final_df.to_csv(output_path, index=False)
    print(f"Saved combined data to {output_path}")
else:
    print("No CSV files found to combine")

Reading 01_01_2025.csv
Reading 01_02_2025.csv
Reading 01_03_2025.csv
Reading 01_04_2025.csv
Reading 01_05_2025.csv
Reading 01_06_2025.csv
Reading 01_07_2025.csv
Reading 01_08_2025.csv
Reading 01_09_2025.csv
Reading 01_10_2025.csv
Reading 01_11_2025.csv
Reading 01_12_2025.csv
Reading 01_13_2025.csv
Reading 10_22_2024.csv
Reading 10_23_2024.csv
Reading 10_24_2024.csv
Reading 10_25_2024.csv
Reading 10_26_2024.csv
Reading 10_27_2024.csv
Reading 10_28_2024.csv
Reading 10_29_2024.csv
Reading 10_30_2024.csv
Reading 10_31_2024.csv
Reading 11_01_2024.csv
Reading 11_02_2024.csv
Reading 11_03_2024.csv
Reading 11_04_2024.csv
Reading 11_05_2024.csv
Reading 11_06_2024.csv
Reading 11_07_2024.csv
Reading 11_08_2024.csv
Reading 11_09_2024.csv
Reading 11_10_2024.csv
Reading 11_11_2024.csv
Reading 11_12_2024.csv
Reading 11_13_2024.csv
Reading 11_14_2024.csv
Reading 11_15_2024.csv
Reading 11_16_2024.csv
Reading 11_17_2024.csv
Reading 11_18_2024.csv
Reading 11_19_2024.csv
Reading 11_20_2024.csv
Reading 11_