In [5]:
# Updated 2024.11.02
import pandas as pd
import requests
import csv
from datetime import datetime, timedelta

# Credential for The Odds API -- replace the placeholder with a real key
API_KEY = "Insert API Key Here"

# Query settings passed along with the API call
# MMA sport key
SPORT = "mma_mixed_martial_arts"
# Restrict bookmakers to the US market
REGIONS = "us"
# Head-to-head markets only
MARKETS = "h2h"
# Prices are requested in American odds format
ODDS_FORMAT = "american"
# Timestamps are requested in ISO format
DATE_FORMAT = "iso"

# Root of the historical endpoints of the Odds API
BASE_URL = "https://api.the-odds-api.com/v4/historical/sports"

def get_event_dates_between(masterlist_df, start_date, end_date):
    """
    Returns the unique event dates within an inclusive date range, each shifted forward by 12 hours.

    Parameters:
    - masterlist_df (DataFrame): A pandas DataFrame with an 'event_date' column. The column may already be
      datetime-typed or may hold datetime-compatible values such as ISO date strings; it is converted with
      `pd.to_datetime` before filtering. The input DataFrame is not modified.
    - start_date (str or datetime-like): Lower bound of the range. Events on or after this date are included.
    - end_date (str or datetime-like): Upper bound of the range. Events on or before this date are included.

    Process:
    1. Converts `start_date`, `end_date` and the 'event_date' column to pandas datetimes.
    2. Keeps only the event dates with start_date <= event_date <= end_date.
    3. Adds a 12-hour offset to each remaining date.
    4. Drops duplicate dates, keeping the first occurrence so the order of `masterlist_df` is preserved.

    Returns:
    - event_dates (list): Unique `pandas.Timestamp` objects (12-hour offset applied), in the order they appear
      in `masterlist_df`.
    - num_events (int): The number of unique dates in `event_dates`.

    Raises:
    - KeyError: If `masterlist_df` has no 'event_date' column.

    Example:
    --------
    >>> masterlist_df = pd.DataFrame({'event_date': ['2024-08-31', '2024-08-24', '2024-08-17', '2024-08-10']})
    >>> start_date = '2024-08-01'
    >>> end_date = '2024-08-30'
    >>> get_event_dates_between(masterlist_df, start_date, end_date)
    ([Timestamp('2024-08-24 12:00:00'), Timestamp('2024-08-17 12:00:00'), Timestamp('2024-08-10 12:00:00')], 3)
    """
    # Convert input dates to datetime objects
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Coerce the column as well so that string-typed dates compare correctly against the bounds.
    # This creates a new Series, so the caller's DataFrame is left untouched.
    all_dates = pd.to_datetime(masterlist_df['event_date'])

    # Keep only the dates inside the inclusive range
    in_range = all_dates[(all_dates >= start_date) & (all_dates <= end_date)]

    # Apply the 12-hour offset, then de-duplicate while preserving order
    shifted = (in_range + pd.Timedelta(hours=12)).drop_duplicates()

    # Series.tolist() yields pandas Timestamp objects (not numpy.datetime64), so callers can rely on
    # Timestamp methods such as .strftime() regardless of the installed pandas version.
    event_dates = shifted.tolist()
    return event_dates, len(event_dates)


def get_historical_mma_odds(api_key, snapshot_date, timeout=30):
    """
    Retrieves historical MMA odds for a specified timestamp from The Odds API.

    Parameters:
    - api_key (str): The API key for authenticating requests to The Odds API.
    - snapshot_date (str): The date and time for which to retrieve odds, formatted as an ISO 8601 timestamp
                           (e.g., '2024-08-24T12:00:00Z').
    - timeout (float or tuple, optional): Timeout in seconds handed to `requests.get`. Defaults to 30 so a
                                          stalled connection cannot block the script indefinitely.

    Process:
    1. Builds the endpoint URL from the module-level `BASE_URL` and `SPORT`.
    2. Sends an HTTP GET request with these query parameters:
       - `api_key`: The API key provided by the caller.
       - `regions`, `markets`, `oddsFormat`, `dateFormat`: Taken from the module-level constants
         `REGIONS`, `MARKETS`, `ODDS_FORMAT` and `DATE_FORMAT`.
       - `date`: The requested `snapshot_date`.
    3. Parses the response body as JSON when the status code is 200.

    Returns:
    - odds_data (dict): Parsed JSON response for the specified timestamp, or `None` if the status code is
                        not 200 or the body is not valid JSON.
    - response (Response): The full response object, allowing access to status codes and headers.

    Raises:
    - requests.exceptions.RequestException: If the request itself fails (connection error, timeout, ...).
      These are deliberately not swallowed, because no response object exists to return in that case.

    Example:
    --------
    >>> api_key = 'YOUR_API_KEY'
    >>> snapshot_date = '2024-08-24T12:00:00Z'
    >>> odds_data, response = get_historical_mma_odds(api_key, snapshot_date)
    >>> if odds_data:
    ...     print(f"Odds retrieved for timestamp: {snapshot_date}")
    ... else:
    ...     print(f"Failed to retrieve odds for timestamp: {snapshot_date}")
    Odds retrieved for timestamp: 2024-08-24T12:00:00Z
    """
    url = f'{BASE_URL}/{SPORT}/odds'
    params = {
        'api_key': api_key,
        'regions': REGIONS,
        'markets': MARKETS,
        'date': snapshot_date,
        'oddsFormat': ODDS_FORMAT,
        'dateFormat': DATE_FORMAT
    }

    # Make the API request; without a timeout, requests would wait forever on a stalled server
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f'Failed to get historical odds: status_code {response.status_code}, response body {response.text}')
        return None, response

    # A 200 response with a malformed body is reported the same way as any other failed request.
    # The JSON decode errors raised by response.json() are ValueError subclasses.
    try:
        odds_data = response.json()
    except ValueError:
        print(f'Failed to parse historical odds: status_code {response.status_code}, response body {response.text}')
        return None, response

    # Return the parsed JSON response and the response object to access headers
    return odds_data, response


def save_odds_to_csv(odds_data, file_name):
    """
    Appends one CSV row per outcome found in `odds_data` to `file_name` (UTF-8 encoded).

    Parameters:
    - odds_data (dict): Odds snapshot with a 'timestamp' entry and a 'data' list of events. Each event
      provides 'home_team', 'away_team', 'commence_time' and a 'bookmakers' list; each bookmaker provides
      'title' and a 'markets' list; each market provides 'key' and an 'outcomes' list whose items carry
      'name' and 'price'.
    - file_name (str): Target CSV file. It is created when missing and appended to otherwise.

    Process:
    1. Opens the file in append mode with UTF-8 encoding.
    2. Writes the header row only when the write position is 0, i.e. the file is still empty.
    3. Walks events -> bookmakers -> markets -> outcomes and writes one row per outcome:
       `[timestamp, home_team, away_team, commence_time, bookmaker, market, outcome_name, odds_price]`.

    Example:
    --------
    >>> odds_data = {
    ...     'timestamp': '2024-08-24T12:00:00Z',
    ...     'data': [{
    ...         'home_team': 'Fighter A',
    ...         'away_team': 'Fighter B',
    ...         'commence_time': '2024-08-24T12:00:00Z',
    ...         'bookmakers': [{
    ...             'title': 'Bookmaker1',
    ...             'markets': [{
    ...                 'key': 'h2h',
    ...                 'outcomes': [
    ...                     {'name': 'Fighter A', 'price': -150},
    ...                     {'name': 'Fighter B', 'price': +130},
    ...                 ],
    ...             }],
    ...         }],
    ...     }],
    ... }
    >>> save_odds_to_csv(odds_data, 'mma_odds.csv')

    Note:
    - Repeated calls keep adding rows to the same file; nothing is overwritten.
    """
    column_names = ['Timestamp', 'Home Team', 'Away Team', 'Commence Time',
                    'Bookmaker', 'Market', 'Outcome Name', 'Odds Price']

    def outcome_rows():
        # Lazily flatten the nested API structure into one flat row per outcome
        for fight in odds_data['data']:
            for book in fight['bookmakers']:
                for mkt in book['markets']:
                    for result in mkt['outcomes']:
                        yield [
                            odds_data['timestamp'],
                            fight['home_team'],
                            fight['away_team'],
                            fight['commence_time'],
                            book['title'],
                            mkt['key'],
                            result['name'],
                            result['price'],
                        ]

    with open(file_name, mode='a', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)

        # A write position of 0 right after opening in append mode means the file is empty
        if csv_file.tell() == 0:
            out.writerow(column_names)

        out.writerows(outcome_rows())


def get_and_save_odds_for_dates(api_key, event_dates, start_date, end_date, limit=None):
    """
    Iterates through a list of event dates, retrieves historical odds for each date,
    and saves the data to a CSV file named according to the date range.

    Parameters:
    - api_key (str): The Odds API key for authenticating requests.
    - event_dates (list of datetime): Event dates to query. Each item must support `.strftime()`.
    - start_date (str): The start date for the event range, used only for naming the output file.
    - end_date (str): The end date for the event range, used only for naming the output file.
    - limit (int, optional): The maximum number of event dates to process.
                             If None, all dates in `event_dates` are processed.
                             A limit of 0 processes nothing (no API requests are made).

    Process:
    1. Builds the output file name `mma_odds_(end_date to start_date).csv`
       (note the order: end date first, then start date).
    2. If `limit` is not None, truncates `event_dates` to the first `limit` items.
    3. For each remaining event date:
       - Formats it as a timestamp string such as '2024-08-24T12:00:00Z'.
       - Calls `get_historical_mma_odds` to retrieve odds data for that timestamp.
       - If data is returned, appends it to the CSV file using `save_odds_to_csv`.
       - Prints the outcome and the request-quota headers of the API response.

    Returns:
    - None

    Example:
    --------
    >>> api_key = 'YOUR_API_KEY'
    >>> event_dates = [pd.Timestamp('2024-08-24 12:00:00'), pd.Timestamp('2024-08-17 12:00:00')]
    >>> start_date = '2024-08-01'
    >>> end_date = '2024-08-24'
    >>> get_and_save_odds_for_dates(api_key, event_dates, start_date, end_date, limit=1)

    Expected Output:
    Fetching odds for timestamp: 2024-08-24T12:00:00Z
    Odds saved for timestamp: 2024-08-24T12:00:00Z
    Remaining requests: [number of remaining requests from headers]
    Used requests: [number of used requests from headers]

    Note:
    - To avoid excessive API usage, specify a `limit` if testing with multiple dates.
    """
    # Generate a dynamic file name based on the date range (end date first)
    file_name = f'mma_odds_({end_date} to {start_date}).csv'

    # Compare against None explicitly: a plain truthiness check would treat limit=0 as
    # "no limit" and issue a billable request for every date.
    if limit is not None:
        event_dates = event_dates[:limit]

    for event_date in event_dates:
        snapshot_date = event_date.strftime("%Y-%m-%dT%H:%M:%SZ")
        print(f"\nFetching odds for timestamp: {snapshot_date}")
        odds_data, response = get_historical_mma_odds(api_key, snapshot_date)

        if odds_data:
            save_odds_to_csv(odds_data, file_name)
            print(f"Odds saved for timestamp: {snapshot_date}")
        else:
            print(f"No data available for timestamp {snapshot_date}")

        # Quota information is reported by the API through response headers
        print(f"\nRemaining requests: {response.headers.get('x-requests-remaining', 'N/A')}")
        print(f"Used requests: {response.headers.get('x-requests-used', 'N/A')}")

        
def main():
    """
    Script entry point: loads the event masterlist, selects the event dates inside a fixed
    date range and downloads historical odds for the first five of them.

    The output CSV file name is chosen by `get_and_save_odds_for_dates` from the date range,
    so no file name is built here.
    """
    # Load masterlist CSV with date parsing
    masterlist_file = "Path to event_masterlist"  # Generated using UFCStats Scraper (different repository)
    masterlist_df = pd.read_csv(masterlist_file, encoding='utf-8', parse_dates=['event_date'])

    # Define date range for filtering
    start_date = '2020-06-06'  # First day TheOddsAPI began archiving UFC h2h odds
    end_date = '2024-08-24'

    # Get event dates within the range
    event_dates_list, num_events = get_event_dates_between(masterlist_df, start_date, end_date)
    print(f"There are {num_events} events between {start_date} and {end_date}.")
    print(f"Event dates (with 12-hour offset): {event_dates_list}")

    # Retrieve and save odds data for the first 5 event dates
    get_and_save_odds_for_dates(API_KEY, event_dates_list, start_date, end_date, limit=5)


if __name__ == "__main__":
    main()


There are 186 events between 2020-06-06 and 2024-08-24.

Event dates (with 12-hour offset): [Timestamp('2024-08-24 12:00:00'), Timestamp('2024-08-17 12:00:00'), Timestamp('2024-08-10 12:00:00'), Timestamp('2024-08-03 12:00:00'), Timestamp('2024-07-27 12:00:00'), Timestamp('2024-07-20 12:00:00'), Timestamp('2024-07-13 12:00:00'), Timestamp('2024-06-29 12:00:00'), Timestamp('2024-06-22 12:00:00'), Timestamp('2024-06-15 12:00:00'), Timestamp('2024-06-08 12:00:00'), Timestamp('2024-06-01 12:00:00'), Timestamp('2024-05-18 12:00:00'), Timestamp('2024-05-11 12:00:00'), Timestamp('2024-05-04 12:00:00'), Timestamp('2024-04-27 12:00:00'), Timestamp('2024-04-13 12:00:00'), Timestamp('2024-04-06 12:00:00'), Timestamp('2024-03-30 12:00:00'), Timestamp('2024-03-23 12:00:00'), Timestamp('2024-03-16 12:00:00'), Timestamp('2024-03-09 12:00:00'), Timestamp('2024-03-02 12:00:00'), Timestamp('2024-02-24 12:00:00'), Timestamp('2024-02-17 12:00:00'), Timestamp('2024-02-10 12:00:00'), Timestamp('2024-02-03 1

In [None]:
import csv
from collections import defaultdict
from statistics import mean

# Define a function to load odds data from the CSV file and structure it for easy display
def load_odds_from_csv(file_name):
    """
    Loads odds rows from a CSV file and groups them per fight.

    Parameters:
    - file_name (str): Path of a UTF-8 CSV file with (at least) the columns 'Home Team', 'Away Team',
      'Commence Time', 'Bookmaker', 'Outcome Name' and 'Odds Price'.

    Process:
    1. Groups rows by the key (Home Team, Away Team, Commence Time). The home team is stored as
       'Fighter A' and the away team as 'Fighter B'.
    2. For every row, records {'Bookmaker', 'Odds Price'} under the fighter whose name equals
       'Outcome Name'. Rows whose outcome matches neither fighter are ignored.
    3. For each fighter with at least one price, computes the arithmetic mean of the prices and
       selects the best price.

    Returns:
    - fights_with_odds (defaultdict): Maps each fight key to a dict with the entries 'Event Date',
      'Fighter A', 'Fighter B', 'Fighter A Odds', 'Fighter B Odds' (lists of price records),
      'Fighter A Avg Odds', 'Fighter B Avg Odds' (float or None) and 'Fighter A Best Odds',
      'Fighter B Best Odds' (the price record with the highest price, or None).

    Raises:
    - KeyError: If a required column is missing from the CSV.
    - ValueError: If an 'Odds Price' value cannot be converted to float.

    Note:
    - The best price is the numerically HIGHEST one: -175 beats -185 and +145 beats +140, since the
      higher number pays the bettor more in both cases. When several bookmakers share the highest
      price, the first one encountered in the file is reported.
    """
    fights_with_odds = defaultdict(lambda: {
        'Event Date': None,
        'Fighter A': None,
        'Fighter B': None,
        'Fighter A Odds': [],
        'Fighter B Odds': [],
        'Fighter A Avg Odds': None,
        'Fighter B Avg Odds': None,
        'Fighter A Best Odds': None,
        'Fighter B Best Odds': None
    })

    # Read odds data from CSV (newline='' lets the csv module handle line endings itself)
    with open(file_name, mode='r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            event_key = (row['Home Team'], row['Away Team'], row['Commence Time'])
            fight = fights_with_odds[event_key]

            # Initialize event information if not already set
            if not fight['Event Date']:
                fight['Event Date'] = row['Commence Time']
                fight['Fighter A'] = row['Home Team']
                fight['Fighter B'] = row['Away Team']

            # Collect odds data for each fighter
            odds_info = {
                'Bookmaker': row['Bookmaker'],
                'Odds Price': float(row['Odds Price'])
            }
            if row['Outcome Name'] == fight['Fighter A']:
                fight['Fighter A Odds'].append(odds_info)
            elif row['Outcome Name'] == fight['Fighter B']:
                fight['Fighter B Odds'].append(odds_info)

    # Calculate average and best odds for each fighter
    for details in fights_with_odds.values():
        for side in ('Fighter A', 'Fighter B'):
            quotes = details[f'{side} Odds']
            if not quotes:
                continue
            details[f'{side} Avg Odds'] = mean([quote['Odds Price'] for quote in quotes])
            # max(), not min(): the highest price is the most favourable one for the bettor
            details[f'{side} Best Odds'] = max(quotes, key=lambda quote: quote['Odds Price'])

    return fights_with_odds

# Display function to format and print the structured odds data
def display_fights_with_odds(fights_with_odds):
    """
    Prints a readable report of every fight in `fights_with_odds`.

    Parameters:
    - fights_with_odds (dict): Maps a fight key to a dict with the entries 'Event Date', 'Fighter A',
      'Fighter B' and, per fighter, '<Fighter X> Odds' (list of {'Bookmaker', 'Odds Price'} records),
      '<Fighter X> Avg Odds' and '<Fighter X> Best Odds' (one such record).

    Output per fight:
    - The event date, then one section per fighter listing every bookmaker price followed by the
      average and best price, or a "No odds available" line when the fighter has no prices.
    - A separator line of 50 '=' characters surrounded by blank lines.
    """
    separator = "\n" + "=" * 50 + "\n"

    for fight in fights_with_odds.values():
        print(f"Event Date: {fight['Event Date']}")

        # Fighter B's heading is preceded by a blank line; Fighter A's is not
        for label, lead in (("Fighter A", ""), ("Fighter B", "\n")):
            print(f"{lead}{label}: {fight[label]}")

            quotes = fight[label + " Odds"]
            if not quotes:
                print(f"  No odds available for {label}")
                continue

            print(f"  {label} Odds:")
            for quote in quotes:
                print(f"    Bookmaker: {quote['Bookmaker']}, Odds Price: {quote['Odds Price']}")

            average = fight[label + " Avg Odds"]
            print(f"  {label} Avg Odds: {average}")

            best = fight[label + " Best Odds"]
            print(f"  {label} Best Odds: {best['Odds Price']} (Bookmaker: {best['Bookmaker']})")

        print(separator)

# Load odds from the CSV and display them.
# NOTE: these statements run as soon as the cell is executed. The path below is a
# machine-specific absolute Windows path; point it at your own copy of the odds CSV.
file_name = r"C:\Users\EditZ\UFC Research\New Pipeline\(DONT TOUCH) mma_odds_(2024-08-24 to 2020-06-06).csv"
fights_with_odds = load_odds_from_csv(file_name)
display_fights_with_odds(fights_with_odds)


Example Event (2024-08-24):

Event Date: 2024-08-24T17:00:00Z
Fighter A: Albert Odzimkowski
  Fighter A Odds:
    Bookmaker: Bovada, Odds Price: -185.0
    Bookmaker: LowVig.ag, Odds Price: -175.0
  Fighter A Avg Odds: -180.0
  Fighter A Best Odds: -175.0 (Bookmaker: LowVig.ag)

Fighter B: David Hosek
  Fighter B Odds:
    Bookmaker: Bovada, Odds Price: 140.0
    Bookmaker: LowVig.ag, Odds Price: 145.0
  Fighter B Avg Odds: 142.5
  Fighter B Best Odds: 145.0 (Bookmaker: LowVig.ag)


Event Date: 2024-08-24T17:00:00Z
Fighter A: Krystian Blezien
  Fighter A Odds:
    Bookmaker: Bovada, Odds Price: -155.0
    Bookmaker: LowVig.ag, Odds Price: -150.0
  Fighter A Avg Odds: -152.5
  Fighter A Best Odds: -150.0 (Bookmaker: LowVig.ag)

Fighter B: Alvin Lowenski
  Fighter B Odds:
    Bookmaker: Bovada, Odds Price: 120.0
    Bookmaker: LowVig.ag, Odds Price: 120.0
  Fighter B Avg Odds: 120.0
  Fighter B Best Odds: 120.0 (Bookmaker: Bovada)


Event Date: 2024-08-24T17:00:00Z
Fighter A: Oleksii Pol