# Sports Betting Data Cleaning Work Thru

### Package Imports

In [178]:
from OddsJamClient import OddsJamClient;
from dotenv import load_dotenv
import os, requests, datetime, json
import pandas as pd

### Load Env Vars

In [167]:
load_dotenv()
ODDSJAM_API_KEY = os.getenv("ODDSJAM_API_KEY")

### Initialize Clients

In [168]:
Client = OddsJamClient(ODDSJAM_API_KEY);
Client.UseV2();

### Get Games For Today

In [169]:
def get_nba_games_as_dataframe(league="nba", sport="basketball"):
    # Define the endpoint URL
    end_point = 'https://api-external.oddsjam.com/api/v2/games'
    
    # Set up the headers with the API key and content type
    headers = {
        # 'x-api-key': ODDSJAM_API_KEY,
        'Content-Type': 'application/json'
    }
    
    # Set up the data payload with the parameters
    data = {
        'league': league,
        'sport': sport,
        "include_team_info": True,
        "key": ODDSJAM_API_KEY,
        "game_date": datetime.datetime.now().strftime('%Y-%m-%d')
    }
    
    # Make the POST request to the API
    response = requests.get(end_point, headers=headers, params=data)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Here you would typically convert the response to a DataFrame
        df = pd.DataFrame(response.json()["data"])
        # update the time
        df['start_date'] = pd.to_datetime(df['start_date']).dt.strftime('%Y-%m-%d %H:%M:%S')
        df = df.sort_values(by='start_date').reset_index(drop=True)
        # Now, we extract the team ids from the home_team_info and away_team_info dictionaries
        # and then drop these columns from the DataFrame
        df['home_team_id'] = df['home_team_info'].apply(lambda x: x['id'])
        df['away_team_id'] = df['away_team_info'].apply(lambda x: x['id'])

        # Drop the now-redundant '_team_info' columns
        df = df.drop(columns=['home_team_info', 'away_team_info'])
        return df
    else:
        print(f"Error: {response.status_code}")
        # You could also handle errors here


In [170]:
games_df = get_nba_games_as_dataframe()



In [171]:
display(games_df)

Unnamed: 0,id,start_date,home_team,away_team,is_live,is_popular,tournament,status,sport,league,home_team_id,away_team_id
0,25236-12967-2024-01-20,2024-01-20 15:00:00,Detroit Pistons,Milwaukee Bucks,False,False,,unplayed,basketball,NBA,5988658C6B9B,14682EF45C4D
1,32634-30886-2024-01-20,2024-01-20 19:00:00,Washington Wizards,San Antonio Spurs,False,False,,unplayed,basketball,NBA,8F17F23FB753,E89F51275352
2,35142-48840-2024-01-20,2024-01-20 19:00:00,Charlotte Hornets,Philadelphia 76ers,False,False,,unplayed,basketball,NBA,C65360931346,EDF03AD3C346
3,19957-11464-2024-01-20,2024-01-20 19:30:00,Atlanta Hawks,Cleveland Cavaliers,False,False,,unplayed,basketball,NBA,B59C1C735494,D5348BDFEBCC
4,40294-35775-2024-01-20,2024-01-20 19:30:00,New York Knicks,Toronto Raptors,False,False,,unplayed,basketball,NBA,6A36E386117E,417F4FFF4625
5,17844-40177-2024-01-20,2024-01-20 20:00:00,Houston Rockets,Utah Jazz,False,False,,unplayed,basketball,NBA,DF2D9E9E8E20,7C902BB2E272
6,37132-22796-2024-01-20,2024-01-20 20:00:00,Chicago Bulls,Memphis Grizzlies,False,False,,unplayed,basketball,NBA,CA98E3A931AE,2C653B0A5BBF
7,41811-19432-2024-01-20,2024-01-20 20:00:00,Minnesota Timberwolves,Oklahoma City Thunder,False,False,,unplayed,basketball,NBA,FDAE71FA88C6,D8EC6878976A


### Segment The Games Into Sessions 
- Sessions are defined by games grouped together that start within 2 hours of each other
- no two sessions will share the same game; they're all exclusive sessions.

In [172]:
def segment_games_into_sessions(games_df):
    # Ensure start_date is a datetime
    games_df['start_date'] = pd.to_datetime(games_df['start_date'])
    
    # Sort the games by start time
    sorted_games_df = games_df.sort_values(by='start_date').reset_index(drop=True)
    
    # List to hold all game sessions
    sessions = []
    # Temporary list to store games for the current session
    current_session = []
    # Start time of the current session
    session_start = None
    
    for _, game in sorted_games_df.iterrows():
        game_start = game['start_date']
        
        # If current session is empty or game starts within 2 hours of the session start
        if not current_session or (game_start - session_start).total_seconds() <= 2 * 3600:
            # Add game to current session
            current_session.append(game)
            # If this is the first game in the session, set the session start time
            if not session_start:
                session_start = game_start
        else:
            # If game starts more than 2 hours after the session start, save and reset the session
            sessions.append(current_session)
            current_session = [game]
            session_start = game_start
    
    # Add the last session if it exists
    if current_session:
        sessions.append(current_session)
    
    # Convert sessions to DataFrame list for better handling
    session_dfs = [pd.DataFrame(session) for session in sessions]
    
    return session_dfs

In [173]:
# Example usage:
# Assuming games_df is your DataFrame containing the games information.
segmented_sessions = segment_games_into_sessions(games_df)

# You can then access each session DataFrame with segmented_sessions[0], segmented_sessions[1], etc.
len(segmented_sessions)

2

### Get Players From A Specific Team

In [228]:
def get_players_from_specific_team(team_id: str, league="nba", sport="basketball") -> pd.DataFrame:
    # Define the endpoint URL
    end_point = 'https://api-external.oddsjam.com/api/v2/players/list'
    
    # Set up the headers with the API key
    headers = {
        'Content-Type': 'application/json'
    }
    
    # Set up the parameters with the team_id
    params = {
        'team': team_id,
        # "league": league,
        # "sport": sport,
        'key': ODDSJAM_API_KEY
    }
    
    # Make the GET request to the API
    response = requests.get(end_point, headers=headers, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Convert the JSON response to a DataFrame
        data = response.json()["data"]
        df = pd.DataFrame(data)
        return df
    else:
        print(f"Error: {response.status_code}")
        return pd.DataFrame()

### Testing That The Functionality Works As Envisioned
* Need to get the game_id from the segmented_sessions  

In [218]:
display(segmented_sessions[1])

Unnamed: 0,id,start_date,home_team,away_team,is_live,is_popular,tournament,status,sport,league,home_team_id,away_team_id
1,32634-30886-2024-01-20,2024-01-20 19:00:00,Washington Wizards,San Antonio Spurs,False,False,,unplayed,basketball,NBA,8F17F23FB753,E89F51275352
2,35142-48840-2024-01-20,2024-01-20 19:00:00,Charlotte Hornets,Philadelphia 76ers,False,False,,unplayed,basketball,NBA,C65360931346,EDF03AD3C346
3,19957-11464-2024-01-20,2024-01-20 19:30:00,Atlanta Hawks,Cleveland Cavaliers,False,False,,unplayed,basketball,NBA,B59C1C735494,D5348BDFEBCC
4,40294-35775-2024-01-20,2024-01-20 19:30:00,New York Knicks,Toronto Raptors,False,False,,unplayed,basketball,NBA,6A36E386117E,417F4FFF4625
5,17844-40177-2024-01-20,2024-01-20 20:00:00,Houston Rockets,Utah Jazz,False,False,,unplayed,basketball,NBA,DF2D9E9E8E20,7C902BB2E272
6,37132-22796-2024-01-20,2024-01-20 20:00:00,Chicago Bulls,Memphis Grizzlies,False,False,,unplayed,basketball,NBA,CA98E3A931AE,2C653B0A5BBF
7,41811-19432-2024-01-20,2024-01-20 20:00:00,Minnesota Timberwolves,Oklahoma City Thunder,False,False,,unplayed,basketball,NBA,FDAE71FA88C6,D8EC6878976A


In [226]:
get_players_from_specific_team(segmented_sessions[1].iloc[0]['home_team_id'])

8F17F23FB753


Unnamed: 0,id,player_name,first_name,last_name,team_name,team_id,number,position,age,height,weight,is_active,sport,league
0,DBA7A36A56A0,Anthony Gill,Anthony,Gill,Washington Wizards,8F17F23FB753,16,PF,32,80,230,True,basketball,NBA
1,CBD44E0BCC61,Bilal Coulibaly,Bilal,Coulibaly,Washington Wizards,8F17F23FB753,0,SG,20,80,194,True,basketball,NBA
2,D714BB639EFB,Corey Kispert,Corey,Kispert,Washington Wizards,8F17F23FB753,24,SG,25,79,220,True,basketball,NBA
3,8A7EB4769E41,Daniel Gafford,Daniel,Gafford,Washington Wizards,8F17F23FB753,21,C,26,82,233,True,basketball,NBA
4,EC65ADA7CB14,Delon Wright,Delon,Wright,Washington Wizards,8F17F23FB753,55,PG,32,77,183,True,basketball,NBA
5,A516E172A7D9,Deni Avdija,Deni,Avdija,Washington Wizards,8F17F23FB753,8,SF,23,81,220,True,basketball,NBA
6,909D4FE4E8CC,Eugene Omoruyi,Eugene,Omoruyi,Washington Wizards,8F17F23FB753,97,SF,27,78,235,True,basketball,NBA
7,3A9B0B8E64F9,Hamidou Diallo,Hamidou,Diallo,Washington Wizards,8F17F23FB753,6,SG,26,77,198,True,basketball,NBA
8,AEB008271151,Isaiah Livers,Isaiah,Livers,Washington Wizards,8F17F23FB753,12,SF,26,79,230,True,basketball,NBA
9,458D5626316C,Jared Butler,Jared,Butler,Washington Wizards,8F17F23FB753,4,SG,24,75,195,True,basketball,NBA


In [227]:
temp = get_players_from_specific_team(segmented_sessions[1].iloc[0]['away_team_id'])

E89F51275352


In [229]:
temp

Unnamed: 0,id,player_name,first_name,last_name,team_name,team_id,number,position,age,height,weight,is_active,sport,league
0,7959261438C7,Blake Wesley,Blake,Wesley,San Antonio Spurs,E89F51275352,14,SG,21,77,181,True,basketball,NBA
1,97294E569CB7,Cedi Osman,Cedi,Osman,San Antonio Spurs,E89F51275352,16,SG,29,79,215,True,basketball,NBA
2,CD99BB67884D,Charles Bassey,Charles,Bassey,San Antonio Spurs,E89F51275352,28,C,24,83,235,True,basketball,NBA
3,E10320F559B1,David Duke Jr.,David,Duke Jr.,San Antonio Spurs,E89F51275352,7,SG,25,76,205,True,basketball,NBA
4,804B793FEEDB,Devin Vassell,Devin,Vassell,San Antonio Spurs,E89F51275352,24,SG,24,79,194,True,basketball,NBA
5,AACF1CBA1094,Devonte' Graham,Devonte',Graham,San Antonio Spurs,E89F51275352,4,SG,29,73,185,True,basketball,NBA
6,17624A972445,Dominick Barlow,Dominick,Barlow,San Antonio Spurs,E89F51275352,26,SF,21,81,214,True,basketball,NBA
7,CA04B2FDE9C2,Doug McDermott,Doug,McDermott,San Antonio Spurs,E89F51275352,17,PF,32,79,225,True,basketball,NBA
8,80FFC6ED3456,Jeremy Sochan,Jeremy,Sochan,San Antonio Spurs,E89F51275352,10,PF,21,81,230,True,basketball,NBA
9,99318830CCC3,Julian Champagnie,Julian,Champagnie,San Antonio Spurs,E89F51275352,30,SF,23,80,215,True,basketball,NBA


### Get All Available Player Props For Each Team, Given There's A PrizePicks Line

In [215]:
def get_player_odds(player_id: str, sportsbook: list, league="nba", sport="basketball") -> pd.DataFrame:
    # Define the endpoint URL
    end_point = 'https://api-external.oddsjam.com/api/v2/game-odds'
    
    # Set up the headers with the API key
    headers = {
        'Content-Type': 'application/json'
    }
    # Set up the parameters with the team_id
    params = {
        'player_id': player_id,
        "sportsbook": sportsbook,
        # "league": league,
        # "sport": sport,
        'key': ODDSJAM_API_KEY
    }
    
    # Make the GET request to the API
    response = requests.get(end_point, headers=headers, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Convert the JSON response to a DataFrame
        data = response.json()["data"]
        print(response.json())
        df = pd.DataFrame(data)
        return df
    else:
        print(f"Error: {response.reason}")
        print(response)
        return pd.DataFrame()

### Test That The Function Pulls Odds Correctly

In [263]:
odds = get_player_odds("2F80F21FE649", ["PrizePicks", "Underdog Fantasy", "Pinnacle", "FanDuel", "Draftkings", "BetMGM"])

{'data': [{'id': '54023-30886-2024-01-21', 'start_date': '2024-01-21T23:00:00+00:00', 'home_team': 'Washington Wizards', 'away_team': 'Denver Nuggets', 'is_live': False, 'is_popular': False, 'tournament': None, 'status': 'unplayed', 'sport': 'basketball', 'league': 'NBA', 'odds': []}, {'id': '32634-30886-2024-01-20', 'start_date': '2024-01-21T00:00:00+00:00', 'home_team': 'Washington Wizards', 'away_team': 'San Antonio Spurs', 'is_live': False, 'is_popular': False, 'tournament': None, 'status': 'unplayed', 'sport': 'basketball', 'league': 'NBA', 'odds': [{'id': '946C1EC66BC7', 'sports_book_name': 'Underdog Fantasy', 'name': 'Kyle Kuzma Over 4.5', 'price': -137.0, 'timestamp': 1705779507.1105263, 'bet_points': 4.5, 'is_main': True, 'is_live': False, 'market_name': 'Player Assists', 'market': 'player_assists', 'home_rotation_number': None, 'away_rotation_number': None, 'deep_link_url': None, 'player_id': '2F80F21FE649', 'selection': 'Kyle Kuzma', 'normalized_selection': 'kyle_kuzma', 'se

In [264]:
odds.shape

(2, 11)

In [265]:
books = odds.iloc[1].odds

In [268]:
books_df = pd.DataFrame(books)
display(books_df.head())
print(books_df.shape)

Unnamed: 0,id,sports_book_name,name,price,timestamp,bet_points,is_main,is_live,market_name,market,home_rotation_number,away_rotation_number,deep_link_url,player_id,selection,normalized_selection,selection_line,selection_points
0,946C1EC66BC7,Underdog Fantasy,Kyle Kuzma Over 4.5,-137.0,1705780000.0,4.5,True,False,Player Assists,player_assists,,,,2F80F21FE649,Kyle Kuzma,kyle_kuzma,over,4.5
1,26DBC6C0BFF0,Underdog Fantasy,Kyle Kuzma Under 4.5,-137.0,1705780000.0,4.5,True,False,Player Assists,player_assists,,,,2F80F21FE649,Kyle Kuzma,kyle_kuzma,under,4.5
2,7CF436F02863,Underdog Fantasy,Kyle Kuzma Over 21.5,-137.0,1705790000.0,21.5,True,False,Player Points,player_points,,,,2F80F21FE649,Kyle Kuzma,kyle_kuzma,over,21.5
3,293D9A0DD9F6,Underdog Fantasy,Kyle Kuzma Under 21.5,-137.0,1705790000.0,21.5,True,False,Player Points,player_points,,,,2F80F21FE649,Kyle Kuzma,kyle_kuzma,under,21.5
4,1EEDAE48FA5D,Underdog Fantasy,Kyle Kuzma Over 25.5,-137.0,1705780000.0,25.5,True,False,Player Points + Assists,player_points_+_assists,,,,2F80F21FE649,Kyle Kuzma,kyle_kuzma,over,25.5


(275, 18)
