# Sports Betting Data Cleaning Walkthrough

### Package Imports

In [166]:
from OddsJamClient import OddsJamClient;
from dotenv import load_dotenv
import os, requests, json, datetime
import pandas as pd

### Load Env Vars

In [167]:
# Load variables from a local .env file (python-dotenv), then look up the
# OddsJam API key in the process environment; the value is None when unset.
load_dotenv()
ODDSJAM_API_KEY = os.environ.get("ODDSJAM_API_KEY")

### Initialize Clients

In [168]:
# Build the OddsJam SDK client with the API key read from the environment.
# UseV2() presumably switches the client to v2 of the API — confirm against the SDK.
# The trailing semicolon on the last line suppresses the cell's output in Jupyter.
# NOTE(review): the functions in the cells visible here call the REST endpoints
# directly with `requests` and do not use `Client`.
Client = OddsJamClient(ODDSJAM_API_KEY);
Client.UseV2();

### Get Games For Today

In [169]:
def get_nba_games_as_dataframe(league="nba", sport="basketball", game_date=None, timeout=10):
    """Fetch the games for one date from the OddsJam v2 ``/games`` endpoint.

    Args:
        league: League filter sent to the API (default ``"nba"``).
        sport: Sport filter sent to the API (default ``"basketball"``).
        game_date: Date to query, either a ``YYYY-MM-DD`` string or an object
            with ``strftime`` (``date`` / ``datetime``). Defaults to today's
            date according to the local clock.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error instead of blocking the notebook indefinitely.

    Returns:
        pd.DataFrame: One row per game, sorted by ``start_date`` (formatted as
        ``YYYY-MM-DD HH:MM:SS`` strings), with ``home_team_id`` and
        ``away_team_id`` columns in place of the nested ``*_team_info``
        dictionaries. An empty DataFrame is returned when the API responds
        with a non-200 status or reports no games, so callers always receive
        a DataFrame.
    """
    if game_date is None:
        game_date = datetime.datetime.now()
    if not isinstance(game_date, str):
        game_date = game_date.strftime('%Y-%m-%d')

    end_point = 'https://api-external.oddsjam.com/api/v2/games'
    headers = {'Content-Type': 'application/json'}

    # The API key is sent as the `key` query parameter, not as a header.
    params = {
        'league': league,
        'sport': sport,
        "include_team_info": True,
        "key": ODDSJAM_API_KEY,
        "game_date": game_date,
    }

    # GET request; the filters above are encoded into the query string.
    response = requests.get(end_point, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return pd.DataFrame()

    games = response.json().get("data") or []
    if not games:
        # No games for this date: there are no columns to clean up.
        return pd.DataFrame()

    df = pd.DataFrame(games)

    # Normalise the start time to a fixed-width string; this format sorts
    # lexicographically in chronological order.
    df['start_date'] = pd.to_datetime(df['start_date']).dt.strftime('%Y-%m-%d %H:%M:%S')
    df = df.sort_values(by='start_date').reset_index(drop=True)

    # Keep only the team ids from the nested team-info dictionaries.
    df['home_team_id'] = df['home_team_info'].apply(lambda info: info['id'])
    df['away_team_id'] = df['away_team_info'].apply(lambda info: info['id'])

    return df.drop(columns=['home_team_info', 'away_team_info'])


In [170]:
# Fetch today's games using the function defaults (league="nba", sport="basketball").
games_df = get_nba_games_as_dataframe()



In [171]:
# Render the fetched games table with Jupyter's rich display.
display(games_df)

Unnamed: 0,id,start_date,home_team,away_team,is_live,is_popular,tournament,status,sport,league,home_team_id,away_team_id
0,25236-12967-2024-01-20,2024-01-20 15:00:00,Detroit Pistons,Milwaukee Bucks,False,False,,unplayed,basketball,NBA,5988658C6B9B,14682EF45C4D
1,32634-30886-2024-01-20,2024-01-20 19:00:00,Washington Wizards,San Antonio Spurs,False,False,,unplayed,basketball,NBA,8F17F23FB753,E89F51275352
2,35142-48840-2024-01-20,2024-01-20 19:00:00,Charlotte Hornets,Philadelphia 76ers,False,False,,unplayed,basketball,NBA,C65360931346,EDF03AD3C346
3,19957-11464-2024-01-20,2024-01-20 19:30:00,Atlanta Hawks,Cleveland Cavaliers,False,False,,unplayed,basketball,NBA,B59C1C735494,D5348BDFEBCC
4,40294-35775-2024-01-20,2024-01-20 19:30:00,New York Knicks,Toronto Raptors,False,False,,unplayed,basketball,NBA,6A36E386117E,417F4FFF4625
5,17844-40177-2024-01-20,2024-01-20 20:00:00,Houston Rockets,Utah Jazz,False,False,,unplayed,basketball,NBA,DF2D9E9E8E20,7C902BB2E272
6,37132-22796-2024-01-20,2024-01-20 20:00:00,Chicago Bulls,Memphis Grizzlies,False,False,,unplayed,basketball,NBA,CA98E3A931AE,2C653B0A5BBF
7,41811-19432-2024-01-20,2024-01-20 20:00:00,Minnesota Timberwolves,Oklahoma City Thunder,False,False,,unplayed,basketball,NBA,FDAE71FA88C6,D8EC6878976A


### Segment The Games Into Sessions 
- A session is a group of games whose start times all fall within 2 hours of the session's first (earliest) game.
- Sessions are mutually exclusive: no game appears in more than one session.

In [172]:
def segment_games_into_sessions(games_df, max_gap_hours=2):
    """Group games into mutually exclusive sessions by start time.

    Games are sorted by ``start_date``. A session opens with the earliest
    unassigned game and takes in every later game that starts no more than
    ``max_gap_hours`` after that opening game; the first game outside the
    window opens the next session.

    Args:
        games_df: DataFrame with a ``start_date`` column (datetimes or
            parseable strings). It is not modified. ``None`` or an empty
            frame yields no sessions.
        max_gap_hours: Window length in hours, measured from the first game
            of each session (inclusive). Defaults to 2.

    Returns:
        list[pd.DataFrame]: One DataFrame per session, in chronological
        order. Row labels are positions in the time-sorted frame, so only the
        first session starts at label 0.

    Raises:
        KeyError: If a non-empty ``games_df`` has no ``start_date`` column.
    """
    if games_df is None or games_df.empty:
        return []

    # Work on a derived frame so the caller's start_date column is untouched.
    sorted_games_df = (
        games_df
        .assign(start_date=pd.to_datetime(games_df['start_date']))
        .sort_values(by='start_date')
        .reset_index(drop=True)
    )
    start_times = sorted_games_df['start_date']
    max_gap = pd.Timedelta(hours=max_gap_hours)

    # Row positions at which a new session begins; the first game always opens one.
    session_first_rows = [0]
    session_start = start_times.iloc[0]
    for row in range(1, len(start_times)):
        game_start = start_times.iloc[row]
        # Written as `not (<=)` so that a missing start time (NaT), for which
        # every comparison is False, opens its own session.
        if not (game_start - session_start <= max_gap):
            session_first_rows.append(row)
            session_start = game_start

    session_end_rows = session_first_rows[1:] + [len(sorted_games_df)]
    return [
        sorted_games_df.iloc[first:end].copy()
        for first, end in zip(session_first_rows, session_end_rows)
    ]

In [173]:
# Split the fetched games into sessions; the result is a list holding one
# DataFrame per session.
segmented_sessions = segment_games_into_sessions(games_df)

# Individual sessions are available as segmented_sessions[0], segmented_sessions[1], etc.
# The cell output is the number of sessions found.
len(segmented_sessions)

2

### Get Players From A Specific Team

In [174]:
def get_players_from_specific_team(team_id: str, league="nba", sport="basketball", timeout=10) -> pd.DataFrame:
    """Fetch the players of one team from the OddsJam v2 ``/players/list`` endpoint.

    Args:
        team_id: OddsJam team identifier, sent as the ``team`` query parameter.
        league: Accepted for interface compatibility; not currently sent to the API.
        sport: Accepted for interface compatibility; not currently sent to the API.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error instead of blocking the notebook indefinitely.

    Returns:
        pd.DataFrame: The records under the response's ``"data"`` key, one row
        per player. An empty DataFrame is returned when the API responds with
        a non-200 status or the response carries no ``"data"``.
    """
    # Echo the team being queried so each printed roster can be matched to its request.
    print(team_id)

    end_point = 'https://api-external.oddsjam.com/api/v2/players/list'
    headers = {'Content-Type': 'application/json'}

    # The API key is sent as the `key` query parameter, not as a header.
    params = {
        'team': team_id,
        'key': ODDSJAM_API_KEY,
    }

    response = requests.get(end_point, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return pd.DataFrame()

    # A missing or null "data" entry is treated as "no players".
    return pd.DataFrame(response.json().get("data") or [])

### Testing That The Functionality Works As Envisioned
* Need to get the game_id from the segmented_sessions  

In [175]:
# Show the first session so its team ids can be used for the roster lookups.
display(segmented_sessions[0])

Unnamed: 0,id,start_date,home_team,away_team,is_live,is_popular,tournament,status,sport,league,home_team_id,away_team_id
0,25236-12967-2024-01-20,2024-01-20 15:00:00,Detroit Pistons,Milwaukee Bucks,False,False,,unplayed,basketball,NBA,5988658C6B9B,14682EF45C4D


In [176]:
# Roster for the home team of the game at row label 0 in the first session.
get_players_from_specific_team(segmented_sessions[0].loc[0, 'home_team_id'])

5988658C6B9B


Unnamed: 0,id,player_name,first_name,last_name,team_name,team_id,number,position,age,height,weight,is_active,sport,league
0,5E9A505CCD97,Alec Burks,Alec,Burks,Detroit Pistons,5988658C6B9B,14,SG,33,78,214,True,basketball,NBA
1,547F0B60B699,Ausar Thompson,Ausar,Thompson,Detroit Pistons,5988658C6B9B,9,SF,21,79,218,True,basketball,NBA
2,C858645A4A40,Bojan Bogdanovic,Bojan,Bogdanovic,Detroit Pistons,5988658C6B9B,44,SF,35,80,216,True,basketball,NBA
3,72F133EEC33F,Cade Cunningham,Cade,Cunningham,Detroit Pistons,5988658C6B9B,2,PG,23,80,220,True,basketball,NBA
4,1BD51B891711,Danilo Gallinari,Danilo,Gallinari,Detroit Pistons,5988658C6B9B,12,PF,36,82,225,True,basketball,NBA
5,5F65A3C82CB7,Isaiah Stewart,Isaiah,Stewart,Detroit Pistons,5988658C6B9B,28,C,23,81,250,True,basketball,NBA
6,1A6DD7A9C666,Jaden Ivey,Jaden,Ivey,Detroit Pistons,5988658C6B9B,23,SG,22,76,195,True,basketball,NBA
7,8DC7D01E2795,Jalen Duren,Jalen,Duren,Detroit Pistons,5988658C6B9B,0,C,21,83,250,True,basketball,NBA
8,05449AE56F55,James Wiseman,James,Wiseman,Detroit Pistons,5988658C6B9B,13,C,23,85,240,True,basketball,NBA
9,14D5ECF70973,Jared Rhoden,Jared,Rhoden,Detroit Pistons,5988658C6B9B,8,SG,25,78,210,True,basketball,NBA


In [177]:
# Roster for the away team of the game at row label 0 in the first session.
get_players_from_specific_team(segmented_sessions[0].loc[0, 'away_team_id'])

14682EF45C4D


Unnamed: 0,id,player_name,first_name,last_name,team_name,team_id,number,position,age,height,weight,is_active,sport,league
0,1E789F6CD429,AJ Green,AJ,Green,Milwaukee Bucks,14682EF45C4D,20,SG,25,76,190,True,basketball,NBA
1,9A886FCC523E,Andre Jackson Jr.,Andre,Jackson Jr.,Milwaukee Bucks,14682EF45C4D,44,SG,23,78,210,True,basketball,NBA
2,B93FB7365B2A,Bobby Portis,Bobby,Portis,Milwaukee Bucks,14682EF45C4D,9,PF,29,82,250,True,basketball,NBA
3,B411AB68C551,Brook Lopez,Brook,Lopez,Milwaukee Bucks,14682EF45C4D,11,C,36,84,270,True,basketball,NBA
4,FD1BE0F04EF6,Cameron Payne,Cameron,Payne,Milwaukee Bucks,14682EF45C4D,15,PG,30,75,190,True,basketball,NBA
5,9AD05EA5DC25,Chris Livingston,Chris,Livingston,Milwaukee Bucks,14682EF45C4D,7,SF,21,78,220,True,basketball,NBA
6,36ADF58C91B6,Damian Lillard,Damian,Lillard,Milwaukee Bucks,14682EF45C4D,0,PG,34,74,195,True,basketball,NBA
7,DD8C4485A3FE,Giannis Antetokounmpo,Giannis,Antetokounmpo,Milwaukee Bucks,14682EF45C4D,34,PF,30,83,242,True,basketball,NBA
8,4AB103EBD0CE,Jae Crowder,Jae,Crowder,Milwaukee Bucks,14682EF45C4D,99,SF,34,78,235,True,basketball,NBA
9,0B985F4C90D5,Khris Middleton,Khris,Middleton,Milwaukee Bucks,14682EF45C4D,22,SF,33,79,222,True,basketball,NBA


### Get All Available Player Props For Each Team, Given There's A PrizePicks Line