# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [7]:
import json
import unicodedata
import warnings
from basketball_reference_scraper.teams import get_roster
import pandas as pd
import numpy as np

# Suppress all warnings
# NOTE: this installs a process-wide "ignore" filter, so every warning category
# (including deprecation notices from the libraries imported above) stays
# hidden for the rest of the kernel session.
warnings.filterwarnings("ignore")

# Team map with team IDs: every label below is assigned "team<N>", where N is
# its 1-based position in this listing (Atlanta -> team1 ... Washington -> team30).
nba_teams_map = {
    city_label: f"team{position}"
    for position, city_label in enumerate(
        (
            "Atlanta",
            "Boston",
            "Brooklyn",
            "Charlotte",
            "Chicago",
            "Cleveland",
            "Dallas",
            "Denver",
            "Detroit",
            "Golden State",
            "Houston",
            "Indiana",
            "LA",  # Assuming LA refers to Los Angeles Clippers (team13)
            "Los Angeles",
            "Memphis",
            "Miami",
            "Milwaukee",
            "Minnesota",
            "New Orleans",
            "New York",
            "Oklahoma City",
            "Orlando",
            "Philadelphia",
            "Phoenix",
            "Portland",
            "Sacramento",
            "San Antonio",
            "Toronto",
            "Utah",
            "Washington",
        ),
        start=1,
    )
}

# Team abbreviations: the three-letter code handed to get_roster, mapped to the
# label used as a key in nba_teams_map.
team_abbreviations = dict(
    ATL="Atlanta",
    BOS="Boston",
    BRK="Brooklyn",
    CHO="Charlotte",
    CHI="Chicago",
    CLE="Cleveland",
    DAL="Dallas",
    DEN="Denver",
    DET="Detroit",
    GSW="Golden State",
    HOU="Houston",
    IND="Indiana",
    LAC="LA",
    LAL="Los Angeles",
    MEM="Memphis",
    MIA="Miami",
    MIL="Milwaukee",
    MIN="Minnesota",
    NOP="New Orleans",
    NYK="New York",
    OKC="Oklahoma City",
    ORL="Orlando",
    PHI="Philadelphia",
    PHO="Phoenix",
    POR="Portland",
    SAC="Sacramento",
    SAS="San Antonio",
    TOR="Toronto",
    UTA="Utah",
    WAS="Washington",
)

# Function to normalize names and remove accents
def normalize_name(name):
    """Return ``name`` reduced to plain ASCII.

    The text is decomposed with NFKD so accented letters split into a base
    letter plus combining marks; every character that cannot be encoded as
    ASCII is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', name)
    ascii_only = decomposed.encode('ASCII', 'ignore')
    return ascii_only.decode('ASCII')

# Function to process each team's roster and save it as a JSON file
def create_roster_json(team_abbreviation, team_name, season='2024'):
    """Fetch one team's roster and write it to ``<abbr>_roster.json``.

    Args:
        team_abbreviation: Team code passed to ``get_roster``; its lower-cased
            form is also the prefix of the output filename.
        team_name: Key into ``nba_teams_map`` used to resolve the ``team<N>`` id.
        season: Season identifier forwarded to ``get_roster``. Defaults to
            ``'2024'``. The output filename does not include the season, so
            a second call for another season overwrites the same file.

    Returns:
        None. When no roster is returned a message is printed and no file is
        written.

    Raises:
        KeyError: If ``team_name`` is not a key of ``nba_teams_map``.
    """
    # Fetch roster data
    roster_df = get_roster(team_abbreviation, season)

    # Check if the DataFrame was returned correctly
    if roster_df is None or roster_df.empty:
        print(f"Failed to retrieve roster for {team_name} ({team_abbreviation}). Skipping...")
        return

    # Replace NaN values with None so they serialise as JSON null
    roster_df = roster_df.replace({np.nan: None})

    # Ensure birthdate is in string format (YYYY-MM-DD)
    if 'BIRTH_DATE' in roster_df.columns:
        roster_df['BIRTH_DATE'] = roster_df['BIRTH_DATE'].apply(
            lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None
        )

    team_id = nba_teams_map[team_name]
    team_id_number = int(team_id.replace('team', ''))

    # Number players by their position in the roster rather than by the
    # DataFrame index label: the label is only 0..n-1 when the frame carries a
    # default RangeIndex, and any other index would yield gaps, duplicates or a
    # TypeError in the id arithmetic.
    players = []
    for position, (_, row) in enumerate(roster_df.iterrows(), start=1):
        players.append({
            "_id": f"player{team_id_number * 100 + position}",
            "name": normalize_name(row['PLAYER']),
            "position": row['POS'],
            "team_id": team_id,
            "birthdate": row['BIRTH_DATE'],
            "height": row['HEIGHT'],  # Keep height in original format
            "weight": f"{row['WEIGHT']} lbs" if row['WEIGHT'] else None,
            "college": row['COLLEGE'],
            "starter": False,  # You can modify this based on your criteria
            "number": row['NUMBER'],
            "nationality": row['NATIONALITY'],
            "experience": row['EXPERIENCE']
        })

    # Save to JSON file
    filename = f"{team_abbreviation.lower()}_roster.json"
    with open(filename, 'w') as f:
        json.dump(players, f, indent=2)
    print(f"Saved {filename}")

# Build one roster JSON file per entry of team_abbreviations
# (each iteration calls create_roster_json, which fetches that team's roster).
for team_abbreviation in team_abbreviations:
    team_name = team_abbreviations[team_abbreviation]
    create_roster_json(team_abbreviation, team_name)


Saved atl_roster.json
Saved bos_roster.json
Saved brk_roster.json
Saved cho_roster.json
Saved chi_roster.json
Saved cle_roster.json
Saved dal_roster.json
Saved den_roster.json
Saved det_roster.json
Saved gsw_roster.json
Saved hou_roster.json
Saved ind_roster.json
Saved lac_roster.json
Saved lal_roster.json
Saved mem_roster.json
Saved mia_roster.json
Saved mil_roster.json
Saved min_roster.json
Saved nop_roster.json
Saved nyk_roster.json
Saved okc_roster.json
Saved orl_roster.json
Saved phi_roster.json
Saved pho_roster.json
Saved por_roster.json
Saved sac_roster.json
Saved sas_roster.json
Saved tor_roster.json
Saved uta_roster.json
Saved was_roster.json


In [7]:
import json
import logging
from basketball_reference_scraper.players import get_game_logs
import pandas as pd
import warnings

# Suppress all warnings
# NOTE: this is a process-wide "ignore" filter; no warning of any category is
# shown for the rest of the kernel session.
warnings.filterwarnings("ignore")

# Setup logging
# Messages at INFO level and above are written to player_stats_processing.log
# as "<timestamp> - <level> - <message>".
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers, so re-running this cell in a kernel where logging was already
# configured leaves the earlier configuration in place.
logging.basicConfig(filename='player_stats_processing.log', 
                    level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Function to load a team's roster
def load_team_roster(team_abbreviation):
    """Return the parsed JSON content of ``<abbr>_roster.json``.

    The filename is built from the lower-cased ``team_abbreviation`` and is
    resolved relative to the current working directory.
    """
    roster_path = f"{team_abbreviation.lower()}_roster.json"
    with open(roster_path, 'r') as roster_file:
        roster = json.load(roster_file)
    return roster

# Function to load a team's match data
def load_team_matches(team_number, season=None):
    """Return the parsed JSON match list for one team.

    Args:
        team_number: Numeric part of the team id (``5`` for ``team5``).
        season: Optional season suffix. When omitted the file
            ``team<N>_matches.json`` is read, as before. When given,
            ``team<N>_matches_<season>.json`` is read instead, which is the
            naming used by ``create_team_matches_file`` in this notebook.

    Returns:
        Whatever ``json.load`` yields for the file.

    Raises:
        FileNotFoundError: If the selected file does not exist.
    """
    season_suffix = "" if season is None else f"_{season}"
    filename = f"team{team_number}_matches{season_suffix}.json"
    with open(filename, 'r') as file:
        return json.load(file)

# Function to find the match ID based on the date
def find_match_id(matches, game_date):
    """Return the ``_id`` of the first match whose ``match_date`` equals ``game_date``.

    Entries lacking a required key are reported through ``logging.error`` and
    skipped. ``None`` is returned when no entry matches.
    """
    for candidate in matches:
        try:
            if candidate['match_date'] != game_date:
                continue
            return candidate['_id']  # Use '_id' directly
        except KeyError as missing:
            logging.error(f"KeyError: Missing key '{missing.args[0]}' in match data for game on date {game_date}")
    return None

# Function to create statistics.json for a team
def create_team_statistics_json(team_abbreviation, team_number):
    """Build ``team<N>_statistics.json`` from a team's roster and match files.

    For every player in the roster the 2024 regular-season game logs are
    fetched with ``get_game_logs``. Each game with more than zero minutes
    played, and whose date matches one of the team's matches, becomes one
    statistics entry.

    Args:
        team_abbreviation: Team code used to locate ``<abbr>_roster.json``.
        team_number: Numeric team id; selects ``team<N>_matches.json`` and
            seeds the statistics id counter at ``team_number * 1000``.

    Returns:
        None. The statistics list is written to ``team<N>_statistics.json``.

    NOTE(review): games are matched to matches by date only, so a game a
    player logged for a different team on the same date would also match.
    NOTE(review): the id counter has no upper bound, so more than 1000 entries
    for one team would run into the next team's ``team_number * 1000`` range.
    """
    roster = load_team_roster(team_abbreviation)
    matches = load_team_matches(team_number)
    statistics = []
    stat_id_counter = team_number * 1000  # Start stat counter based on team number

    for player in roster:
        player_id = player['_id']
        player_name = player['name']

        logging.info(f"Processing player: {player_name}")
        try:
            # Get player game logs
            game_logs = get_game_logs(player_name, '2024', playoffs=False)
            logging.info(f"Successfully retrieved game logs for {player_name}.")
        except Exception as e:
            logging.error(f"Could not retrieve game logs for {player_name}: {e}")
            continue  # Continue with the next player if there is an issue

        # A missing or empty result must skip this player only; previously a
        # None result raised on .iterrows() and aborted the whole team.
        if game_logs is None or game_logs.empty:
            logging.warning(f"No game logs returned for {player_name}; skipping.")
            continue

        # Process each game
        for _, game in game_logs.iterrows():
            # Keep only games where minutes played (MP) is greater than 0
            if not _minutes_played(game['MP']) > 0:
                continue

            game_date = game['DATE'].strftime('%Y-%m-%d')
            match_id = find_match_id(matches, game_date)
            if not match_id:
                continue

            # Create the statistics entry
            statistics.append({
                "_id": f"stat{stat_id_counter}",
                "match_id": match_id,
                "player_id": player_id,
                "age": game['AGE'],
                "gs": game['GS'],
                "mp": game['MP'],
                "fg": game['FG'],
                "fga": game['FGA'],
                "drb": game['DRB'],
                "trb": game['TRB'],
                "ast": game['AST'],
                "stl": game['STL'],
                "blk": game['BLK'],
                "tov": game['TOV'],
                "pf": game['PF'],
                "pts": game['PTS'],
                "plus_minus": game['+/-'],
            })
            stat_id_counter += 1

    # Save statistics to JSON
    output_filename = f"team{team_number}_statistics.json"
    with open(output_filename, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)
    logging.info(f"Saved statistics for {team_abbreviation} to {output_filename}.")
    print(f"Statistics saved for {team_abbreviation}")


def _minutes_played(raw_mp):
    """Convert a raw ``MP`` cell into a number usable for a ``> 0`` check.

    The value is stringified first so that non-string cells (for example NaN)
    no longer raise ``AttributeError`` on ``.replace``. ``"MM:SS"`` becomes
    ``MM.SS``, which is only meaningful for the sign test, and anything
    non-numeric becomes NaN, which compares as not greater than zero.
    """
    return pd.to_numeric(str(raw_mp).replace(':', '.'), errors='coerce')

# Updated team abbreviations: (team code, label used as key in nba_teams_map)
team_abbreviations = dict([
    ("ATL", "Atlanta"),
    ("BOS", "Boston"),
    ("BRK", "Brooklyn"),
    ("CHO", "Charlotte"),
    ("CHI", "Chicago"),
    ("CLE", "Cleveland"),
    ("DAL", "Dallas"),
    ("DEN", "Denver"),
    ("DET", "Detroit"),
    ("GSW", "Golden State"),
    ("HOU", "Houston"),
    ("IND", "Indiana"),
    ("LAC", "LA"),
    ("LAL", "Los Angeles"),
    ("MEM", "Memphis"),
    ("MIA", "Miami"),
    ("MIL", "Milwaukee"),
    ("MIN", "Minnesota"),
    ("NOP", "New Orleans"),
    ("NYK", "New York"),
    ("OKC", "Oklahoma City"),
    ("ORL", "Orlando"),
    ("PHI", "Philadelphia"),
    ("PHO", "Phoenix"),
    ("POR", "Portland"),
    ("SAC", "Sacramento"),
    ("SAS", "San Antonio"),
    ("TOR", "Toronto"),
    ("UTA", "Utah"),
    ("WAS", "Washington"),
])

# Label -> "team<N>" id, where N is the label's 1-based position in this listing.
nba_teams_map = {
    city_label: f"team{position}"
    for position, city_label in enumerate(
        (
            "Atlanta",
            "Boston",
            "Brooklyn",
            "Charlotte",
            "Chicago",
            "Cleveland",
            "Dallas",
            "Denver",
            "Detroit",
            "Golden State",
            "Houston",
            "Indiana",
            "LA",
            "Los Angeles",
            "Memphis",
            "Miami",
            "Milwaukee",
            "Minnesota",
            "New Orleans",
            "New York",
            "Oklahoma City",
            "Orlando",
            "Philadelphia",
            "Phoenix",
            "Portland",
            "Sacramento",
            "San Antonio",
            "Toronto",
            "Utah",
            "Washington",
        ),
        start=1,
    )
}

# Produce one statistics file per team; the numeric id is the "team<N>" value
# from nba_teams_map with the "team" prefix removed.
for team_abbreviation in team_abbreviations:
    team_name = team_abbreviations[team_abbreviation]
    team_number = int(nba_teams_map[team_name].replace("team", ""))
    create_team_statistics_json(team_abbreviation, team_number)


You searched for "Saddiq Bey"
1 result found.
Saddiq Bey
Results for Saddiq Bey:

You searched for "Bogdan Bogdanovic"
2 results found.
You searched for "Kobe Bufkin"
0 results found.
You searched for "Clint Capela"
3 results found.
You searched for "Bruno Fernando"
1 result found.
Bruno Fernando
Results for Bruno Fernando:

You searched for "Trent Forrest"
1 result found.
Trent Forrest
Results for Trent Forrest:

You searched for "AJ Griffin"
3 results found.
You searched for "Mouhamed Gueye"
1 result found.
Mouhamed Sene
Results for Mouhamed Sene:

You searched for "De'Andre Hunter"
1 result found.
De'Andre Hunter
Results for De'Andre Hunter:

You searched for "Jalen Johnson"
38 results found.
You searched for "Vit Krejci"
0 results found.
You searched for "Seth Lundy"
1 result found.
Seth Curry
Results for Seth Curry:

You searched for "Garrison Mathews"
1 result found.
Garrison Mathews
Results for Garrison Mathews:

You searched for "Wesley Matthews"
2 results found.
You searched f

In [20]:
import pandas as pd
from bs4 import BeautifulSoup
from request_utils import get_wrapper

# Function to get the schedule and process it
def get_schedule(season, playoffs=False):
    """Download the monthly schedule tables for ``season`` and merge them.

    Args:
        season: Season identifier inserted into the
            ``NBA_<season>_games-<month>.html`` URL.
        playoffs: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        A DataFrame with columns ``DATE`` (converted with ``pd.to_datetime``),
        ``VISITOR``, ``VISITOR_PTS``, ``HOME`` and ``HOME_PTS``.

    Raises:
        ValueError: If no monthly table could be retrieved, or if the columns
            left after dropping the known extras are not exactly five.
    """
    # Local import so the function does not rely on the cell's import list.
    from io import StringIO

    months = ['October', 'November', 'December', 'January', 'February', 'March',
              'April', 'May', 'June']
    if season == 2023:
        months = ['October-2022', 'November', 'December', 'January', 'February', 'March',
                  'April', 'May', 'June']

    # Collect the monthly frames and concatenate once instead of growing a
    # DataFrame inside the loop.
    monthly_frames = []
    for month in months:
        url = f'https://www.basketball-reference.com/leagues/NBA_{season}_games-{month.lower()}.html'
        r = get_wrapper(url)
        if r.status_code != 200:
            continue  # months without a page are skipped
        soup = BeautifulSoup(r.content, 'html.parser')
        table = soup.find('table', attrs={'id': 'schedule'})
        if table:
            # Newer pandas deprecates passing literal HTML to read_html;
            # a file-like wrapper works on old and new versions alike.
            monthly_frames.append(pd.read_html(StringIO(str(table)))[0])

    if not monthly_frames:
        raise ValueError(f"No schedule tables could be retrieved for season {season}")

    df = pd.concat(monthly_frames, ignore_index=True)

    # Drop unnecessary columns dynamically based on the actual column names
    cols_to_remove = ['Start (ET)', 'Unnamed: 6', 'Unnamed: 7', 'Attend.', 'LOG', 'Arena', 'Notes']
    df = df.drop(columns=[col for col in cols_to_remove if col in df.columns])

    # Rename the remaining columns by position; fail with a readable message
    # if the page layout no longer leaves exactly these five columns.
    expected_columns = ['DATE', 'VISITOR', 'VISITOR_PTS', 'HOME', 'HOME_PTS']
    if len(df.columns) != len(expected_columns):
        raise ValueError(
            f"Unexpected schedule layout for season {season}: "
            f"remaining columns are {list(df.columns)}"
        )
    df.columns = expected_columns

    df['DATE'] = pd.to_datetime(df['DATE'])

    return df

# Function to create match files for each team
def create_team_matches_file(season):
    """Write one ``team<N>_matches_<season>.json`` file per team.

    The full schedule is obtained from ``get_schedule(season)``. For each team
    every game in which it is home or visitor becomes one entry, always
    written from that team's point of view: ``team1_id`` / ``score_team1``
    belong to the file's team, ``team2_id`` / ``score_team2`` to the opponent.
    Match ids start at ``<N> * 1000`` and increase by one per game, so the same
    game carries different ids in the two teams' files.

    Args:
        season: Season identifier forwarded to ``get_schedule`` and embedded in
            each output filename.

    Returns:
        None. Files are written to the current working directory and a
        confirmation line is printed per team.
    """
    # Local import: this cell's own import list does not include json.
    import json

    schedule = get_schedule(season)

    nba_teams_map = {
        "Atlanta Hawks": "team1",
        "Boston Celtics": "team2",
        "Brooklyn Nets": "team3",
        "Charlotte Hornets": "team4",
        "Chicago Bulls": "team5",
        "Cleveland Cavaliers": "team6",
        "Dallas Mavericks": "team7",
        "Denver Nuggets" : "team8",
        "Detroit Pistons": "team9",
        "Golden State Warriors": "team10",
        "Houston Rockets": "team11",
        "Indiana Pacers": "team12",
        "Los Angeles Clippers": "team13",
        "Los Angeles Lakers": "team14",
        "Memphis Grizzlies": "team15",
        "Miami Heat": "team16",
        "Milwaukee Bucks": "team17",
        "Minnesota Timberwolves": "team18",
        "New Orleans Pelicans": "team19",
        "New York Knicks": "team20",
        "Oklahoma City Thunder": "team21",
        "Orlando Magic": "team22",
        "Philadelphia 76ers": "team23",
        "Phoenix Suns": "team24",
        "Portland Trail Blazers": "team25",
        "Sacramento Kings": "team26",
        "San Antonio Spurs": "team27",
        "Toronto Raptors": "team28",
        "Utah Jazz": "team29",
        "Washington Wizards": "team30"    
    }

    def _json_score(value):
        # A missing score (NaN) would be dumped as the bare token NaN, which is
        # not valid JSON; emit null instead. Other values pass through as-is.
        return None if pd.isna(value) else value

    for team_name, team_id in nba_teams_map.items():
        plays_at_home = schedule['HOME'] == team_name
        plays_away = schedule['VISITOR'] == team_name
        team_schedule = schedule[plays_at_home | plays_away]
        matches = []
        match_id_counter = int(team_id.replace("team", "")) * 1000

        for _, row in team_schedule.iterrows():
            # Orient the entry so the file's own team is always "team1".
            if row['HOME'] == team_name:
                opponent_name = row['VISITOR']
                own_points, opponent_points = row['HOME_PTS'], row['VISITOR_PTS']
            else:
                opponent_name = row['HOME']
                own_points, opponent_points = row['VISITOR_PTS'], row['HOME_PTS']

            matches.append({
                "_id": f"match{match_id_counter}",
                "team1_id": team_id,
                # .get yields None for an opponent name missing from the map
                "team2_id": nba_teams_map.get(opponent_name),
                "score_team1": _json_score(own_points),
                "score_team2": _json_score(opponent_points),
                "match_date": row['DATE'].strftime('%Y-%m-%d')
            })
            match_id_counter += 1

        output_filename = f"{team_id}_matches_{season}.json"
        with open(output_filename, 'w') as outfile:
            json.dump(matches, outfile, indent=2)

        print(f"Saved matches for {team_name} to {output_filename}")

# Generate the per-team match files; season 2024 denotes the 2023-24 season
create_team_matches_file(season=2024)


Saved matches for Atlanta Hawks to team1_matches_2024.json
Saved matches for Boston Celtics to team2_matches_2024.json
Saved matches for Brooklyn Nets to team3_matches_2024.json
Saved matches for Charlotte Hornets to team4_matches_2024.json
Saved matches for Chicago Bulls to team5_matches_2024.json
Saved matches for Cleveland Cavaliers to team6_matches_2024.json
Saved matches for Dallas Mavericks to team7_matches_2024.json
Saved matches for Denver Nuggets to team8_matches_2024.json
Saved matches for Detroit Pistons to team9_matches_2024.json
Saved matches for Golden State Warriors to team10_matches_2024.json
Saved matches for Houston Rockets to team11_matches_2024.json
Saved matches for Indiana Pacers to team12_matches_2024.json
Saved matches for Los Angeles Clippers to team13_matches_2024.json
Saved matches for Los Angeles Lakers to team14_matches_2024.json
Saved matches for Memphis Grizzlies to team15_matches_2024.json
Saved matches for Miami Heat to team16_matches_2024.json
Saved ma