### Packages 

In [None]:
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonallplayers, playergamelog, playercareerstats, leaguegamelog

### Reading WNBA data with nba_api

In [None]:
# Pull the 2023 WNBA league-wide game log with one row per player per game.
season = '2023'
league_id = '10'  # WNBA

# LeagueGameLog returns team-level rows unless told otherwise; request
# player-level rows ('P') explicitly so the data matches the
# "player box scores" file written below.
wnba_game_log = leaguegamelog.LeagueGameLog(
    season=season,
    league_id=league_id,
    player_or_team_abbreviation='P',
)
wnba_data = wnba_game_log.get_data_frames()[0]

# Display the first few rows
print(wnba_data.head())

# Optional: Save to CSV for further analysis
wnba_data.to_csv("wnba_player_box_scores_per_game_2023.csv", index=False)


In [None]:
# Fetch the currently active WNBA players, then keep only their IDs
wnba_players = players.get_wnba_active_players()
player_ids = [entry['id'] for entry in wnba_players]

In [None]:
# Collect each player's 2023 game log, one DataFrame per player
all_players_data = []

season = '2023'
for player_id in player_ids:
    try:
        gamelog = playergamelog.PlayerGameLog(
            player_id=player_id, season=season, league_id_nullable="10"
        )
        player_games = gamelog.get_data_frames()[0]
    except Exception as e:
        # Best effort: a single failed request should not throw away the
        # logs already collected for the other players.
        print(f"Error retrieving game log for player {player_id}: {e}")
        continue
    all_players_data.append(player_games)

# Combine all players' data into a single DataFrame; pd.concat raises on an
# empty list, so fall back to an empty frame when nothing was retrieved.
if all_players_data:
    full_data = pd.concat(all_players_data, ignore_index=True)
else:
    full_data = pd.DataFrame()
print(full_data.head())

# Save to CSV for further analysis if desired
full_data.to_csv("wnba_all_players_game_logs_2023.csv", index=False)

### Retrieve Data for Pregnant Athletes

In [None]:
# Full names of the WNBA players whose stats are fetched below.
# Names must match the 'full_name' field of the player directory exactly;
# commented-out entries are currently excluded from the analysis.
player_names = [
    'Skylar Diggins-Smith',
    'Dearica Hamby',
    # 'Katie Lou Samuelson',
    # 'Napheesa Collier',
    # 'Candace Parker',
    'DeWanna Bonner'
]

# Fetch the WNBA player directory used to resolve names to player IDs.
# NOTE: this rebinds `wnba_players`, which an earlier cell assigned from
# players.get_wnba_active_players().
wnba_players = players.get_wnba_players()

# Function to get player ID
def get_player_id(player_name, player_pool=None):
    """Return the ID of the first player whose full name equals *player_name*.

    Args:
        player_name: Full name to look up. The comparison is exact
            (case- and whitespace-sensitive).
        player_pool: Optional iterable of player dicts, each with
            'full_name' and 'id' keys. When omitted, the module-level
            ``wnba_players`` list is searched. Passing the pool explicitly
            keeps the lookup independent of which cell last assigned
            ``wnba_players``.

    Returns:
        The 'id' value of the first matching entry, or None if no entry
        matches.
    """
    pool = wnba_players if player_pool is None else player_pool
    return next(
        (player['id'] for player in pool if player['full_name'] == player_name),
        None,
    )

# Per-season stats frames, one per player that was found and fetched
player_season_stats_list = []

# Fetch per-season stats for each player and store in list
for name in player_names:
    player_id = get_player_id(name)
    # Compare against None explicitly: get_player_id signals "not found" with
    # None, and a truthiness test would also reject a falsy-but-valid ID.
    if player_id is None:
        print(f"Player {name} not found in WNBA players list.")
        continue

    try:
        # Retrieve career stats broken down by season
        career_stats = playercareerstats.PlayerCareerStats(player_id=player_id, league_id_nullable='10')
        season_df = career_stats.get_data_frames()[0]  # Data per season
    except Exception as e:
        # Best effort: keep going so one failed request does not lose the
        # stats already collected for the other players.
        print(f"Error retrieving data for {name}: {e}")
        continue

    # Add the player's name to each row
    season_df.insert(0, 'Player', name)
    player_season_stats_list.append(season_df)

# Concatenate all player stats into a single DataFrame; pd.concat raises on
# an empty list, so fall back to an empty frame when nothing was fetched.
if player_season_stats_list:
    player_season_stats_df = pd.concat(player_season_stats_list, ignore_index=True)
else:
    player_season_stats_df = pd.DataFrame()
player_season_stats_df.head()

In [None]:
# Remove the cap on how many columns pandas shows when rendering a frame
pd.options.display.max_columns = None
player_season_stats_df.head()

# Write the selected players' per-season stats to disk
player_season_stats_df.to_csv("wnba_pregancy_analysis.csv", index=False)

### Retrieve Season-by-Season Career Stats for Active WNBA Players

In [None]:
# Keep only the directory entries flagged as active
active_wnba_players = [entry for entry in wnba_players if entry['is_active']]

# One per-season stats frame per player whose request succeeded
active_player_stats_list = []

for player in active_wnba_players:
    player_id = player['id']
    player_name = player['full_name']

    # Best effort: report a failure for this player and move on to the next
    try:
        career_stats = playercareerstats.PlayerCareerStats(
            player_id=player_id,
            league_id_nullable='10',
        )
        # First returned frame holds the per-season rows
        season_df = career_stats.get_data_frames()[0]
        # Label every row with the player's name as the leading column
        season_df.insert(0, 'Player', player_name)
        active_player_stats_list.append(season_df)
    except Exception as err:
        print(f"Error retrieving data for {player_name}: {err}")

# Stack the per-player frames, or use an empty frame if none were collected
active_wnba_season_stats_df = (
    pd.concat(active_player_stats_list, ignore_index=True)
    if active_player_stats_list
    else pd.DataFrame()
)

active_wnba_season_stats_df.head()

In [None]:
# Load season-level stats for active players from a saved CSV. This rebinds
# active_wnba_season_stats_df, replacing whatever the name held before.
# NOTE(review): no visible cell writes "active_wnba_players_2024.csv" —
# confirm how and when this file is produced.
active_wnba_season_stats_df = pd.read_csv("active_wnba_players_2024.csv")
# Show the available column names
active_wnba_season_stats_df.columns

In [None]:
# Derive an integer Year column from the first four characters of SEASON_ID.
# Cast to str first: after the CSV round-trip pandas may have parsed all-digit
# season IDs as integers, and the `.str` accessor only works on string data.
active_wnba_season_stats_df["Year"] = (
    active_wnba_season_stats_df["SEASON_ID"].astype(str).str[:4].astype(int)
)
active_wnba_season_stats_df.to_excel("active_wnba_players_2024.xlsx", index=False)
active_wnba_season_stats_df.head()

### Prepare Data for Radar Chart

In [None]:
# Prepare data for the radar chart

# Select relevant columns for performance metrics
metrics_columns = ['PLAYER_AGE', 'GP', 'MIN', 'GS', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'OREB', 'DREB', 'AST', 'STL', 'BLK', 'TOV', 'PTS', 'PF']

# Work on a copy so the scaling below does not alter the source frame
df = active_wnba_season_stats_df.copy()

# Reshape the raw values to long format for Tableau
# (one output row per source row per metric)
df_original = df.melt(id_vars=["Player", "SEASON_ID"],
                      value_vars=metrics_columns,
                      var_name="Metric",
                      value_name="Original Value")

# Rescale every metric linearly to the 0-100 range across all rows.
# NOTE: this is min-max scaling, not a true percentile rank; the
# "Percentile Value" column name is kept so existing consumers keep working.
scaler = MinMaxScaler()
df[metrics_columns] = scaler.fit_transform(df[metrics_columns]) * 100

# Reshape the scaled values to long format for Tableau
df_percentile = df.melt(id_vars=["Player", "SEASON_ID"],
                        value_vars=metrics_columns,
                        var_name="Metric",
                        value_name="Percentile Value")

# Both melts come from the same rows with the same id/value columns, so they
# are row-aligned. Attach the scaled values by position instead of merging on
# (Player, SEASON_ID, Metric): those keys are not guaranteed unique
# (presumably a player can have several rows for one season — verify against
# the data), and a merge would then cross-multiply the duplicated rows.
df_final = df_original.copy()
df_final["Percentile Value"] = df_percentile["Percentile Value"].to_numpy()

# Save as an Excel workbook for Tableau
file_path = "wnba_2024_radar_chart_data_percentile.xlsx"
df_final.to_excel(file_path, index=False)