<a href="https://colab.research.google.com/github/kanikasp113/kanikasp113/blob/main/2022_2023_NBA_Analzyer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn import datasets


In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Read the player statistics CSV from the current working directory into a DataFrame.
# The bare expression on the last line makes the notebook render the frame as the cell output.
raw_data = pd.read_csv('./nba_stats_2022.csv')
raw_data

Unnamed: 0,RANK,NAME,TEAM,POS,AGE,GP,MPG,USG%,TO%,FTA,...,APG,SPG,BPG,TPG,P+R,P+A,P+R+A,VI,ORtg,DRtg
0,,Joel Embiid,Phi,C-F,29.1,66,34.6,37.0,14.5,771,...,4.2,1.0,1.7,3.4,43.2,37.2,47.4,13.0,124.4,104.1
1,,Luka Doncic,Dal,F-G,24.1,66,36.2,37.7,14.0,694,...,8.0,1.4,0.5,3.6,41.0,40.4,49.0,14.4,120.0,109.2
2,,Damian Lillard,Por,G,32.7,58,36.3,33.8,13.7,558,...,7.3,0.9,0.3,3.3,36.9,39.5,44.3,11.5,126.4,117.6
3,,Shai Gilgeous-Alexander,Okc,G-F,24.7,68,35.5,32.8,12.2,739,...,5.5,1.6,1.0,2.8,36.2,36.9,41.7,10.6,124.9,109.8
4,,Giannis Antetokounmpo,Mil,F,28.3,63,32.1,38.8,16.2,772,...,5.7,0.8,0.8,3.9,42.9,36.8,48.6,15.9,116.8,100.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604,,Michael Foster Jr.,Phi,F,20.2,1,1.0,0.0,,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,
605,,Frank Jackson,Uta,G,24.9,1,5.1,24.6,0.0,0,...,1.0,0.0,0.0,0.0,2.0,1.0,3.0,0.0,48.0,120.2
606,,Alondes Williams,Bro,G,23.8,1,5.3,16.7,100.0,0,...,0.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,,97.7
607,,Jamaree Bouyea,Was,G,23.8,1,5.5,7.9,0.0,0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,99.9


In [None]:
import pandas as pd



# Per-position stat weights used by calculate_weighted_rating. Each inner dict maps a
# stat column name to its weight; the weights of every position sum to 1.0.
# NOTE(review): the sample output shows 'FG' resolving to 0 for the players printed, which
# suggests the CSV has no column literally named 'FG' — confirm the intended column name.
# NOTE(review): 'FTA' is printed as a large count (e.g. 771) next to per-game values such as
# PPG 33.1, so it dominates the weighted sum — confirm whether it should be normalised.
position_weights = {
    'G': {'PPG': 0.3, 'FG': 0.3, 'APG': 0.15, 'FTA': 0.15, 'RPG': 0.1},
    'SG': {'PPG': 0.3, 'FG': 0.3, 'FTA': 0.15, 'APG': 0.15, 'RPG': 0.1},
    'SF': {'PPG': 0.3, 'FG': 0.3, 'RPG': 0.15, 'APG': 0.15, 'FTA': 0.1},
    'PF': {'PPG': 0.3, 'RPG': 0.3, 'BPG': 0.15, 'APG': 0.15, 'SPG': 0.1},
    'C': {'RPG': 0.3, 'BPG': 0.3, 'PPG': 0.2, 'SPG': 0.1, 'FTA': 0.1}
}


def _blend_weights(first, second):
    """Average two weight mappings stat by stat; a stat absent from one side counts as 0."""
    # Iterate in a fixed order (first mapping, then extras from the second) so the debug
    # output and the floating-point summation order are the same on every run.
    stats = list(first) + [stat for stat in second if stat not in first]
    return {stat: (first.get(stat, 0) + second.get(stat, 0)) / 2 for stat in stats}


def _weights_for_position(position):
    """Return the weight mapping for a POS value, or None when the position is not handled."""
    if position in position_weights:
        return position_weights[position]
    if position == 'F':  # Generic forward: use the SF weights
        return position_weights['SF']
    if position == 'PG':  # Point guard: use the generic guard weights
        return position_weights['G']
    if position in ('G-F', 'F-G'):  # Guard-forwards: average of G and SF
        return _blend_weights(position_weights['G'], position_weights['SF'])
    if position in ('C-F', 'F-C'):  # Center-forwards: average of C and SF
        return _blend_weights(position_weights['C'], position_weights['SF'])
    return None


def calculate_weighted_rating(player_row):
    """Compute a position-weighted rating for one player row.

    Args:
        player_row: a pandas Series for one player (as passed by ``DataFrame.apply(axis=1)``).
            Must contain 'POS'; 'NAME' is read only for the debug output.

    Returns:
        The sum of ``stat value * weight`` over the weights chosen for the player's
        position. Stats that are absent from the row or are NaN contribute 0. Returns 0
        (after printing a notice) when the position is not handled.
    """
    position = player_row['POS']
    weights = _weights_for_position(position)
    if weights is None:
        print(f"Unhandled position: {position}")  # Debug: print out any unhandled positions
        return 0  # Return 0 for unhandled positions

    # Treat missing columns and NaN cells alike, so one empty cell cannot turn the whole
    # rating into NaN.
    stat_values = {}
    for stat in weights:
        value = player_row.get(stat, 0)
        stat_values[stat] = 0 if pd.isna(value) else value

    # Calculate the weighted rating
    rating = sum(stat_values[stat] * weight for stat, weight in weights.items())

    # Debug: print out the rating calculation for the first few players.
    # The row label may not be numeric (e.g. a name-indexed frame); skip the debug output then.
    try:
        is_debug_row = player_row.name < 5
    except TypeError:
        is_debug_row = False

    if is_debug_row:
        print(f"Calculating rating for {player_row['NAME']}, POS: {position}")
        for stat, weight in weights.items():
            print(f"Stat: {stat}, Weight: {weight}, Value: {stat_values[stat]}")
        print(f"Rating: {rating}\n")

    return rating



def load_data(file_path):
    """Read the player CSV at ``file_path`` and make sure it carries a 'Rating' column.

    When the file already has a 'Rating' column the frame is returned as read; otherwise
    the column is filled by applying ``calculate_weighted_rating`` to every row.
    """
    players = pd.read_csv(file_path)

    # Nothing to compute when the CSV ships its own ratings.
    if 'Rating' in players.columns:
        return players

    # The rating function reads whichever stat columns (PPG, FG, etc.) the row provides.
    players['Rating'] = players.apply(calculate_weighted_rating, axis=1)
    return players

def calculate_player_ratings(dataframe):
    """Rate every player in ``dataframe`` and return a four-column summary.

    The 'Rating' column is written onto the frame that was passed in (overwriting any
    existing one), and the returned frame holds only NAME, TEAM, POS and Rating.
    """
    ratings = dataframe.apply(calculate_weighted_rating, axis=1)
    dataframe['Rating'] = ratings

    summary_columns = ['NAME', 'TEAM', 'POS', 'Rating']
    return dataframe[summary_columns]

def main():
    """Load the player CSV, rate every player and print the resulting table."""
    stats_csv = 'nba_stats_2022.csv'  # Update with the actual path to your data file
    rankings = calculate_player_ratings(load_data(stats_csv))

    # Possible next steps towards predicting team matchups: aggregate player ratings by
    # team, consider head-to-head statistics, and use historical matchup outcomes to
    # train a predictive model.

    print(rankings)


# Run the program when executed as a script (this is also true inside a notebook kernel).
if __name__ == '__main__':
    main()


Calculating rating for Joel Embiid, POS: C-F
Stat: BPG, Weight: 0.15, Value: 1.7
Stat: APG, Weight: 0.075, Value: 4.2
Stat: PPG, Weight: 0.25, Value: 33.1
Stat: FG, Weight: 0.15, Value: 0
Stat: SPG, Weight: 0.05, Value: 1.0
Stat: FTA, Weight: 0.1, Value: 771
Stat: RPG, Weight: 0.22499999999999998, Value: 10.2
Rating: 88.29

Calculating rating for Luka Doncic, POS: F-G
Stat: APG, Weight: 0.15, Value: 8.0
Stat: PPG, Weight: 0.3, Value: 32.4
Stat: FG, Weight: 0.3, Value: 0
Stat: FTA, Weight: 0.125, Value: 694
Stat: RPG, Weight: 0.125, Value: 8.6
Rating: 98.745

Calculating rating for Damian Lillard, POS: G
Stat: PPG, Weight: 0.3, Value: 32.2
Stat: FG, Weight: 0.3, Value: 0
Stat: APG, Weight: 0.15, Value: 7.3
Stat: FTA, Weight: 0.15, Value: 558
Stat: RPG, Weight: 0.1, Value: 4.8
Rating: 94.935

Calculating rating for Shai Gilgeous-Alexander, POS: G-F
Stat: APG, Weight: 0.15, Value: 5.5
Stat: PPG, Weight: 0.3, Value: 31.4
Stat: FG, Weight: 0.3, Value: 0
Stat: FTA, Weight: 0.125, Value: 739


In [None]:
import pandas as pd

# Requires 'position_weights', 'calculate_weighted_rating' and 'calculate_player_ratings'
# from the previous cell; run that cell first.


def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a DataFrame, unmodified.

    Note: this definition replaces the earlier ``load_data`` in this notebook, which also
    added a 'Rating' column; here the frame is returned exactly as read.
    """
    frame = pd.read_csv(file_path)
    return frame

def display_teams(player_stats):
    """Print the distinct values of the 'TEAM' column, sorted, under an 'NBA Teams:' header."""
    distinct_teams = player_stats['TEAM'].unique()
    print("NBA Teams:")

    ordered_teams = sorted(distinct_teams)
    if ordered_teams:
        # One team per line.
        print(*ordered_teams, sep="\n")

    # Trailing blank lines separate the list from whatever is printed next.
    print("\n")

def _match_team(entry, teams):
    """Return the team that ``entry`` refers to, or None when nothing matches.

    An exact match wins; otherwise surrounding whitespace and letter case are ignored and
    the team is returned as it is spelled in ``teams``.
    """
    if entry in teams:
        return entry
    wanted = entry.strip().casefold()
    for team in teams:
        # Non-string entries (e.g. NaN from a missing TEAM cell) can never be typed in.
        if isinstance(team, str) and team.strip().casefold() == wanted:
            return team
    return None


def get_team_selection(teams):
    """Prompt on stdin until two different, recognised teams have been chosen.

    Args:
        teams: collection of valid team labels (a list or array of strings).

    Returns:
        Tuple ``(team1, team2)`` of two distinct teams. Input such as ' phi ' is accepted
        for a team listed as 'PHI' and is returned in the listed spelling.
    """
    team1 = _match_team(input("Select the first team: "), teams)
    while team1 is None:
        print("Team not recognized. Please try again.")
        team1 = _match_team(input("Select the first team: "), teams)

    team2 = _match_team(input("Select the second team: "), teams)
    # Compare the resolved teams so 'phi' cannot be picked against 'PHI'.
    while team2 is None or team2 == team1:
        print("Team not recognized or same as the first team. Please try again.")
        team2 = _match_team(input("Select the second team: "), teams)

    return team1, team2


def calculate_team_strength(player_stats, team_stats, team, top_n_players=7, decay=0.2):
    """Score a team from its best-rated players, scaled by its win percentage.

    Args:
        player_stats: DataFrame with 'TEAM' and 'Rating' columns.
        team_stats: DataFrame with 'TEAM', 'W' and 'L' columns.
        team: team label to score; compared for equality against both 'TEAM' columns.
        top_n_players: how many of the team's highest-rated players to consider.
        decay: weight lost per rank; the k-th best player (k starting at 0) is weighted
            ``max(0, 1 - decay * k)``.

    Returns:
        The weighted sum of the top players' ratings multiplied by ``1 + W / (W + L)``.
        When the team has no row in ``team_stats`` (a notice is printed) or has no games
        recorded, the unscaled weighted sum is returned.
    """
    # Select top N players based on rating
    top_players = (
        player_stats[player_stats['TEAM'] == team]
        .sort_values(by='Rating', ascending=False)
        .head(top_n_players)
    )

    # Decaying weights: the best player counts most. Clamp at zero so that deep-bench
    # players can never get a negative weight and drag the team's strength down.
    weights = [max(0.0, 1 - decay * rank) for rank in range(top_n_players)]

    # Combined strength of the top N players
    team_strength_from_players = sum(
        rating * weight for rating, weight in zip(top_players['Rating'], weights)
    )

    team_record = team_stats[team_stats['TEAM'] == team]
    if team_record.empty:
        print(f"Team '{team}' not found in team statistics.")
        return team_strength_from_players  # Fallback in case team is not in team_stats

    wins = team_record['W'].iloc[0]
    losses = team_record['L'].iloc[0]
    games_played = wins + losses
    # Written as "not > 0" so that a NaN record also takes the fallback path.
    if not games_played > 0:
        return team_strength_from_players

    # Combine player strength with team performance
    win_percentage = wins / games_played
    return team_strength_from_players * (1 + win_percentage)

def predict_matchup(player_stats, team_stats, team1, team2):
    """Print both teams' strengths, their win chances in percent, and the predicted winner.

    Each strength comes from ``calculate_team_strength`` using the top 7 players. A team's
    win chance is its share of the combined strength, rounded to a whole percent; the
    second team's chance is the remainder to 100. Nothing is returned.
    """
    team1_strength = calculate_team_strength(player_stats, team_stats, team1, top_n_players=7)
    team2_strength = calculate_team_strength(player_stats, team_stats, team2, top_n_players=7)

    total_strength = team1_strength + team2_strength
    if total_strength > 0:
        share = round((team1_strength / total_strength) * 100)
        # Keep the figure a valid percentage even if one strength is negative.
        team1_probability = min(100, max(0, share))
    else:
        # Zero or NaN combined strength gives no basis for a split; call it even instead
        # of dividing by zero or rounding NaN.
        team1_probability = 50
    team2_probability = 100 - team1_probability

    print(f"\n{team1} Total Strength: {team1_strength}")
    print(f"{team2} Total Strength: {team2_strength}")
    print(f"\n{team1} has a {team1_probability}% chance of winning.")
    print(f"{team2} has a {team2_probability}% chance of winning.")

    if team1_strength > team2_strength:
        print(f"\nPredicted Winner: {team1}")
    elif team2_strength > team1_strength:
        print(f"\nPredicted Winner: {team2}")
    else:
        print("\nIt's a tie!")

def main():
    """Interactive entry point: load the data, let the user pick two teams, print a prediction.

    Reads 'nba_stats_2022.csv' and 'team_stats2022.csv' from the working directory and
    relies on ``calculate_player_ratings`` having been defined by an earlier cell.
    """
    player_file_path = 'nba_stats_2022.csv'
    team_stats_file_path = 'team_stats2022.csv'

    # Load player and team stats
    player_stats = load_data(player_file_path)
    team_stats = pd.read_csv(team_stats_file_path)

    # Standardize team names in both DataFrames so they can be compared for equality
    team_stats['TEAM'] = team_stats['TEAM'].str.upper().str.strip()
    player_stats['TEAM'] = player_stats['TEAM'].str.upper().str.strip()

    # Calculate player ratings after loading the data
    player_stats = calculate_player_ratings(player_stats)

    # Win percentage W / (W + L), the same formula calculate_team_strength applies.
    # Teams with no games recorded get NaN instead of a division by zero.
    games_played = team_stats['W'] + team_stats['L']
    team_stats['WIN%'] = team_stats['W'] / games_played.where(games_played > 0)

    # Display available teams
    display_teams(player_stats)

    # User selects two teams to match up
    teams = player_stats['TEAM'].unique()
    team1, team2 = get_team_selection(teams)

    # Predict the outcome of the matchup
    predict_matchup(player_stats, team_stats, team1, team2)


# Run the program when executed as a script (this is also true inside a notebook kernel).
if __name__ == '__main__':
    main()


Calculating rating for Joel Embiid, POS: C-F
Stat: BPG, Weight: 0.15, Value: 1.7
Stat: APG, Weight: 0.075, Value: 4.2
Stat: PPG, Weight: 0.25, Value: 33.1
Stat: FG, Weight: 0.15, Value: 0
Stat: SPG, Weight: 0.05, Value: 1.0
Stat: FTA, Weight: 0.1, Value: 771
Stat: RPG, Weight: 0.22499999999999998, Value: 10.2
Rating: 88.29

Calculating rating for Luka Doncic, POS: F-G
Stat: APG, Weight: 0.15, Value: 8.0
Stat: PPG, Weight: 0.3, Value: 32.4
Stat: FG, Weight: 0.3, Value: 0
Stat: FTA, Weight: 0.125, Value: 694
Stat: RPG, Weight: 0.125, Value: 8.6
Rating: 98.745

Calculating rating for Damian Lillard, POS: G
Stat: PPG, Weight: 0.3, Value: 32.2
Stat: FG, Weight: 0.3, Value: 0
Stat: APG, Weight: 0.15, Value: 7.3
Stat: FTA, Weight: 0.15, Value: 558
Stat: RPG, Weight: 0.1, Value: 4.8
Rating: 94.935

Calculating rating for Shai Gilgeous-Alexander, POS: G-F
Stat: APG, Weight: 0.15, Value: 5.5
Stat: PPG, Weight: 0.3, Value: 31.4
Stat: FG, Weight: 0.3, Value: 0
Stat: FTA, Weight: 0.125, Value: 739
