In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

In [24]:
# Load the 2022-23 per-player stats table. The source file is semicolon-delimited
# and is decoded as ISO-8859-1.
data2 = pd.read_csv('2022_2023_NBA_Player_Stats.csv', encoding='ISO-8859-1', delimiter=';')

# Write the same table back out as UTF-8, omitting the pandas index column.
# (to_csv uses its default separator here, so the output is comma-delimited.)
data2.to_csv('2022_2023_NBA_Player_Stats_Transformed.csv', encoding='utf-8', index=False)

In [25]:
# Show the column labels of the loaded table (used below to pick features and targets)
data2.columns

Index(['Rk', 'Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%',
       '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')

In [26]:
# Predictor columns (X) shared by both regressions
feature_columns = ['Age', 'G', 'GS', 'MP', 'FG', 'FGA']
features = data2[feature_columns]

# Regression targets (y): one Series for points, one for assists
target_pts, target_ast = data2['PTS'], data2['AST']


In [27]:
# Hold out 20% of the rows for testing. Splitting the features and both targets
# in a single call keeps their rows aligned; random_state fixes the shuffle.
(
    X_train, X_test,
    y_train_pts, y_test_pts,
    y_train_ast, y_test_ast,
) = train_test_split(
    features,
    target_pts,
    target_ast,
    test_size=0.2,
    random_state=42,
)

In [28]:
# One linear regression per target; both are trained on the same feature matrix
model_pts, model_ast = LinearRegression(), LinearRegression()

# Fit each model against its own target column
model_pts.fit(X_train, y_train_pts)
model_ast.fit(X_train, y_train_ast)

LinearRegression()

In [29]:
# Predict points and assists for the held-out test rows
pts_predictions = model_pts.predict(X_test)
ast_predictions = model_ast.predict(X_test)

# Report how well each model does on the held-out rows, so the test split is
# actually used for evaluation. `score` returns the coefficient of
# determination (R^2) of the model's predictions on the data passed in.
print("Test R^2 (PTS):", model_pts.score(X_test, y_test_pts))
print("Test R^2 (AST):", model_ast.score(X_test, y_test_ast))


In [30]:
# Distinct player names, in order of first appearance in the table
player_names = pd.unique(data2['Player'])
print(player_names)

['Precious Achiuwa' 'Steven Adams' 'Bam Adebayo' 'Ochai Agbaji'
 'Santi Aldama' 'Nickeil Alexander-Walker' 'Grayson Allen' 'Jarrett Allen'
 'Jose Alvarado' 'Kyle Anderson' 'Giannis Antetokounmpo'
 'Thanasis Antetokounmpo' 'Cole Anthony' 'OG Anunoby' 'Ryan Arcidiacono'
 'Deni Avdija' 'Deandre Ayton' 'Udoka Azubuike' 'Marvin Bagley III'
 'Patrick Baldwin Jr.' 'LaMelo Ball' 'Mo Bamba' 'Paolo Banchero'
 'Desmond Bane' 'Dalano Banton' 'Dominick Barlow' 'Harrison Barnes'
 'Scottie Barnes' 'RJ Barrett' 'Will Barton' 'Charles Bassey'
 'Keita Bates-Diop' 'Nicolas Batum' 'Darius Bazley' 'Bradley Beal'
 'Malik Beasley' 'MarJon Beauchamp' 'D?vis Bert?ns' 'Patrick Beverley'
 'Saddiq Bey' 'Khem Birch' 'Goga Bitadze' 'Bismack Biyombo'
 'Buddy Boeheim' 'Bogdan Bogdanovi?' 'Bojan Bogdanovi?' 'Bol Bol'
 'Leandro Bolmaro' 'Devin Booker' 'Brandon Boston Jr.' 'Chris Boucher'
 'James Bouknight' 'Jamaree Bouyea' 'Tony Bradley' 'Malaki Branham'
 'Jarrell Brantley' 'Christian Braun' 'Mikal Bridges' 'Oshae Bris

In [31]:
# Ask for a player and look up their row(s) in the stats table.
# Surrounding whitespace is stripped so a stray space does not defeat the
# exact-match comparison against the 'Player' column.
player_name = input("Enter the player's name: ").strip()
player_row = data2[data2['Player'] == player_name]
if player_row.empty:
    # Fail here with a clear message rather than with an opaque IndexError
    # when the first matching row is read later on.
    raise ValueError(f"No player named {player_name!r} found in the dataset")

# Take the model inputs from the same column list the models were trained on,
# so the prediction input cannot drift out of sync with the training features.
player_features = player_row[features.columns]

In [32]:
# Recorded points and assists, read from the first matching row
pts_actual = player_row['PTS'].iloc[0]
ast_actual = player_row['AST'].iloc[0]

# Model output for the first row of that player's feature values
pts_predicted, ast_predicted = (
    model.predict(player_features)[0] for model in (model_pts, model_ast)
)

# NOTE(review): the models were fitted on rows of this same table, so the
# "predicted" figures are estimates from the player's current-season features;
# the "2023-2024" wording is kept exactly as originally written.
for caption, shown in (
    ("Player:", player_name),
    ("2022-2023 Points:", pts_actual),
    ("2023-2024 Predicted Points:", pts_predicted),
    ("2022-2023 Assists:", ast_actual),
    ("2023-2024 Predicted Assists:", ast_predicted),
):
    print(caption, shown)

Player: Zion Williamson
2022-2023 Points: 26.0
2023-2024 Predicted Points: 25.64097220997928
2022-2023 Assists: 4.6
2023-2024 Predicted Assists: 3.9469072449561


In [33]:
# Rank the input features by random-forest importance, averaged over both targets.
# The forest is randomised, so a fixed seed is needed for the ranking to be
# reproducible between runs; 42 matches the seed used for the train/test split.
rf = RandomForestRegressor(random_state=42)

# Importances for the points target. They are read immediately after fitting
# because the same estimator object is refitted for assists below.
rf.fit(X_train, y_train_pts)
feature_importances_pts = rf.feature_importances_

# Refit the same estimator on the assists target and read its importances
rf.fit(X_train, y_train_ast)
feature_importances_ast = rf.feature_importances_

# Equal-weight average of the two importance vectors
average_feature_importances = (feature_importances_pts + feature_importances_ast) / 2

# Column positions ordered from most to least important
sorted_indices = average_feature_importances.argsort()[::-1]

# Feature names in that order. No feature is dropped: this is a ranking of all
# input columns, printed under the original "Selected Features" heading.
selected_features = features.columns[sorted_indices]

print("Selected Features:")
print(selected_features)


Selected Features:
Index(['FG', 'MP', 'FGA', 'Age', 'G', 'GS'], dtype='object')
