In [14]:
# Predicting NBA salaries using machine learning on AWS services
# 1. Data Collection, reformat data to include salary as the last column along with all other items
# 2. Data Cleaning, remove all rows with missing values
# 3. Data Preprocessing, split data into training and testing sets, utilize 2024 free agent list as testing data
# 4. Model Selection, utilize Random Forest Regressor to predict salaries based on player statistics, including games played (GP) and games started (GS)
# 5. Model Training, train the model on the training data
# 6. Model Evaluation, evaluate model on the testing data
# 7. Model Tuning, tune the model to improve accuracy
# 8. Model Prediction, predict the salaries of the 2024 free agents
# 9. Model Deployment, deploy model to AWS S3 bucket
# 10. Model Monitoring, monitor model performance and update model as needed
# 11. Model Maintenance, maintain model by updating it as needed

# Software Utilization
# 1. AWS Redshift, store model data in Redshift
# 2. AWS SageMaker, train and evaluate model on SageMaker
# 3. AWS Lambda, run code on Lambda to predict salaries
# 4. AWS API Gateway, create an API to access the model
# 5. AWS CloudWatch, monitor model performance
# 6. Terraform, create a stack to deploy model
# 7. AWS IAM, manage permissions for model deployment
# 8. AWS SNS, send notifications for model updates
# 9. AWS SQS, manage model updates
# 10. AWS SES, send email notifications for model updates

# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt






In [15]:

# Load the two raw input tables from CSV files in the working directory:
# per-player statistics and per-player salary figures.
stats = pd.read_csv('playerstats2425.csv')
salaries = pd.read_csv('salaries2425.csv')

# Preview the first rows of each table (stats first, then salaries).
print(stats.head(), salaries.head(), sep='\n')

    Rk                   Player   Age Team Pos     G    GS      MP     FG  \
0  1.0              Luka Dončić  24.0  DAL  PG  70.0  70.0  2624.0  804.0   
1  2.0  Shai Gilgeous-Alexander  25.0  OKC  PG  75.0  75.0  2553.0  796.0   
2  3.0    Giannis Antetokounmpo  29.0  MIL  PF  73.0  73.0  2567.0  837.0   
3  4.0            Jalen Brunson  27.0  NYK  PG  77.0  77.0  2726.0  790.0   
4  5.0             Nikola Jokić  28.0  DEN   C  79.0  79.0  2737.0  822.0   

      FGA  ...    DRB    TRB    AST    STL   BLK    TOV     PF     PTS  \
0  1652.0  ...  588.0  647.0  686.0   99.0  38.0  282.0  149.0  2370.0   
1  1487.0  ...  350.0  415.0  465.0  150.0  67.0  162.0  184.0  2254.0   
2  1369.0  ...  645.0  841.0  476.0   87.0  79.0  250.0  210.0  2222.0   
3  1648.0  ...  235.0  278.0  519.0   70.0  13.0  186.0  144.0  2212.0   
4  1411.0  ...  753.0  976.0  708.0  108.0  68.0  237.0  194.0  2085.0   

                     Awards  Player-additional  
0         MVP-3CPOY-6ASNBA1          doncil

In [16]:
# Cleaning Stats Data
# Rows whose Team value is one of the multi-team markers ('2TM' or '3TM').
multi_team_players = stats.loc[stats['Team'].isin(['2TM', '3TM'])]

# Subset of the above: only the rows marked '3TM'.
three_team_players = stats.loc[stats['Team'] == '3TM']

# Inspect the three-team rows.
print(three_team_players)

        Rk              Player   Age Team Pos     G   GS     MP     FG    FGA  \
309  254.0       Malachi Flynn  25.0  3TM  PG  69.0  0.0  876.0  136.0  325.0   
379  301.0    Danilo Gallinari  35.0  3TM  SF  49.0  0.0  630.0   83.0  190.0   
425  334.0        Shake Milton  27.0  3TM  SG  48.0  0.0  581.0   79.0  195.0   
478  368.0        Mike Muscala  32.0  3TM   C  53.0  6.0  600.0   56.0  156.0   
537  410.0  Kenneth Lofton Jr.  21.0  3TM  PF  21.0  0.0  199.0   36.0   78.0   
547  417.0      Kira Lewis Jr.  22.0  3TM  PG  28.0  0.0  265.0   34.0   92.0   
604  459.0       Dylan Windler  27.0  3TM  SF  17.0  0.0  108.0   15.0   30.0   

     ...   DRB    TRB    AST   STL   BLK   TOV    PF    PTS  Awards  \
309  ...  93.0  114.0  130.0  38.0   8.0  63.0  66.0  380.0     NaN   
379  ...  88.0  108.0   55.0  14.0   6.0  23.0  50.0  280.0     NaN   
425  ...  61.0   75.0   60.0  19.0   6.0  31.0  54.0  216.0     NaN   
478  ...  86.0  125.0   36.0   9.0  13.0  22.0  52.0  157.0     NaN

In [17]:
# Collapse every multi-team player down to a single row: the row flagged
# '2TM'/'3TM' is kept, its Team is overwritten with the team taken from the
# player's last per-team row, and the player's other rows are dropped.
for index, row in multi_team_players.iterrows():
    # Match on the player id column (the same key used to join salaries)
    # rather than the display name, so two different players who happen to
    # share a name are never collapsed into each other.
    player_id = row['Player-additional']
    is_player = stats['Player-additional'] == player_id

    # Per-team rows only: exclude *both* multi-team markers, not just '2TM',
    # so a marker can never be picked up as the "last team".
    team_rows = stats[is_player & ~stats['Team'].isin(['2TM', '3TM'])]
    if team_rows.empty:
        # No per-team row to take a team from; leave this row untouched
        # instead of raising on .iloc[-1].
        continue

    # Assumes the per-team rows are listed in the order the player joined
    # them, so the final one is the last team -- TODO confirm against source.
    stats.loc[index, 'Team'] = team_rows.iloc[-1]['Team']

    # Drop all other rows for the player (keeping only the updated row)
    stats = stats.drop(stats[is_player & (stats.index != index)].index)

# Show the result once, after the loop, rather than dumping the whole frame
# on every iteration.
print(stats)

        Rk                   Player   Age Team  Pos     G    GS      MP  \
0      1.0              Luka Dončić  24.0  DAL   PG  70.0  70.0  2624.0   
1      2.0  Shai Gilgeous-Alexander  25.0  OKC   PG  75.0  75.0  2553.0   
2      3.0    Giannis Antetokounmpo  29.0  MIL   PF  73.0  73.0  2567.0   
3      4.0            Jalen Brunson  27.0  NYK   PG  77.0  77.0  2726.0   
4      5.0             Nikola Jokić  28.0  DEN    C  79.0  79.0  2737.0   
..     ...                      ...   ...  ...  ...   ...   ...     ...   
731  569.0           Ron Harper Jr.  23.0  TOR   PF   1.0   0.0     4.0   
732  570.0           Justin Jackson  28.0  MIN   SF   2.0   0.0     1.0   
733  571.0        Dmytro Skapintsev  25.0  NYK    C   2.0   0.0     2.0   
734  572.0            Javonte Smart  24.0  PHI   PG   1.0   0.0     1.0   
735    NaN           League Average   NaN  NaN  NaN   NaN   NaN     NaN   

        FG     FGA  ...    DRB    TRB    AST    STL   BLK    TOV     PF  \
0    804.0  1652.0  ... 

In [21]:
# Sanity checks after collapsing the multi-team rows.

# 1. Look for any rows still carrying a '2TM' / '3TM' marker and report
remaining_multi_team = stats.loc[stats['Team'].isin(['2TM', '3TM'])]
if not remaining_multi_team.empty:
    print("There are still multi-team entries:")
    print(remaining_multi_team)
else:
    print("Success! No '2TM' or '3TM' entries left.")

# 2. Spot-check one player who had multiple teams: print every row that is
#    left for him so the remaining team can be inspected by eye
player_rows = stats.loc[stats['Player'] == 'Danilo Gallinari']
print(player_rows)

Success! No '2TM' or '3TM' entries left.
        Rk            Player   Age Team Pos     G   GS     MP    FG    FGA  \
379  301.0  Danilo Gallinari  35.0  MIL  SF  49.0  0.0  630.0  83.0  190.0   

     ...   DRB    TRB   AST   STL  BLK   TOV    PF    PTS  Awards  \
379  ...  88.0  108.0  55.0  14.0  6.0  23.0  50.0  280.0     NaN   

     Player-additional  
379          gallida01  

[1 rows x 32 columns]


In [22]:
# Cleaning Salaries Data
# Keep only the identifying columns, the 2024-25 salary and the guaranteed
# amount; every other salary column is left out of `sal`.
sal = salaries.loc[:, ['Player', 'Team', '2024-25', 'Guaranteed', 'PlayerID']]
print(sal)

                    Player Team    2024-25  Guaranteed   PlayerID
0            Stephen Curry  GSW  $55761216  $115368033  curryst01
1             Nikola Jokić  DEN  $51415938  $165673578  jokicni01
2              Joel Embiid  PHI  $51415938  $106640464  embiijo01
3             Kevin Durant  PHO  $51179021  $105887630  duranke01
4             Bradley Beal  PHO  $50203930  $103870200   bealbr01
..                     ...  ...        ...         ...        ...
422           Juwan Morgan  IND    $576229     $576229  morgaju01
423            Malik Fitts  IND    $555216     $555216  fittsma01
424         Jordan Goodwin  BRK    $325590     $325590  goodwjo01
425           Didi Louzada  POR    $268032    $1340160  louzama01
426  Javon Freeman-Liberty  TOR    $100000     $100000  freemja01

[427 rows x 5 columns]


In [24]:
# Merge Stats and Salaries Data
# Inner join on the player id: 'Player-additional' on the stats side and
# 'PlayerID' on the salary side. Rows without a match on both sides are
# dropped by the inner join.
data = stats.merge(
    sal,
    how='inner',
    left_on='Player-additional',
    right_on='PlayerID',
)

# Preview the merged rows, then list the resulting column names.
print(data.head())
print(data.columns)

   Rk_x                 Player_x   Age Team_x Pos     G    GS      MP     FG  \
0   1.0              Luka Dončić  24.0    DAL  PG  70.0  70.0  2624.0  804.0   
1   2.0  Shai Gilgeous-Alexander  25.0    OKC  PG  75.0  75.0  2553.0  796.0   
2   3.0    Giannis Antetokounmpo  29.0    MIL  PF  73.0  73.0  2567.0  837.0   
3   4.0            Jalen Brunson  27.0    NYK  PG  77.0  77.0  2726.0  790.0   
4   5.0             Nikola Jokić  28.0    DEN   C  79.0  79.0  2737.0  822.0   

      FGA  ...                 Player_y  Team_y    2024-25    2025-26  \
0  1652.0  ...              Luka Dončić     DAL  $43031940  $45999660   
1  1487.0  ...  Shai Gilgeous-Alexander     OKC  $35859950  $38333050   
2  1369.0  ...    Giannis Antetokounmpo     MIL  $48787676  $54126450   
3  1648.0  ...            Jalen Brunson     NYK  $24960001  $34944001   
4  1411.0  ...             Nikola Jokić     DEN  $51415938  $55224526   

     2026-27    2027-28    2028-29  2029-30  Guaranteed   PlayerID  
0  $4896738