<a href="https://colab.research.google.com/github/sofiebudman/ML/blob/main/Basketball.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Basketball ML Project**

## **Predict points (PTS) from player statistics**

### Data

In [4]:
import pandas as pd

# Source CSV (file name NBA_2021.csv) hosted in the dataprofessor/data GitHub repo.
NBA_2021_CSV_URL = 'https://raw.githubusercontent.com/dataprofessor/data/master/NBA_2021.csv'

# Read the CSV, discard every row that contains a missing value, and renumber
# the surviving rows from 0 so the index has no gaps.
df = (
    pd.read_csv(NBA_2021_CSV_URL)
    .dropna()
    .reset_index(drop=True)
)
# NOTE(review): the preview shows the same player on more than one row (a 'TOT'
# row plus a per-team row) — confirm whether these should be deduplicated
# before modelling.
df


Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Precious Achiuwa,PF,21,MIA,61,4,12.1,2.0,3.7,0.544,...,0.509,1.2,2.2,3.4,0.5,0.3,0.5,0.7,1.5,5.0
1,Steven Adams,C,27,NOP,58,58,27.7,3.3,5.3,0.614,...,0.444,3.7,5.2,8.9,1.9,0.9,0.7,1.3,1.9,7.6
2,Bam Adebayo,C,23,MIA,64,64,33.5,7.1,12.5,0.570,...,0.799,2.2,6.7,9.0,5.4,1.2,1.0,2.6,2.3,18.7
3,LaMarcus Aldridge,C,35,TOT,26,23,25.9,5.4,11.4,0.473,...,0.872,0.7,3.8,4.5,1.9,0.4,1.1,1.0,1.8,13.5
4,LaMarcus Aldridge,C,35,SAS,21,18,25.9,5.5,11.8,0.464,...,0.838,0.8,3.7,4.5,1.7,0.4,0.9,1.0,1.7,13.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
642,Delon Wright,PG,28,SAC,27,8,25.8,3.9,8.3,0.462,...,0.833,1.0,2.9,3.9,3.6,1.6,0.4,1.3,1.1,10.0
643,Thaddeus Young,PF,32,CHI,68,23,24.3,5.4,9.7,0.559,...,0.628,2.5,3.8,6.2,4.3,1.1,0.6,2.0,2.2,12.1
644,Trae Young,PG,22,ATL,63,63,33.7,7.7,17.7,0.438,...,0.886,0.6,3.3,3.9,9.4,0.8,0.2,4.1,1.8,25.3
645,Cody Zeller,C,28,CHO,48,21,20.9,3.8,6.8,0.559,...,0.714,2.5,4.4,6.8,1.8,0.6,0.4,1.1,2.5,9.4


In [5]:
# Regression target: the PTS column of the cleaned frame.
y = df.loc[:, 'PTS']
y

0       5.0
1       7.6
2      18.7
3      13.5
4      13.7
       ... 
642    10.0
643    12.1
644    25.3
645     9.4
646     9.0
Name: PTS, Length: 647, dtype: float64

In [None]:
# Model inputs: the G, GS and MP columns plus the four shooting-rate columns.
# The features are selected by name rather than by dropping everything else,
# so a column that is added to the CSV upstream cannot silently become a model
# input, and a missing feature fails loudly with a KeyError.
FEATURE_COLUMNS = ['G', 'GS', 'MP', 'FG%', '3P%', '2P%', 'FT%']
X = df[FEATURE_COLUMNS]
X

Unnamed: 0,G,GS,MP,FG%,3P%,2P%,FT%
0,61,4,12.1,0.544,0.000,0.546,0.509
1,58,58,27.7,0.614,0.000,0.620,0.444
2,64,64,33.5,0.570,0.250,0.573,0.799
3,26,23,25.9,0.473,0.388,0.505,0.872
4,21,18,25.9,0.464,0.360,0.509,0.838
...,...,...,...,...,...,...,...
642,27,8,25.8,0.462,0.398,0.500,0.833
643,68,23,24.3,0.559,0.267,0.580,0.628
644,63,63,33.7,0.438,0.343,0.491,0.886
645,48,21,20.9,0.559,0.143,0.598,0.714


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=100)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split. fit() returns the estimator
# itself, so the fitted model can be bound directly; the trailing expression
# displays it as the cell output.
lr = LinearRegression().fit(X_train, y_train)
lr

### Apply the model to make predictions


In [None]:
# Predict the target for both splits (training first, then test) so the two
# errors can be compared afterwards.
y_lr_train_pred, y_lr_test_pred = (
    lr.predict(features) for features in (X_train, X_test)
)

### Evaluate Performance

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score each split with the same pair of metrics: mean squared error, then R^2.
metrics = (mean_squared_error, r2_score)

lr_train_mse, lr_train_r2 = (
    metric(y_train, y_lr_train_pred) for metric in metrics
)
lr_test_mse, lr_test_r2 = (
    metric(y_test, y_lr_test_pred) for metric in metrics
)

In [None]:
# Print the four metrics, one per line, training split before test split.
for label, value in (
    ('LR_MSE (Train):', lr_train_mse),
    ('LR_R2 (Train):', lr_train_r2),
    ('LR_MSE (Test):', lr_test_mse),
    ('LR_R2 (Test):', lr_test_r2),
):
    print(label, value)

LR_MSE (Train): 8.57101726424157
LR_R2 (Train): 0.7822496044139352
LR_MSE (Test): 8.120058887041282
LR_R2 (Test): 0.7723591125722578


In [None]:
# One-row summary of the linear-regression metrics.
# The frame is built from a single record (column name -> value) rather than a
# flat mixed list that is then transposed: the transpose route leaves every
# column with object dtype, whereas this keeps the metric columns as floats so
# they can be rounded, compared or concatenated with other models' results.
lr_results = pd.DataFrame([{
    'Method': 'Linear regression',
    'Training MSE': lr_train_mse,
    'Training R2': lr_train_r2,
    'Test MSE': lr_test_mse,
    'Test R2': lr_test_r2,
}])

In [None]:
# Display the one-row metrics summary table as the cell output.
lr_results

Unnamed: 0,Method,Training MSE,Training R2,Test MSE,Test R2
0,Linear regression,8.571017,0.78225,8.120059,0.772359


### **Predict points for a custom stat line**

In [None]:
# A hand-written stat line to score with the fitted model. Each value is
# wrapped in a one-element list so the resulting frame has exactly one row,
# with one column per entry in the order listed here.
custom_data = {
    column: [value]
    for column, value in (
        ('G', 25),
        ('GS', 10),
        ('MP', 15),
        ('FG%', 0.8),
        ('3P%', 0.1),
        ('2P%', 0.3),
        ('FT%', 0.5),
    )
}
custom_df = pd.DataFrame(custom_data)
custom_df

Unnamed: 0,G,GS,MP,FG%,3P%,2P%,FT%
0,25,10,15,0.8,0.1,0.3,0.5


In [None]:
# predict() yields one value per input row; custom_df holds a single row, so
# element 0 is the prediction for the custom stat line.
stats_pred = lr.predict(custom_df)
predicted_pts = stats_pred[0]
predicted_pts

9.333714851009972