In [37]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder


# URL of the webpage containing the table
url = 'https://www.basketball-reference.com/players/l/lavinza01/gamelog/2024'

# Reading tables from the webpage
tables_list = pd.read_html(url)

# Selecting the desired table
table_data = tables_list[7]

team_label_encoder = LabelEncoder()
def preProcessData(df):
  # pre process data
  df['Date'] = pd.to_datetime(df['Date']).astype(int) // 10**9
  df['Age'] = df['Age'].str.replace('-', '').astype(int)
  # Assign encoded labels to the 'Opp' column
  df['Tm'] = team_label_encoder.fit_transform(df['Tm'])
  df['Opp'] = team_label_encoder.fit_transform(df['Opp'])
  # dont need the @ or the L and stuff
  df = df.rename(columns={df.columns[5]: 'At'})
  df['At'] = df['At'].apply(lambda x: 1 if '@' in str(x) else 0)
  df = df.drop(df.columns[7], axis=1)
  df['MP'] = df['MP'].astype(str)
  df['MP'] = df['MP'].str.replace(':', '').astype(int)

  # return processed data
  return df

table_data = preProcessData(table_data)

# Setting the rows we want to predict
predictedValues = [ 'FG',	'FGA'	, 'FG%',	'3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB','DRB',
                   'TRB',	'AST',	'STL',	'BLK',	'TOV',	'PF',	'PTS',	'GmSc',	'+/-', 'MP', 'Rk']

# Separate features (X) and the target variable (y)
X = table_data.drop(predictedValues, axis=1)  # Independant Features

y = table_data[predictedValues]  # Target variables to predict

# Initialize and train the RandomForestRegressor
model = RandomForestRegressor()
model.fit(X, y)


In [40]:
new_data = {
    "Rk" : 0,
    'G' : 13,
    "Date": '2023-11-16',
    'Age': "28-251",
    "Tm" : "CHI",
    "At": '',
    "Opp":"ORL",
    "GS": 1,
    "MP": 2022
}

def preProcessNewRow(df):
  # pre process data
  df['Date'] = pd.to_datetime(df['Date']).astype(int) // 10**9
  df['Age'] = df['Age'].str.replace('-', '').astype(int)
  # Assign encoded labels to the 'Opp' column
  df['Tm'] = team_label_encoder.fit_transform(df['Tm'])
  df['Opp'] = team_label_encoder.fit_transform(df['Opp'])
  # dont need the @ or the L and stuff
  df = df.rename(columns={df.columns[5]: 'At'})
  df['At'] = df['At'].apply(lambda x: 1 if '@' in str(x) else 0)
  #df = df.drop(df.columns[7], axis=1)
  df['MP'] = df['MP'].astype(str)
  df['MP'] = df['MP'].str.replace(':', '').astype(int)

  # pre processed
  return df
# Convert the new_row_features to a DataFrame for prediction
new_row = pd.DataFrame([new_data])
# Preprocess the new_row_features for prediction
new_row = preProcessNewRow(new_row)

new_row = new_row.drop(['MP', 'Rk'], axis=1)

predicted_points = model.predict(new_row)

cleaned_output = ['{:.2f}'.format(val) for val in predicted_points[0]]

for title, number in zip(predictedValues, cleaned_output):
    print(f'{title}: {number}')



FG: 6.25
FGA: 15.69
FG%: 0.40
3P: 2.04
3PA: 6.44
3P%: 0.30
FT: 4.26
FTA: 4.91
FT%: 0.84
ORB: 0.24
DRB: 4.79
TRB: 5.03
AST: 3.39
STL: 1.02
BLK: 0.22
TOV: 1.80
PF: 1.66
PTS: 18.80
GmSc: 12.74
+/-: -2.91
MP: 3511.88
Rk: 9.38
