In [94]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import RobustScaler

In [95]:
# import the data into a df
df = pd.read_csv("data/learning_data.csv")
data = df.to_numpy()

#shuffle the data
np.random.shuffle(data)

# split the data into inputs and labels
X = data[:, 1:20]
y = data[:, 20]
X = np.array(X, dtype=float)
y = np.array(y, dtype=float)

# normalize the data
scaler = RobustScaler()
X = scaler.fit_transform(X)
#y = scaler.fit_transform(y)

In [96]:
knn = KNeighborsRegressor(n_neighbors=5)
knn_mse = []
knn_mae = []

kfolds = KFold(n_splits=10);
for train_index,test_index in kfolds.split(X, y):
    ## split the data
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index];
    
    knn.fit(X_train, y_train);
    y_pred = np.around(knn.predict(X_test));
            
#     knn_mse.append(mean_squared_error(y_pred, y_test))
#     knn_mae.append(mean_absolute_error(y_pred, y_test))
    p_error = []
    for i in range(y_test.size):
        p_error.append(abs(y_test[i] - y_pred[i]) / y_test[i])
    
    knn_mae.append(sum(p_error) / y_test.size)
    
# print('Average mean squared error:', np.mean(knn_mse))
print('Average mean absolute error:', np.mean(knn_mae))

Average mean absolute error: 0.2811012129315923


In [97]:
import warnings
warnings.filterwarnings('ignore')

mlp = MLPRegressor(hidden_layer_sizes=(100,50,20), max_iter=300, activation='relu', random_state=1)
mlp_mse = []
mlp_mae = []

kfolds = KFold(n_splits=10);
for train_index,test_index in kfolds.split(X, y):
    ## split the data
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index];
    
    mlp.fit(X_train, y_train);
    y_pred = np.around(mlp.predict(X_test));
    
    p_error = []
    for i in range(y_test.size):
        p_error.append(abs(y_test[i] - y_pred[i]) / y_test[i])
    
    mlp_mae.append(sum(p_error) / y_test.size)
    
#print('Average mean squared error:', np.mean(mlp_mse))
print('Average mean absolute error:', np.mean(mlp_mae))

Average mean absolute error: 0.2563325259905026


In [98]:
import warnings
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import KFold
import numpy as np

warnings.filterwarnings('ignore')

# Initialize models
rf = RandomForestRegressor(random_state=1)
gb = GradientBoostingRegressor(random_state=1)

# Initialize error lists for each model
rf_mae = []
gb_mae = []

kfolds = KFold(n_splits=10)
for train_index, test_index in kfolds.split(X, y):
    # Split the data
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
    
    # Train and predict with RandomForestRegressor
    rf.fit(X_train, y_train)
    y_pred_rf = np.around(rf.predict(X_test))
    rf_error = [abs(y_test[i] - y_pred_rf[i]) / y_test[i] for i in range(y_test.size)]
    rf_mae.append(np.mean(rf_error))

    # Train and predict with GradientBoostingRegressor
    gb.fit(X_train, y_train)
    y_pred_gb = np.around(gb.predict(X_test))
    gb_error = [abs(y_test[i] - y_pred_gb[i]) / y_test[i] for i in range(y_test.size)]
    gb_mae.append(np.mean(gb_error))

# Print average mean absolute errors
print('Average mean absolute error for RandomForestRegressor:', np.mean(rf_mae))
print('Average mean absolute error for GradientBoostingRegressor:', np.mean(gb_mae))


Average mean absolute error for RandomForestRegressor: 0.26713150853624146
Average mean absolute error for GradientBoostingRegressor: 0.26304367107065435


In [99]:
# import the data into a df
test_df = pd.read_csv("data/Player_2022_cleaned_copy.csv")
test_data = test_df.to_numpy()

#shuffle the data
#np.random.shuffle(data)

# split the data into inputs and labels
X_test = test_data[:, 3:22]

X_test = np.array(X_test, dtype=float)

# normalize the data
scaler = RobustScaler()
X_test = scaler.fit_transform(X_test)
#y = scaler.fit_transform(y)

In [102]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor # or any other model you have used

# Make predictions
predictions = gb.predict(X_test)

# Add predictions to DataFrame and save to new CSV (optional)
test_df['Predictions'] = predictions
test_df.to_csv('data/predictions.csv', index=False)
