In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Additional imports used by this cell, grouped here instead of mid-script.
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error

# Load the dataset.
df = pd.read_csv('filtered.csv')

# Features (X): age plus the goalkeeping attribute columns.
feature_columns = [
    'age',
    'goalkeeping_diving',
    'goalkeeping_handling',
    'goalkeeping_kicking',
    'goalkeeping_positioning',
    'goalkeeping_reflexes',
    'goalkeeping_speed',
]
X = df[feature_columns]

# Targets (y): one linear regression is fitted per target column.
y_ability = df['overall']
y_potential = df['potential']
print(y_ability)
print(y_potential)

# Split the features and both targets in a single call so the rows stay
# aligned across all six outputs. A fixed random_state makes the split,
# and therefore the metrics printed below, reproducible between runs.
X_train, X_test, y_ability_train, y_ability_test, y_potential_train, y_potential_test = train_test_split(
    X, y_ability, y_potential, test_size=0.2, random_state=42
)

# Fill missing feature values with the column mean (no rows are dropped).
# The imputer is fitted on the training split only and then re-used on the
# test split, so test-set statistics do not leak into training.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# With scikit-learn's default output configuration the imputer returns plain
# arrays (positional rows), so give every target Series a matching 0..n-1
# index too. Row order is unchanged; only the index labels are reset.
y_ability_train = y_ability_train.reset_index(drop=True)
y_ability_test = y_ability_test.reset_index(drop=True)
y_potential_train = y_potential_train.reset_index(drop=True)
y_potential_test = y_potential_test.reset_index(drop=True)

# Train one model per target on the same imputed features.
ability_model = LinearRegression()
ability_model.fit(X_train, y_ability_train)
potential_model = LinearRegression()
potential_model.fit(X_train, y_potential_train)

# Make predictions on the held-out split.
ability_predictions = ability_model.predict(X_test)
potential_predictions = potential_model.predict(X_test)

# Evaluate the models.
# RMSE is computed as sqrt(MSE) rather than via squared=False: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, whereas this form
# works on every version.
ability_rmse = mean_squared_error(y_ability_test, ability_predictions) ** 0.5
potential_rmse = mean_squared_error(y_potential_test, potential_predictions) ** 0.5
ability_mae = mean_absolute_error(y_ability_test, ability_predictions)
potential_mae = mean_absolute_error(y_potential_test, potential_predictions)

print("Ability RMSE:", ability_rmse)
print("Potential RMSE:", potential_rmse)
print("Ability MAE:", ability_mae)
print("Potential MAE:", potential_mae)



0        93
1        92
2        91
3        91
4        91
         ..
19234    47
19235    47
19236    47
19237    47
19238    47
Name: overall, Length: 19239, dtype: int64
0        93
1        92
2        91
3        91
4        91
         ..
19234    52
19235    59
19236    55
19237    60
19238    60
Name: potential, Length: 19239, dtype: int64
