In [None]:
import pandas as pd

# Location of the input CSV, relative to the notebook's working directory.
file_path = './data/filtered_data.csv'

# Read the CSV into a DataFrame and print its first five rows as a quick look at the columns.
data = pd.read_csv(filepath_or_buffer=file_path)
print(data.head(n=5))


In [None]:
# DataFrame.info() writes its summary straight to stdout and returns None, so it is
# called on its own; wrapping it in print() would emit a stray "None" line afterwards.
data.info()

# Keep only the rows where all three columns used for modelling are present.
data = data.dropna(subset=['Age', 'BodyweightKg', 'Best3SquatKg'])


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Regression target and the two predictor columns.
y = data['Best3SquatKg']
X = data[['Age', 'BodyweightKg']]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Function to train and evaluate a model
def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted
        in place by this call.
    X_train, y_train
        Features and target passed to ``model.fit``.
    X_test, y_test
        Features passed to ``model.predict`` and the true values the
        predictions are compared against.

    Returns
    -------
    The mean squared error of the test-set predictions, as computed by
    ``mean_squared_error``. The same value is also printed next to the
    model's class name.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print(f'Model: {model.__class__.__name__}, MSE: {mse}')
    # Hand the score back so callers can compare models programmatically
    # instead of reading it off the printed line.
    return mse

# Models to train.
# The decision tree is seeded like the train/test split: scikit-learn randomly permutes
# the features at each split, so an unseeded tree can pick different splits when several
# tie and report a different MSE from one run to the next.
models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    DecisionTreeRegressor(random_state=42),
]

# Train and evaluate each model on the same split so the printed MSEs are comparable.
for model in models:
    train_and_evaluate_model(model, X_train, X_test, y_train, y_test)


In [None]:
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Regression target and predictor columns for the depth-limited tree.
target = data['Best3SquatKg']
features = data[['Age', 'BodyweightKg']]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, random_state=42, test_size=0.2
)

# Limit the tree to a depth of 5 so it cannot grow until every leaf is pure.
# fit() returns the estimator itself, so construction and training are chained.
tree_model = DecisionTreeRegressor(random_state=42, max_depth=5).fit(X_train, y_train)

# Score on the held-out rows; RMSE is the square root of the mean squared error.
predictions = tree_model.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, predictions))
print(f'Root Mean Squared Error: {rmse}')

# Draw the fitted tree, limiting the plot to depth 3 so the figure stays readable.
plt.figure(figsize=(20, 10))
plot_tree(
    tree_model,
    max_depth=3,
    filled=True,
    feature_names=['Age', 'BodyweightKg'],
)
plt.show()



In [None]:
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Expects the `data` DataFrame built in the earlier cells to be in scope.

target = data['Best3SquatKg']
features = data[['Age', 'BodyweightKg']]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, random_state=42, test_size=0.2
)

# Expand the two predictors into polynomial terms up to `poly_degree`.
poly_degree = 2
poly_features = PolynomialFeatures(degree=poly_degree)

# Fit the expansion on the training rows, then apply the same transform to the test rows.
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Linear regression on the expanded features; fit() returns the estimator itself.
poly_model = LinearRegression().fit(X_train_poly, y_train)

# Predictions for the held-out rows, evaluated below.
predictions = poly_model.predict(X_test_poly)

# Evaluation metrics on the held-out rows. The MSE is computed once and the RMSE
# is derived from it as its square root.
mse = mean_squared_error(y_test, predictions)
rmse = sqrt(mse)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

# Print the report, formatting every metric to two decimal places.
print(f"Evaluation Metrics for Polynomial Regression (Degree {poly_degree}):")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R-squared (R²): {r2:.2f}")