In [1]:
# imports, setup
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_validate, GridSearchCV
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score, make_scorer

import matplotlib.pyplot as plt

# Read the steel dataset from disk.
df = pd.read_csv("steel.csv")

# Separate the prediction target from the remaining feature columns.
y = df["tensile_strength"]
X = df.drop(columns="tensile_strength")

# Ten shuffled folds with a fixed seed; the same splitter is passed to every
# cross-validation and grid-search call so all models see identical folds.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)

# Metrics computed by cross_validate; the keys become the suffixes of its
# result entries (e.g. "test_R2", "train_MAE").
scoring = dict(
    R2=make_scorer(r2_score),
    MAE=make_scorer(mean_absolute_error),
)

In [2]:
# Support Vector Regression (SVR): default baseline, then grid search

In [3]:
# Baseline model: standardize the features, then fit an SVR with its default
# hyperparameters. Keeping the scaler inside the pipeline means it is re-fit
# on the training portion of each fold rather than on the full dataset.
svr_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR())
])

# Evaluate the untuned pipeline on the shared folds. Train scores are kept as
# well so the train/test gap can be inspected.
svr_default_scores = cross_validate(
    svr_pipeline, X, y, cv=kfold,
    scoring=scoring, return_train_score=True
)

# "\u00b2" is the superscript-two character. Writing it as an escape keeps the
# source ASCII-only, so the label cannot be corrupted by a file-encoding
# mismatch (the previous literal rendered as mojibake).
print("SVR Default Results")
print("Average Train R\u00b2:", svr_default_scores['train_R2'].mean())
print("Average Test R\u00b2:", svr_default_scores['test_R2'].mean())
print("Average Train MAE:", svr_default_scores['train_MAE'].mean())
print("Average Test MAE:", svr_default_scores['test_MAE'].mean())

# Candidate hyperparameters; the "svr__" prefix routes each one to the 'svr'
# step of the pipeline.
# NOTE(review): the recorded run selected C=100, the largest value in this
# grid -- consider widening the C range to confirm the optimum is not beyond it.
svr_params = {
    "svr__C": [0.1, 1, 10, 100],
    "svr__gamma": ["scale", 0.01, 0.1, 1]
}

# Exhaustive search over the grid using the same folds as the baseline.
# The scorer is *negated* MAE because GridSearchCV always maximizes its score.
svr_grid = GridSearchCV(
    svr_pipeline,
    param_grid=svr_params,
    scoring="neg_mean_absolute_error",
    cv=kfold,
    n_jobs=-1
)

svr_grid.fit(X, y)

# Flip the sign back so the reported value is a conventional (positive) MAE.
# This is the mean CV score of the winning candidate on the same folds that
# were used to choose it, so treat it as an optimistic estimate.
print("\nBest SVR hyperparameters:", svr_grid.best_params_)
print("Best CV MAE:", -svr_grid.best_score_)


🔹 SVR Default Results
Average Train R²: 0.24317282407379848
Average Test R²: 0.20734036745248044
Average Train MAE: 61.723993433434394
Average Test MAE: 62.889022680030806

 Best SVR hyperparameters: {'svr__C': 100, 'svr__gamma': 'scale'}
Best CV MAE: 25.2484419747283


In [4]:
# K-Nearest Neighbors Regression (KNN): default baseline, then grid search

In [5]:
# Baseline model: standardize the features, then fit a k-nearest-neighbors
# regressor with its default hyperparameters. Keeping the scaler inside the
# pipeline means it is re-fit on the training portion of each fold rather than
# on the full dataset.
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor())
])

# Evaluate the untuned pipeline on the shared folds. Train scores are kept as
# well so the train/test gap can be inspected.
knn_default_scores = cross_validate(
    knn_pipeline, X, y, cv=kfold,
    scoring=scoring, return_train_score=True
)

# "\u00b2" is the superscript-two character. Writing it as an escape keeps the
# source ASCII-only, so the label cannot be corrupted by a file-encoding
# mismatch (the previous literal rendered as mojibake).
print("\nKNN Default Results")
print("Average Train R\u00b2:", knn_default_scores['train_R2'].mean())
print("Average Test R\u00b2:", knn_default_scores['test_R2'].mean())
print("Average Train MAE:", knn_default_scores['train_MAE'].mean())
print("Average Test MAE:", knn_default_scores['test_MAE'].mean())

# Candidate hyperparameters; the "knn__" prefix routes each one to the 'knn'
# step of the pipeline.
knn_params = {
    "knn__n_neighbors": [2, 3, 5, 7, 10, 12, 15],
    "knn__weights": ["uniform", "distance"]
}

# Exhaustive search over the grid using the same folds as the baseline.
# The scorer is *negated* MAE because GridSearchCV always maximizes its score.
knn_grid = GridSearchCV(
    knn_pipeline,
    param_grid=knn_params,
    scoring="neg_mean_absolute_error",
    cv=kfold,
    n_jobs=-1
)

knn_grid.fit(X, y)

# Flip the sign back so the reported value is a conventional (positive) MAE.
# This is the mean CV score of the winning candidate on the same folds that
# were used to choose it, so treat it as an optimistic estimate.
print("\nBest KNN hyperparameters:", knn_grid.best_params_)
print("Best CV MAE:", -knn_grid.best_score_)



🔹 KNN Default Results
Average Train R²: 0.8579134967354001
Average Test R²: 0.7618873050552063
Average Train MAE: 26.108937775942707
Average Test MAE: 33.3660419606476

Best KNN hyperparameters: {'knn__n_neighbors': 7, 'knn__weights': 'distance'}
Best CV MAE: 30.322820302103857
