In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:
# 1. Read the servo dataset. Rows pandas cannot parse are skipped rather than
#    raising (on_bad_lines='skip'), so the frame may hold fewer rows than the file.
df = pd.read_csv('../Servo_Mechanism.csv', on_bad_lines='skip')

# 'Class' is the regression target; every other column is a predictor.
y = df['Class']
X = df.drop(columns='Class')

In [3]:
# 2. Hold out 20% of the rows for final evaluation; the fixed seed keeps the
#    partition reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# 3. Column-wise preprocessing
#    - categorical columns: one-hot encoded; categories unseen at fit time are
#      encoded as all zeros instead of raising (handle_unknown='ignore')
#    - numeric columns: standardised, then expanded to degree-2 polynomial
#      terms (squares + pairwise interaction), without a constant column
categorical_features = ['Motor', 'Screw']
numeric_features = ['Pgain', 'Vgain']

preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore'),
            categorical_features,
        ),
        (
            'num',
            Pipeline(
                steps=[
                    ('scale', StandardScaler()),
                    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
                ]
            ),
            numeric_features,
        ),
    ]
)

In [5]:
# 4. Chain preprocessing with a Ridge regressor and set up a 5-fold
#    cross-validated search over the regularisation strength.
pipeline = Pipeline(
    steps=[
        ('pre', preprocessor),
        ('model', Ridge()),
    ]
)

# The 'model__' prefix routes the parameter to the pipeline step named 'model'.
param_grid = {'model__alpha': [0.1, 1.0, 10.0, 50.0, 100.0]}

grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='r2',   # candidates are ranked by mean cross-validated R^2
    cv=5,
    n_jobs=-1,      # use all available cores
    verbose=1,
)

In [6]:
# 5. Run the cross-validated search on the training split only.
#    fit() returns the search object itself, so the winning alpha can be read
#    straight off the result.
best_alpha = grid.fit(X_train, y_train).best_params_['model__alpha']
print(f"Best alpha: {best_alpha}")

Fitting 5 folds for each of 5 candidates, totalling 25 fits
Best alpha: 1.0


In [7]:
# Score the search's refit best estimator on the held-out test split.
y_pred = grid.predict(X_test)

# One print call, one report line per argument.
print(
    "Improved Ridge Regression:",
    f"  MSE: {mean_squared_error(y_test, y_pred):.3f}",
    f"  R2 : {r2_score(y_test, y_pred):.3f}",
    sep="\n",
)

Improved Ridge Regression:
  MSE: 38.925
  R2 : 0.834


In [8]:
# Save the *fitted* model.
# GridSearchCV fits clones of the estimator it is given, so the `pipeline`
# object itself is never trained; dumping it would persist an unfitted
# template that raises NotFittedError on predict() after loading.
# `grid.best_estimator_` is the pipeline refit on the full training split with
# the best alpha (available because refit=True is the GridSearchCV default).
joblib.dump(grid.best_estimator_, 'linear_regression_model.pkl')

['linear_regression_model.pkl']