In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [12]:
# Load dataset.
# Malformed rows are still skipped, but 'warn' reports each one instead of
# dropping it silently, so a shrunken dataset (and shifted metrics) is visible.
df = pd.read_csv('../Servo_Mechanism.csv', on_bad_lines='warn')

In [13]:
# Separate the predictor columns from the 'Class' target column
target_column = 'Class'
X = df.drop(columns=[target_column])
y = df[target_column]

In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Preprocessing: one-hot encode the categorical columns and standardize the
# numeric ones. Columns not listed here are dropped by ColumnTransformer's
# default remainder behaviour.
categorical_features = ['Motor', 'Screw']
numeric_features = ['Pgain', 'Vgain']

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a category level that was absent from the
        # training split is encoded as all zeros instead of raising at
        # transform/predict time (the default is handle_unknown='error').
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numeric_features),
    ]
)

In [16]:
# Use this if you are training the models in this notebook

# # Define models in pipelines
# def make_pipeline(model):
#     return Pipeline([('pre', preprocessor), ('model', model)])

# models = {
#     'linear_regression': make_pipeline(LinearRegression()),
#     'decision_tree': make_pipeline(DecisionTreeRegressor(random_state=42)),
#     'random_forest': make_pipeline(RandomForestRegressor(n_estimators=100, random_state=42)),
#     'gradient_boosting': make_pipeline(GradientBoostingRegressor(n_estimators=100, random_state=42)),
#     'svr': make_pipeline(SVR())
# }


# # Train each model and save
# for name, pipeline in models.items():
#     print(f"Training {name}...")
#     pipeline.fit(X_train, y_train)
#     y_pred = pipeline.predict(X_test)
#     mse = mean_squared_error(y_test, y_pred)
#     r2 = r2_score(y_test, y_pred)
#     print(f"{name} -- MSE: {mse:.3f}, R2: {r2:.3f}\n")
#     joblib.dump(pipeline, f'{name}_model.pkl')

In [17]:
# # Ensemble: load saved models and average predictions
# ensemble_models = []
# for name in models.keys():
#     ensemble_models.append(joblib.load(f'{name}_model.pkl'))

In [18]:
# Use this cell when the individual models have already been trained and saved.
# Maps each model's short name to the location of its saved pickle.
model_paths = {
    'linear_regression': '../Linear Regression/linear_regression_model.pkl',
    'decision_tree': '../Decision Tree/decision_tree_model.pkl',
    'random_forest': '../Random Forest/random_forest_model.pkl',
    'gradient_boosting': '../Gradient Boosting/gradient_boosting_model.pkl',
    'svr': '../Svr/svr_model.pkl',
}

# Load every saved model, in the order listed above.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
ensemble_models = []
for model_name, model_file in model_paths.items():
    print(f"Loading model: {model_name} from {model_file}")
    ensemble_models.append(joblib.load(model_file))

Loading model: linear_regression from ../Linear Regression/linear_regression_model.pkl
Loading model: decision_tree from ../Decision Tree/decision_tree_model.pkl
Loading model: random_forest from ../Random Forest/random_forest_model.pkl
Loading model: gradient_boosting from ../Gradient Boosting/gradient_boosting_model.pkl
Loading model: svr from ../Svr/svr_model.pkl


In [19]:
# Collect each ensemble member's predictions on the held-out features
# (one array per model, in the same order as ensemble_models).
# NOTE(review): this is only a fair hold-out evaluation if the saved models
# were fit on this same train/test split -- confirm against the training code.
preds = [model.predict(X_test) for model in ensemble_models]

In [20]:
# Stack the per-model predictions row-wise: one row per model
preds_array = np.vstack(preds)
# Collapse the model axis so each sample gets the mean of all model outputs
y_ensemble_pred = preds_array.mean(axis=0)

In [21]:
# Score the averaged predictions against the held-out targets
r2_ens = r2_score(y_test, y_ensemble_pred)
mse_ens = mean_squared_error(y_test, y_ensemble_pred)
print(f"Ensemble -- MSE: {mse_ens:.3f}, R2: {r2_ens:.3f}")

Ensemble -- MSE: 53.303, R2: 0.773


In [22]:
# Write the averaged predictions to a one-column CSV.
# comments='' stops numpy from prefixing the header line with '# '.
output_path = 'ensemble_predictions.csv'
np.savetxt(
    output_path,
    y_ensemble_pred,
    delimiter=',',
    header='Class_pred',
    comments='',
)