# Ensemble Learning

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error

In [12]:
# Load your dataset
file_path = 'Shuff_Data_11.xlsx'
data = pd.read_excel(file_path)

# Handle missing values
data['obstacles'] = data['obstacles'].replace('x', '0;0')

# Split 'obstacles' into two separate columns
data[['Obstacle Start', 'Obstacle End']] = data['obstacles'].str.split(';', expand=True).astype(int)

# Convert 'sensors_position' to lists of integers
data['sensors_position'] = data['sensors_position'].apply(lambda x: list(map(int, x.split())))

# Drop the original 'obstacles' column
data = data.drop(columns=['obstacles'])

# Normalize input features
scaler = StandardScaler()
features = data[['n', 'do', 'alfa', 'Mmin', 'Obstacle Start', 'Obstacle End']]
features_scaled = scaler.fit_transform(features)

# Prepare the output with padding to the maximum length
max_length = max(data['sensors_position'].apply(len))
sensors_position_padded = np.array([np.pad(seq, (0, max_length - len(seq)), 'constant') for seq in data['sensors_position']])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_scaled, sensors_position_padded, test_size=0.25, random_state=42)


In [13]:
# Decision Tree model
decision_tree = MultiOutputRegressor(DecisionTreeRegressor(random_state=42))

# Ridge Regression model with best parameters
ridge = MultiOutputRegressor(Ridge(alpha=0.01, max_iter=None, solver='saga', tol=0.0001, random_state=42))

# Polynomial Regression model
degree = 2
poly = PolynomialFeatures(degree=degree)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
polynomial = MultiOutputRegressor(LinearRegression())

In [15]:
# Train each base model separately
decision_tree.fit(X_train, y_train)
ridge.fit(X_train, y_train)
polynomial.fit(X_train_poly, y_train)

# Get predictions from each model
decision_tree_preds = decision_tree.predict(X_test)
ridge_preds = ridge.predict(X_test)
polynomial_preds = polynomial.predict(X_test_poly)

# Stack predictions into a new feature matrix
stacked_predictions = np.hstack((decision_tree_preds, ridge_preds, polynomial_preds))

# Train a meta-model on these predictions
meta_model = MultiOutputRegressor(LinearRegression())
meta_model.fit(stacked_predictions, y_test)

# Use the meta-model for final predictions
final_predictions = meta_model.predict(stacked_predictions)

# Evaluate the final stacked model
mse = mean_squared_error(y_test, final_predictions)
print(f"\nFinal Stacked Model Mean Squared Error: {mse}")

# Display detailed predicted values and true values
for i in range(len(final_predictions)):
    print(f"Predicted values for sample {i + 1}: {final_predictions[i]}")
    print(f"True values for sample {i + 1}: {y_test[i]}")
    print("-----")



Final Stacked Model Mean Squared Error: 3.45245878037905
Predicted values for sample 1: [12.84532919 17.50362338  8.0057861  10.25581381  1.97703827 -2.68367345
 -0.95864917 -1.21924983  1.10201848  0.68653615  1.01598666 -0.15525472
 -0.31988754  0.          0.          0.        ]
True values for sample 1: [13 18  8 14  0  0  0  0  0  0  0  0  0  0  0  0]
-----
Predicted values for sample 2: [ 6.75209464e+00  1.10246749e+01  6.43596667e+00  1.16035358e+01
 -3.49495754e+00  2.63465403e-01 -5.43734044e-04 -1.95170815e-01
  1.40270624e+00  3.69606060e-01  1.27064627e+00  1.41373728e+00
 -9.22593897e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00]
True values for sample 2: [ 7 10  6 11  0  0  0  0  0  0  0  0  0  0  0  0]
-----
Predicted values for sample 3: [20.97048979 15.28534942 21.29029886 18.56155426 16.23004389  0.88688655
  4.57194781 -1.36392791  0.79043543  0.21950599  1.33015835  1.90573832
 -0.04287731  0.          0.          0.        ]
True values for sample 3: [21 15

In [17]:
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming y_test contains the true values and y_stacked_pred contains the predictions from your stacked models
mse = mean_squared_error(y_test, final_predictions)
mae = mean_absolute_error(y_test, final_predictions)
r2 = r2_score(y_test, final_predictions)

print(f"\nFinal Stacked Model Mean Squared Error (MSE): {mse}")
print(f"Final Stacked Model Mean Absolute Error (MAE): {mae}")
print(f"Final Stacked Model R^2 Score: {r2}")



Final Stacked Model Mean Squared Error (MSE): 3.45245878037905
Final Stacked Model Mean Absolute Error (MAE): 1.019923098777557
Final Stacked Model R^2 Score: 0.9199690732915405


## Combined

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error

# Load your dataset
file_path = 'Shuff_Data_11.xlsx'
data = pd.read_excel(file_path)

# Handle missing values
data['obstacles'] = data['obstacles'].replace('x', '0;0')

# Split 'obstacles' into two separate columns
data[['Obstacle Start', 'Obstacle End']] = data['obstacles'].str.split(';', expand=True).astype(int)

# Convert 'sensors_position' to lists of integers
data['sensors_position'] = data['sensors_position'].apply(lambda x: list(map(int, x.split())))

# Drop the original 'obstacles' column
data = data.drop(columns=['obstacles'])

# Normalize input features
scaler = StandardScaler()
features = data[['n', 'do', 'alfa', 'Mmin', 'Obstacle Start', 'Obstacle End']]
features_scaled = scaler.fit_transform(features)

# Prepare the output with padding to the maximum length
max_length = max(data['sensors_position'].apply(len))
sensors_position_padded = np.array([np.pad(seq, (0, max_length - len(seq)), 'constant') for seq in data['sensors_position']])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_scaled, sensors_position_padded, test_size=0.25, random_state=42)

# Decision Tree model
decision_tree = MultiOutputRegressor(DecisionTreeRegressor(random_state=42))

# Ridge Regression model with best parameters
ridge = MultiOutputRegressor(Ridge(alpha=0.01, max_iter=None, solver='saga', tol=0.0001, random_state=42))

# Polynomial Regression model
degree = 2
poly = PolynomialFeatures(degree=degree)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
polynomial = MultiOutputRegressor(LinearRegression())
# Train each base model separately
decision_tree.fit(X_train, y_train)
ridge.fit(X_train, y_train)
polynomial.fit(X_train_poly, y_train)

# Get predictions from each model
decision_tree_preds = decision_tree.predict(X_test)
ridge_preds = ridge.predict(X_test)
polynomial_preds = polynomial.predict(X_test_poly)

# Stack predictions into a new feature matrix
stacked_predictions = np.hstack((decision_tree_preds, ridge_preds, polynomial_preds))

# Train a meta-model on these predictions
meta_model = MultiOutputRegressor(LinearRegression())
meta_model.fit(stacked_predictions, y_test)

# Use the meta-model for final predictions
final_predictions = meta_model.predict(stacked_predictions)

# Evaluate the final stacked model
mse = mean_squared_error(y_test, final_predictions)
print(f"\nFinal Stacked Model Mean Squared Error: {mse}")

# Display detailed predicted values and true values
for i in range(len(final_predictions)):
    print(f"Predicted values for sample {i + 1}: {final_predictions[i]}")
    print(f"True values for sample {i + 1}: {y_test[i]}")
    print("-----")

from sklearn.metrics import mean_absolute_error, r2_score

# Assuming y_test contains the true values and y_stacked_pred contains the predictions from your stacked models
mse = mean_squared_error(y_test, final_predictions)
mae = mean_absolute_error(y_test, final_predictions)
r2 = r2_score(y_test, final_predictions)

print(f"\nFinal Stacked Model Mean Squared Error (MSE): {mse}")
print(f"Final Stacked Model Mean Absolute Error (MAE): {mae}")
print(f"Final Stacked Model R^2 Score: {r2}")
