In [57]:
# Importing important libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [58]:
# Importing cleaned data 
dataset=pd.read_csv("E:\\ML\\AM_dataset\\ML_AM\\training_models\\cleaned_dataset_updated.csv")

In [59]:
dataset.head()

Unnamed: 0,Material,Laser power (W),Laser speed (mm/s),Layer thickness (um),Hatch spacing (um),Beam size (um),Scanning strategy,Linear energy density (J/m),Volumetric energy density (J/mm3),Yield strength (MPa),Ultimate tensile strength (MPa),Enlongation (%)
0,0,250,700,50,120.0,80,0,357.142857,59.52381,668.0,1011.0,22.0
1,0,250,700,50,120.0,80,0,357.142857,59.52381,875.0,1153.0,17.0
2,0,250,700,50,120.0,80,0,357.142857,59.52381,645.0,1025.0,38.0
3,0,250,700,50,120.0,80,0,357.142857,59.52381,1145.0,1376.0,19.0
4,0,950,320,100,500.0,100,0,2968.75,59.375,531.0,866.0,21.0


In [60]:
dataset.shape

(229, 12)

In [61]:
# Splitting the dataset into train and test data
input_features=dataset.iloc[:,:-3]
X=input_features
target_features=dataset.iloc[:,-3:]
y=target_features

In [62]:
# Importing library for test train split data
from sklearn.model_selection import train_test_split

# Importing library for scaling the input features
from sklearn.preprocessing import StandardScaler 

In [63]:
# train test split data
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [64]:
# Printing the shape of the test and train dataset
print("Training data shape is: ", X_train.shape,y_train.shape)
print("Testing data shape is: ",X_test.shape,y_test.shape)

Training data shape is:  (183, 9) (183, 3)
Testing data shape is:  (46, 9) (46, 3)


In [65]:
# Applying standard scaler on the input features 
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

In [66]:
# Saving the pickle file for further predictions
import pickle

# Save the trained model to a file
model_filename = 'scalling_new.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(scaler, model_file)

print(f'Scalling saved to {model_filename}')

Scalling saved to scalling_new.pkl


Our dataset containing all output features are numerical features. So, we have to apply regression based 
machine learning algorithms

In [26]:
# Model Training 

In [27]:

# Importing libraries for model_training
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
# Model performance 
from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score

Decision Tree Regression

In [28]:
# Initialize the DecisionTreeRegressor
dt_regressor = DecisionTreeRegressor(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=dt_regressor, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)

# Fit the model with the training dataset
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_

# Initialize the DecisionTreeRegressor with the best hyperparameters
best_dt_regressor = DecisionTreeRegressor(random_state=42, **best_params)

# Train the model with the training dataset
best_dt_regressor.fit(X_train_scaled, y_train)

# Make predictions on the testing dataset
y_predict_best = best_dt_regressor.predict(X_test_scaled)

# Evaluate the model performance using MSE and r2_Score
mse_best_dt = mean_squared_error(y_test, y_predict_best)
rmse_best_dt = np.sqrt(mse_best_dt)
r2_best_dt = r2_score(y_test, y_predict_best)

# Display the results
print("Best hyperparameters:", best_params)
print("MSE for Decision Tree Regressor after hyperparameter tuning is: ", mse_best_dt)
print("RMSE for Decision Tree Regressor after hyperparameter tuning is: ", rmse_best_dt)
print("r2_score for Decision Tree Regressor after hyperparameter tuning is: ", r2_best_dt)


Best hyperparameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
MSE for Decision Tree Regressor after hyperparameter tuning is:  14562.451569209234
RMSE for Decision Tree Regressor after hyperparameter tuning is:  120.67498319539652
r2_score for Decision Tree Regressor after hyperparameter tuning is:  0.8282821433533011


Random Forest Regression

In [29]:
# Initialize the RandomForestRegressor
rf_regressor = RandomForestRegressor(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)

# Fit the model with the training dataset
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters
best_params_rf = grid_search.best_params_

# Initialize the RandomForestRegressor with the best hyperparameters
best_rf_regressor = RandomForestRegressor(random_state=42, **best_params_rf)

# Train the model with the training dataset
best_rf_regressor.fit(X_train_scaled, y_train)

# Make predictions on the testing dataset
y_predict_rf = best_rf_regressor.predict(X_test_scaled)

# Evaluate the model performance using MSE, RMSE, and R2 Score
mse_rf = mean_squared_error(y_test, y_predict_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(y_test, y_predict_rf)

# Display the results
print("Best hyperparameters for Random Forest Regressor:", best_params_rf)
print("Mean Squared Error (MSE) for Random Forest Regressor:", mse_rf)
print("Root Mean Squared Error (RMSE) for Random Forest Regressor:", rmse_rf)
print("R2 Score for Random Forest Regressor:", r2_rf)


Best hyperparameters for Random Forest Regressor: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Mean Squared Error (MSE) for Random Forest Regressor: 11926.391461694162
Root Mean Squared Error (RMSE) for Random Forest Regressor: 109.20801921880171
R2 Score for Random Forest Regressor: 0.8502092833405938


XG Boost Regression

In [30]:
# Initialize the XGBRegressor
xgb_regressor = XGBRegressor(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1],
    'colsample_bytree': [0.8, 0.9, 1],
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=xgb_regressor, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)

# Fit the model with the training dataset
grid_search.fit(X_train_scaled, y_train)

# Get the best hyperparameters
best_params_xgb = grid_search.best_params_

# Initialize the XGBRegressor with the best hyperparameters
best_xgb_regressor = XGBRegressor(random_state=42, **best_params_xgb)

# Train the model with the training dataset
best_xgb_regressor.fit(X_train_scaled, y_train)

# Make predictions on the testing dataset
y_predict_xgb = best_xgb_regressor.predict(X_test_scaled)

# Evaluate the model performance using MSE, RMSE, and R2 Score
mse_xgb = mean_squared_error(y_test, y_predict_xgb)
rmse_xgb = np.sqrt(mse_xgb)
r2_xgb = r2_score(y_test, y_predict_xgb)

# Display the results
print("Best hyperparameters for XGBoost Regressor:", best_params_xgb)
print("Mean Squared Error (MSE) for XGBoost Regressor:", mse_xgb)
print("Root Mean Squared Error (RMSE) for XGBoost Regressor:", rmse_xgb)
print("R2 Score for XGBoost Regressor:", r2_xgb)


Best hyperparameters for XGBoost Regressor: {'colsample_bytree': 1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 150, 'subsample': 0.8}
Mean Squared Error (MSE) for XGBoost Regressor: 11403.096788662757
Root Mean Squared Error (RMSE) for XGBoost Regressor: 106.78528357719877
R2 Score for XGBoost Regressor: 0.8378836161943521


K-Nearest Neighbour

In [31]:
# Initialize the KNeighborsRegressor
knn_regressor = KNeighborsRegressor()

# Define the hyperparameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],  
    'weights': ['uniform', 'distance'], 
    'p': [1, 2]  # 1 for Manhattan distance, 2 for Euclidean distance
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=knn_regressor, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)

# Fit the model with the training dataset
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params_knn = grid_search.best_params_

# Initialize the KNeighborsRegressor with the best hyperparameters
best_knn_regressor = KNeighborsRegressor(**best_params_knn)

# Training the model with the training data
best_knn_regressor.fit(X_train, y_train)

# Predictions on the testing data
y_predict_knn = best_knn_regressor.predict(X_test)

# Evaluate the model performance using MSE, RMSE, and R2 Score
mse_knn = mean_squared_error(y_test, y_predict_knn)
rmse_knn = np.sqrt(mse_knn)
r2_knn = r2_score(y_test, y_predict_knn)

# Display the results
print("Best hyperparameters for K-Nearest Neighbors:", best_params_knn)
print("Mean Squared Error (MSE) for K-Nearest Neighbors:", mse_knn)
print("Root Mean Squared Error (RMSE) for K-Nearest Neighbors:", rmse_knn)
print("R2 Score for K-Nearest Neighbors:", r2_knn)


Best hyperparameters for K-Nearest Neighbors: {'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
Mean Squared Error (MSE) for K-Nearest Neighbors: 5836.01671158614
Root Mean Squared Error (RMSE) for K-Nearest Neighbors: 76.39382639707308
R2 Score for K-Nearest Neighbors: 0.8894509712261511


Support Vector Machine 

In [16]:
# Initialize the Support Vector Machine Regressor (SVR)
svm_regressor = SVR()

# Define the hyperparameter grid
param_grid = {
    'estimator__kernel': ['rbf', 'poly'], 
    'estimator__C': [0.1, 1, 10],  # Regularization parameter
    'estimator__epsilon': [0.1, 0.2, 0.5],
}

# Wrap SVR in MultiOutputRegressor
multioutput_svr = MultiOutputRegressor(svm_regressor)

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=multioutput_svr, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)

# Fit the model with the training dataset
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params_svm = grid_search.best_params_

# Initialize the MultiOutputRegressor with the best hyperparameters
best_multioutput_svr = MultiOutputRegressor(SVR(**best_params_svm['estimator']))

# Train the model with the training dataset
best_multioutput_svr.fit(X_train, y_train)

# Make predictions on the testing dataset
y_predict_svm = best_multioutput_svr.predict(X_test)

# Evaluate the model performance using MSE, RMSE, and R2 Score for each output
mse_svm = mean_squared_error(y_test, y_predict_svm)
rmse_svm = np.sqrt(mse_svm)
r2_svm = r2_score(y_test, y_predict_svm, multioutput='raw_values')

# Display the results
print("Best hyperparameters for Support Vector Machine:", best_params_svm)
print("Mean Squared Error (MSE) for Support Vector Machine:", mse_svm)
print("Root Mean Squared Error (RMSE) for Support Vector Machine:", rmse_svm)
print("R2 Score for Support Vector Machine:", r2_svm)


KeyboardInterrupt: 

In [None]:
# finding the best model


In [32]:
# Model performance metrics based on the results
decision_tree_metrics = {'mse': mse_best_dt, 'r2_score': r2_best_dt}
random_forest_metrics = {'mse': mse_rf, 'r2_score': r2_rf}
xgboost_metrics = {'mse': mse_xgb, 'r2_score': r2_xgb}
knn_metrics = {'mse': mse_knn, 'r2_score': r2_knn}

# MSE for all models
best_model_mse = min([decision_tree_metrics['mse'], random_forest_metrics['mse'], xgboost_metrics['mse'], knn_metrics['mse']])
best_model_name_mse = {
    decision_tree_metrics['mse']: 'Decision Tree',
    random_forest_metrics['mse']: 'Random Forest',
    xgboost_metrics['mse']: 'XGBoost',
    knn_metrics['mse']: 'KNN'
}[best_model_mse]

# R2 Score for all models
best_model_r2 = max([decision_tree_metrics['r2_score'], random_forest_metrics['r2_score'], xgboost_metrics['r2_score'], knn_metrics['r2_score']])
best_model_name_r2 = {
    decision_tree_metrics['r2_score']: 'Decision Tree',
    random_forest_metrics['r2_score']: 'Random Forest',
    xgboost_metrics['r2_score']: 'XGBoost',
    knn_metrics['r2_score']: 'KNN'
}[best_model_r2]

# Best model results
print(f"Best model based on MSE: {best_model_name_mse}")
print(f"Best model based on R2 Score: {best_model_name_r2}")


Best model based on MSE: KNN
Best model based on R2 Score: KNN


In [46]:
# Saving the pickle file for further predictions
import pickle

# Save the trained model to a file
model_filename = 'knn_model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(best_knn_regressor, model_file)

print(f'Trained Random Forest model saved to {model_filename}')

Trained Random Forest model saved to knn_model.pkl


In [52]:

# Loading the pickle file
file_path = 'E:\\ML\\AM_dataset\\ML_AM\\knn_model.pkl'

with open(file_path, 'rb') as file:
    loaded_knn_model = pickle.load(file)


In [53]:
y_test.head()

Unnamed: 0,Yield strength (MPa),Ultimate tensile strength (MPa),Enlongation (%)
216,279.8,309.6,2.0
66,510.16,647.326,41.73
9,626.0,857.0,29.0
192,974.0,1058.0,12.3
15,1207.0,1471.0,18.9


In [56]:
loaded_knn_model.predict(X_test)

array([[ 249.93333333,  318.83333333,    2.86666667],
       [ 432.086     ,  548.663     ,   64.271     ],
       [ 751.66666667, 1038.        ,   26.66666667],
       [ 927.33333333, 1013.66666667,   13.2       ],
       [1220.33333333, 1435.        ,   22.16666667],
       [ 843.        ,  951.        ,   12.66666667],
       [1220.33333333, 1435.        ,   22.16666667],
       [ 843.        ,  951.        ,   12.66666667],
       [ 911.        , 1014.33333333,   11.15666667],
       [ 971.58039233, 1016.0580968 ,    3.51568436],
       [ 249.93333333,  318.83333333,    2.86666667],
       [1220.33333333, 1435.        ,   22.16666667],
       [ 272.3       ,  447.7       ,   17.4       ],
       [ 993.        , 1122.66666667,    8.7       ],
       [ 116.        ,  173.66666667,   31.83333333],
       [1179.4350635 , 1237.19183614,    7.20615961],
       [ 960.        , 1020.        ,    3.9       ],
       [ 833.33333333, 1386.66666667,   21.46666667],
       [ 116.        ,  173.