In [1]:
#Xgboost Model


import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



df = pd.read_csv("Corrosion dataset7.0.csv")


features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN",  "SUNSHINE", "PRECIPIT",  "CHLORIDE", "SO2", "TIME"]


target = "Corrosion rate"



X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)




scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



xgb_model = xgb.XGBRegressor(
    n_estimators=300,  
    max_depth=10,  
    learning_rate=0.20,  
    subsample=0.8, 
    colsample_bytree=0.8,  
    random_state=42
)

xgb_model.fit(X_train, y_train)



y_pred = xgb_model.predict(X_test)



mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)


print(f"Mean Absolute Error: {mae:.4f}")
print(f"Mean Squared Error: {mse:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"R-Squared Score: {r2:.4f}")




Mean Absolute Error: 0.0100
Mean Squared Error: 0.0003
Root Mean Squared Error: 0.0168
R-Squared Score: 0.7666


In [2]:
#Cat Boost Model


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from catboost import CatBoostRegressor


df = pd.read_csv("Corrosion dataset7.0.csv")



features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN", "SUNSHINE", "PRECIPIT", "CHLORIDE", "SO2", "TIME"]


target = "Corrosion rate"


X = df[features]
y = df[target]



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.10,
    depth=10,
    loss_function='RMSE',
    verbose=100,
    random_seed=42
)


model.fit(X_train, y_train)



y_pred = model.predict(X_test)



mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)



print(f"Mean Absolute Error: {mae:.4f}")
print(f"Mean Squared Error: {mse:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"R² Score on Test Data: {r2:.4f}")


0:	learn: 0.0344835	total: 58.3ms	remaining: 58.2s
100:	learn: 0.0033205	total: 142ms	remaining: 1.26s
200:	learn: 0.0008752	total: 221ms	remaining: 878ms
300:	learn: 0.0003321	total: 302ms	remaining: 701ms
400:	learn: 0.0001459	total: 381ms	remaining: 569ms
500:	learn: 0.0000612	total: 460ms	remaining: 458ms
600:	learn: 0.0000257	total: 540ms	remaining: 358ms
700:	learn: 0.0000113	total: 618ms	remaining: 263ms
800:	learn: 0.0000048	total: 698ms	remaining: 173ms
900:	learn: 0.0000021	total: 780ms	remaining: 85.7ms
999:	learn: 0.0000010	total: 859ms	remaining: 0us
Mean Absolute Error: 0.0103
Mean Squared Error: 0.0003
Root Mean Squared Error: 0.0171
R² Score on Test Data: 0.7189


In [3]:
#Decision Tree Model


import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



df = pd.read_csv("Corrosion dataset7.0.csv")



features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN",  "SUNSHINE", "PRECIPIT",  "CHLORIDE", "SO2", "TIME"]


target = "Corrosion rate"



X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=50)




dt_model = DecisionTreeRegressor(max_depth=4, random_state=50)
dt_model.fit(X_train, y_train)



y_pred = dt_model.predict(X_test)



mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)



print(f"Mean Absolute Error: {mae:.4f}")
print(f"Mean Squared Error: {mse:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")
print(f"R-Squared Score: {r2:.4f}")



Mean Absolute Error: 0.0110
Mean Squared Error: 0.0002
Root Mean Squared Error: 0.0148
R-Squared Score: 0.6546


In [4]:
#Gradient Boost Model


from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pandas as pd



df = pd.read_csv("Corrosion dataset7.0.csv")


features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN", "SUNSHINE", "PRECIPIT", "CHLORIDE", "SO2", "TIME"]

target = "Corrosion rate"


X = df[features]
y = df[target]



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



gbr_model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, max_depth=5, random_state=42)
gbr_model.fit(X_train_scaled, y_train)
y_pred_gbr = gbr_model.predict(X_test_scaled)



mae = mean_absolute_error(y_test, y_pred_gbr)
mse = mean_squared_error(y_test, y_pred_gbr)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_gbr)



print("\nGradient Boosting Regressor")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")



Gradient Boosting Regressor
MAE: 0.0120
MSE: 0.0004
RMSE: 0.0202
R²: 0.6065


In [6]:
#Gradient Boosting with GridSearchCV


import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



df = pd.read_csv("Corrosion dataset7.0.csv")


features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN", "SUNSHINE", "PRECIPIT", "CHLORIDE", "SO2", "TIME"]

target = "Corrosion rate"


X = df[features]
y = df[target]



X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=50
)



param_grid = {
    'n_estimators': [100, 300, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0]
}



grid = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=50),
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1
)


grid.fit(X_train, y_train)



print("Best Parameters:", grid.best_params_)
print("Best Cross-Validated R² Score:", round(grid.best_score_, 4))


best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)



mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)



print("\nEvaluation on Test Data:")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")


Fitting 5 folds for each of 81 candidates, totalling 405 fits
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.9}
Best Cross-Validated R² Score: 0.6661

Evaluation on Test Data:
MAE: 0.0038
MSE: 0.0001
RMSE: 0.0071
R²: 0.9204


In [7]:
#KNeighbors Regressor Model


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



df = pd.read_csv("Corrosion dataset7.0.csv")


features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN", "SUNSHINE", "PRECIPIT", "CHLORIDE", "SO2", "TIME"]


target = "Corrosion rate"


X = df[features]
y = df[target]



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=50)



scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



knn_model = KNeighborsRegressor(n_neighbors=4)
knn_model.fit(X_train_scaled, y_train)



y_pred_knn = knn_model.predict(X_test_scaled)



mae = mean_absolute_error(y_test, y_pred_knn)
mse = mean_squared_error(y_test, y_pred_knn)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_knn)



print("\nK-Nearest Neighbors Regressor")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")



K-Nearest Neighbors Regressor
MAE: 0.0077
MSE: 0.0002
RMSE: 0.0151
R²: 0.6379


In [10]:
#Ridge Regression Model


from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np



df = pd.read_csv("Corrosion dataset7.0.csv")


features = ["Si", "Mn", "P", "S", "Cu", "Cr", "Ni", "T_AVE", "RH_MIN", "SUNSHINE", "PRECIPIT", "CHLORIDE", "SO2", "TIME"]

target = "Corrosion rate"


X = df[features]
y = df[target]



X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=46)



ridge = Ridge(alpha=0.1)  
ridge.fit(X_train, y_train)



y_pred = ridge.predict(X_test)



mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)



print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-Squared Score (R²): {r2:.4f}")


Mean Absolute Error (MAE): 0.0130
Mean Squared Error (MSE): 0.0004
Root Mean Squared Error (RMSE): 0.0191
R-Squared Score (R²): 0.6219
