In [None]:
## Input features: Temperature, Pressure, H2 to N2 ratio, Flow rate
import numpy as np
import pandas as pd
import itertools
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE, SMOTENC, RandomOverSampler
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.optimizers import Adam
import json

# Experimental measurements, one row per run, five values per row.
# Column order (see `my_column_names` below):
#   Temperature(C), Pressure(psig), H2 to N2 ratio, Flow rate(ml/g),
#   Ammonia Concentration(%)
# The first 31 rows hold temperature at 320 and flow rate at 100 while pressure
# varies from 0 to 80 for H2:N2 ratios 0.5, 1, 2, 3 and 5. The remaining rows
# hold pressure at 80 and ratio at 1 while temperature varies (about 280-401)
# for flow rates 100, 200, 250 and 50.
data = np.array([[320, 0, 0.5, 100, 0.36],
[320, 20.49, 0.5, 100, 0.69],
[320, 39.18, 0.5, 100, 0.96],
[320, 59.59, 0.5, 100, 1.21],
[320, 79.93, 0.5, 100, 1.43],
[320, 0, 1, 100, 0.58],
[320, 10.24, 1, 100, 0.79],
[320, 20.11, 1, 100, 0.96],
[320, 39.33, 1, 100, 1.23],
[320, 59.44, 1, 100, 1.44],
[320, 75.44, 1, 100, 1.55],
[320, 79.85, 1, 100, 1.57],
[320, 0, 2, 100, 0.60],
[320, 10.09, 2, 100, 0.78],
[320, 20.41, 2, 100, 0.94],
[320, 39.18, 2, 100, 1.15],
[320, 59.51, 2, 100, 1.29],
[320, 75.59, 2, 100, 1.34],
[320, 80, 2, 100, 1.34],
[320, 0, 3, 100, 0.61],
[320, 10.32, 3, 100, 0.71],
[320, 20.56, 3, 100, 0.79],
[320, 39.18, 3, 100, 0.91],
[320, 59.44, 3, 100, 1],
[320, 79.85, 3, 100, 1.03],
[320, 0, 5, 100, 0.51],
[320, 10.32, 5, 100, 0.56],
[320, 20.49, 5, 100, 0.59],
[320, 39.33, 5, 100, 0.65],
[320, 59.66, 5, 100, 0.67],
[320, 79.85, 5, 100, 0.68],
[280.50, 80, 1, 100, 0.56],
[321.99, 80, 1, 100, 1.54],
[342.11, 80, 1, 100, 1.59],
[362.24, 80, 1, 100, 1.51],
[401.24, 80, 1, 100, 1.24],
[280.25, 80, 1, 200, 0.31],
[301.37, 80, 1, 200, 0.76],
[321.74, 80, 1, 200, 1.13],
[342.11, 80, 1, 200, 1.33],
[361.74, 80, 1, 200, 1.35],
[381.37, 80, 1, 200, 1.26],
[401.24, 80, 1, 200, 1.09],
[280.50, 80, 1, 250, 0.27],
[321.49, 80, 1, 250, 1.00],
[340.12, 80, 1, 250, 1.17],
[361.24, 80, 1, 250, 1.22],
[400.99, 80, 1, 250, 1.03],
[280.25, 80, 1, 50, 1.21],
[321.99, 80, 1, 50, 1.85],
[360.25, 80, 1, 50, 1.60],
[399.25, 80, 1, 50, 1.32],
[340.62, 80, 1, 50, 1.72]])

# Wrap the raw measurement array in a DataFrame so columns can be addressed by
# name. `data` is rebound from ndarray to DataFrame here.
my_column_names = ['Temperature(C)','Pressure(psig)','H2 to N2 ratio','Flow rate(ml/g)','Ammonia Concentration(%)']
data = pd.DataFrame(data=data, columns=my_column_names)
dataVariableNames = data.columns.tolist()

# The first four columns are the input features and the last column is the
# target. The previous slices ([1:5] and [5:]) were off by one: they skipped
# Temperature, put the target into the feature list, and left `labels` empty.
feaColums = dataVariableNames[:4]
labels = dataVariableNames[4:]

# Keep a single row (the first seen) for each distinct combination of feature
# values, so repeated operating conditions are not resampled more than once.
dataDel = data.drop_duplicates(subset=feaColums, keep='first')

# Data augmentation: SMOTE is used here as an interpolation engine rather than
# as a class balancer. For every possible choice of 3 rows of `dataDel`, those
# rows are flagged as an artificial "minority" class (label 1, everything else
# 0) and SMOTE is asked to resample. The full rows -- features AND the target
# column -- are passed in, so the synthetic rows carry an interpolated target.
ind_list = list(range(dataDel.shape[0]))
ind_set = list(itertools.combinations(ind_list, 3))

# k_neighbors=1 is the neighbour count handed to SMOTE; with only 3 flagged
# rows per run it has to stay small. The fixed random_state keeps every
# fit_resample call reproducible.
model_smote = SMOTE(k_neighbors=1, random_state=0)

# One resampled table per 3-row combination. Each entry is stored as a plain
# ndarray so the list can be stacked with np.array(...) afterwards.
smote_data = []
for item in ind_set:
    # Positional mask: 1 for the three selected rows, 0 for the rest.
    y_smote = np.zeros(dataDel.shape[0])
    y_smote[list(item)] = 1
    # The resampled class labels are not needed, only the resampled rows.
    data_smote_resampled, _y_smote_resampled = model_smote.fit_resample(dataDel, y_smote)
    smote_data.append(np.asarray(data_smote_resampled))


# Stack the per-combination SMOTE outputs and flatten them into one 2-D table
# with 5 columns (4 features + target).
smote_data = np.array(smote_data)
smote_data = smote_data.reshape(-1, 5)
print(smote_data.shape)

# Keep each distinct row once. np.unique works on the 2-D array directly with
# axis=0, so there is no need to build an intermediate list of row tuples.
unique_data_array = np.unique(smote_data, axis=0)
print(unique_data_array.shape)

# Taking random 23000 unique data points (seeded for reproducibility).
# np.random.choice raises ValueError if fewer than 23000 unique rows exist.
np.random.seed(42)
random_unique_indices = np.random.choice(unique_data_array.shape[0], size=23000, replace=False)
unique_data_random = unique_data_array[random_unique_indices, :]

# Concatenate the sampled rows with the original experimental data.
# NOTE(review): the SMOTE outputs presumably still contain the original rows,
# so some experimental rows may appear twice after this stack -- confirm
# whether that is intended.
smote_data_array = np.vstack((unique_data_random, data))
print(smote_data_array.shape)

# Columns 0-3 are the model inputs, column 4 is the target.
smote_data_feature = smote_data_array[:, :4]
smote_data_label = smote_data_array[:, 4]

(2342600, 5)
(69415, 5)
(23053, 5)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# Split the data into training (80%) and validation (20%) sets
X_train, X_val, y_train, y_val = train_test_split(smote_data_feature, smote_data_label, test_size=0.2, random_state=42)

# Candidate hyperparameter grid.
# NOTE: this grid is not passed to any search in this cell; the model below is
# fitted with fixed hyperparameters (and a kernel that is not in the grid).
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto'], 'kernel': ['linear', 'rbf']}


# SVR Model fitting and Prediction
# NOTE(review): the features are fed in unscaled; consider standardising them
# before fitting, since the columns have very different magnitudes.
Model = SVR(kernel = 'poly', degree = 3, gamma = 'scale', C = 10)
Model.fit(X_train, y_train)
y_pred = Model.predict(X_val)
Results = pd.DataFrame({'Actual Ammonia Concentration(%)': y_val, 'Predicted Ammonia Concentration(%)': y_pred})
print(Results)


# Performance Check on the validation split
MAE = mean_absolute_error(y_val, y_pred)
MSE = mean_squared_error(y_val, y_pred)
RMSE = np.sqrt(MSE)

print(f'Mean Absolute Error: {MAE:.2f}')
print(f'Mean Squared Error: {MSE:.2f}')
print(f'Root Mean Squared Error: {RMSE:.2f}')

# Calculate R2 score
R2 = r2_score(y_val, y_pred)

print(f'R2 Score: {R2:.4f}')

      Actual Ammonia Concentration(%)  Predicted Ammonia Concentration(%)
0                            1.369018                            1.208284
1                            1.070982                            1.097445
2                            1.222548                            1.487110
3                            0.922162                            1.027003
4                            0.743452                            0.670877
...                               ...                                 ...
4606                         0.924376                            1.042939
4607                         1.263303                            1.072268
4608                         0.792360                            0.756160
4609                         1.129250                            0.963741
4610                         1.408298                            1.267889

[4611 rows x 2 columns]
Mean Absoulte Error: 0.13
Mean Squared Error: 0.03
Root Mean Squared Error: 0.17
R2 Sco

In [None]:
# Calculate the average prediction accuracy, defined here as
# 1 - (relative error) averaged over the validation set.
# The error is normalised by |y_val| so that a negative target cannot flip the
# sign of the error and push the "accuracy" above 1.
# A target of exactly 0 still yields inf/nan for that sample.
relative_errors = np.abs(y_val - y_pred) / np.abs(y_val)
prediction_accuracy = 1 - relative_errors
average_prediction_accuracy = np.mean(prediction_accuracy)
print(f"Average Prediction Accuracy: {average_prediction_accuracy}")

Average Prediction Accuracy: 0.8421881605334594


In [None]:
import joblib

# Persist the fitted SVR (`Model`) to disk with joblib.
# NOTE: despite the .h5 extension, joblib.dump writes a pickle-based file, not
# an HDF5 file, so it must be read back with joblib.load rather than an
# HDF5/Keras loader.
model_filename = 'svr_regression_model.h5'
joblib.dump(Model, model_filename)
print(f"Model saved to {model_filename}")

In [None]:
# Load the model back from the same filename the save cell writes to.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
loaded_model = joblib.load('svr_regression_model.h5')