In [None]:
!pip install scikit-optimize


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from scipy.optimize import minimize
import numpy as np
import matplotlib.pyplot as plt
import joblib

In [None]:
# Load the full data set from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="converted_distillation_data.csv")

In [None]:
# Columns of `df` used as model features (order matters: it fixes the column
# order of the feature matrix built from this list).
inputs = [
    'Feed_Flow_Rate',
    'Feed_Composition',
    'Reflux_Ratio',
    'Boil_Up_Ratio',
]

# Columns of `df` the model is trained to predict (order fixes the order of
# the values returned by the multi-output regressor).
outputs = [
    'Distillate_Purity',
    'Bottoms_Purity',
    'Reboiler_Duty',
    'Condenser_Duty',
]

In [None]:
# Split the loaded frame into the feature matrix and the multi-column target,
# using the column-name lists defined earlier in the notebook.
X = df[inputs]
y = df[outputs]

In [None]:
# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Multi-output random forest: 500 trees, seeded so that repeated runs build
# the same forest from the same training data.
rf_model = RandomForestRegressor(
    n_estimators=500,
    random_state=50,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
rf_model.fit(X_train, y_train)

In [None]:
# Predictions for the held-out rows (one row per test sample, one column per
# target). NOTE(review): `y_pred` is not used by any later cell visible here.
y_pred = rf_model.predict(X=X_test)

In [None]:
# One hand-picked operating point used to sanity-check the trained model.
# It is stored under its own name: the original cell rebound the global
# `inputs`, which is the list of feature column names, so re-running the
# `X=df[inputs]` cell afterwards failed.
# NOTE(review): three of these four values fall outside the search bounds
# used by the optimisation cell (e.g. 350 vs. (50, 100) for the first
# feature) — confirm which of the two is expressed in the intended units.
sample_inputs = [[350, 2.5, 100, 2000]]

# Label the point with the exact columns the model was fitted on.
input_df = pd.DataFrame(sample_inputs, columns=X.columns)

# Target values the optimisation tries to reach, in the same order as the
# model's output columns (Distillate_Purity, Bottoms_Purity, Reboiler_Duty,
# Condenser_Duty). Read as a global by `objective_function`.
ideal_values = [99.6, 0.04, 1250, 1120]

predicted_outputs = rf_model.predict(input_df)
print(predicted_outputs)

In [None]:
def objective_function(inputs):
    """Return the squared-error distance between a prediction and the targets.

    The candidate point is wrapped in a one-row DataFrame carrying the column
    labels of ``X``, passed through ``rf_model``, and the resulting prediction
    is compared element-wise with ``ideal_values``.

    Relies on the notebook globals ``X``, ``rf_model`` and ``ideal_values``.

    Parameters
    ----------
    inputs : sequence of numbers
        One value per column of ``X``, in the same order.

    Returns
    -------
    The sum of squared differences between the predicted outputs and
    ``ideal_values`` (unweighted, so every output contributes on its own
    raw scale).
    """
    candidate = pd.DataFrame([inputs], columns=X.columns)
    prediction = rf_model.predict(candidate).flatten()
    residuals = prediction - ideal_values
    return sum(residuals ** 2)

# Search interval for each decision variable. The order must match the
# columns of `X`, because `objective_function` labels the candidate point
# with `X.columns`.
bounds = [
    (50, 100),    # Feed_Flow_Rate
    (0, 50),      # Feed_Composition
    (0.5, 10.0),  # Reflux_Ratio
    (0.0, 0.3),   # Boil_Up_Ratio
]

# Starting point of the search; lies inside every interval above.
initial_guess = [75, 5, 2.5, 0.1]

# Powell's method needs only objective evaluations (no gradient) and
# honours the box bounds passed here.
result = minimize(
    objective_function,
    x0=initial_guess,
    method='Powell',
    bounds=bounds,
)

# Do not silently present a failed run as an optimum: surface the
# optimiser's own status message when it reports non-convergence.
if not result.success:
    print("Warning: optimizer did not report success:", result.message)

best_inputs = result.x
print("Optimized Inputs:", best_inputs)
# Remaining squared error at the reported point, so the reader can judge how
# close the model gets to the targets.
print("Objective value:", result.fun)

In [None]:
# Persist the fitted forest to disk in the working directory.
# Only load the resulting pickle from sources you trust.
joblib.dump(value=rf_model, filename="rf_model.pkl")

In [None]:
# Export the held-out features and targets as CSV. The row index is dropped,
# so the original row labels of `df` are not recoverable from these files.
X_test.to_csv(path_or_buf='X_test.csv', index=False)
y_test.to_csv(path_or_buf='y_test.csv', index=False)


In [None]:
# Also keep the held-out targets as a pickle (unlike the CSV export written
# with index=False, this stores the DataFrame object as-is).
joblib.dump(value=y_test, filename='y_test.pkl')

In [None]:
# Also keep the held-out features as a pickle (unlike the CSV export written
# with index=False, this stores the DataFrame object as-is).
joblib.dump(value=X_test, filename='X_test.pkl')