In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import sys
import random
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math    #Various math expressions
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.optimize import fsolve
from IPython.display import display
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file found
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# We begin the analysis by gathering Antoine equation data to create functions that compute the saturation
#    pressure for ethanol and water:  ln(Psat) = A - B/(T + C).
#    This data comes from Appendix H. Vapor Pressures of Himmelblau textbook
#    with units of temperature in K and pressure in mmHg.
class species(object):
    """Plain attribute container for one component.

    Instances are given three attributes below:
      name -- component name (str)
      Psat -- callable T (K) -> saturation pressure (mmHg)
      Tsat -- callable P (mmHg) -> saturation temperature (K), found with fsolve
    """
    pass


A = species()
A.name = 'ethanol'
A.Psat = lambda T: np.exp(18.5242 -  3578.91/(T +  (-50.5)))
# Invert Psat numerically; 500 K is only the solver's starting guess
A.Tsat = lambda P: fsolve(lambda T: A.Psat(T) - P,500)[0]


B = species()
B.name = 'water'
# Antoine A constant is 18.3036 (same value the Psat dictionary uses for water);
# the previous 18.036 under-predicted the vapour pressure (~581 instead of ~760 mmHg at 373.15 K)
B.Psat = lambda T: np.exp(18.3036 -  3816.44/(T +  (-46.13)))
B.Tsat = lambda P: fsolve(lambda T: B.Psat(T) - P,500)[0]

# Quick sanity checks (in a notebook only the last expression is displayed)
A.Psat(350)
A.Tsat(141)

In [None]:
#Psat = dict()
#Psat['ethanol'] = lambda T: (np.exp(18.5242 -  3578.91/(T +  (-50.5))))/7.5006157584566/100  # T range (K): 270-369-->P (bar)
#Psat['water'] = lambda T: (np.exp(18.3036 -  3816.44/(T +  (-46.13))))/7.5006157584566/100    # T range (K): 284-441 --> P (bar)
#E = 'ethanol'
#W = 'water'
#feed_P = 1.01 # units: bar
#feed_T = 350 # units: K

In [None]:
# Saturation-pressure correlations keyed by component name; every entry maps T (K) -> Psat (bar)
Psat = dict()


def Psat_ethanol(T):
    """Return the saturation pressure of ethanol in bar at temperature T (K).

    Two Antoine correlations are stitched together:
      * T < 364        : ln-form constants giving mmHg, converted to bar
                         (7.5006157584566 mmHg per kPa, then 100 kPa per bar).
      * 364 <= T < 514 : log10-form constants that give bar directly
                         (taken from the NIST WebBook per the original note).

    Raises:
        ValueError: if T is 514 K or above (or NaN), where neither correlation
            applies. Previously this case silently returned None, which only
            surfaced later as a confusing TypeError in arithmetic.
    """
    if T < 364:  # Adjust this switch-over temperature as needed
        return (np.exp(18.5242 - 3578.91/(T + (-50.5)))) / 7.5006157584566 / 100
    if T < 514:
        return (10**(4.92531 - 1432.526/(T + (-61.819))))
    raise ValueError(
        'Psat_ethanol: no Antoine constants available for T = {} K (valid below 514 K)'.format(T)
    )


Psat['ethanol'] = Psat_ethanol
Psat['water'] = lambda T: (np.exp(18.3036 -  3816.44/(T +  (-46.13))))/7.5006157584566/100    # T range (K): 284-441 --> P (bar)
E = 'ethanol'
W = 'water'

In [None]:
# --- Component data ---
eth_mw = 46.07   # ethanol molecular weight --> units: kg/kmol
wtr_mw = 18.016  # water molecular weight --> units: kg/kmol

# --- Column specification ---
feed = 600            # feed flow --> units: kmol/h
dist = 260            # distillate flow --> units: kmol/h
botts = feed - dist   # bottoms flow by overall balance --> units: kmol/h
feed_P = 1.01         # column operating pressure --> units: bar
feed_T = 350          # feed temperature --> units: K
T_liq = 780           # total liquid flowrate --> units: kmol/h
T_vap = 1040          # total vapour flowrate --> units: kmol/h
T_feed = T_liq + T_vap            # combined liquid + vapour traffic --> units: kmol/h
RR = T_liq/dist                   # reflux ratio
psi = T_vap/T_feed                # fraction of vapourisation
q = 1 - psi                       # q-line value
RB = (T_vap-(1-q)*feed)/botts     # boil-up ratio
temp_e_conc = 0.82076             # temporary ethanol concentration taken for the distillate
# Relative volatility: ratio of the two saturation pressures at the feed temperature
alpha = Psat['ethanol'](feed_T)/Psat['water'](feed_T)

# --- Summary table: one (label, value) pair per row ---
summary_rows = [
    ('Light substance', 'Ethanol'),
    ('Heavy substance', 'Water'),
    ('Ethanol molecular weight', eth_mw),
    ('Water molecular weight', round(wtr_mw, 2)),
    ('Ethanol Vapour Pressure (bar)', round(Psat['ethanol'](feed_T), 3)),
    ('Water Vapour Pressure (bar)', round(Psat['water'](feed_T), 3)),
    ('Feed Flow (kmol/h)', feed),
    ('Distillate Flow (kmol/h)', dist),
    ('Bottoms Flow (kmol/h)', botts),
    ('Total Column Liquid Flow (kmol/h)', T_liq),
    ('Total Column Vapour Flow (kmol/h)', T_vap),
    ('Feed Temperature (K)', feed_T),
    ('Column Operating Pressure (bar)', feed_P),
    ('Vapourisation Fraction (\u03A8)', round(psi, 3)),
    ('Feed State/phase (q)', round(q, 3)),
    ('Reflux Ratio', RR),
    ('Boil-up Ratio', round(RB, 2)),
    ('Relative Volatility (\u03B1)', round(alpha, 2)),
    ('Distillate Temporary Ethanol Conc.', temp_e_conc),
]
names = [label for label, _ in summary_rows]
info_sep = [value for _, value in summary_rows]

# Collect the specification in a two-column dataframe for readability
df_sep = pd.DataFrame(summary_rows, columns = ['Variable', 'Value'])
display(df_sep)



In [None]:
# Using VL Equilibrium for binary mixtures and General Flash Equations
K_wtr = Psat['water'](feed_T)/feed_P  # water Psat / P ratio at feed conditions

# Water flowrate in the feed --> units: kmol/h
#   The 0.34 offset is the tuning constant used to obtain a ~37:63 ethanol:water feed ratio
wtr_feed = (((1 - temp_e_conc)*(psi*(K_wtr - 1) + 1)/(K_wtr)) + 0.34)*feed
eth_feed = feed - wtr_feed  # Ethanol flowrate in feed --> units: kmol/h

# Distillate split from the temporary ethanol concentration
eth_dist = temp_e_conc*dist
wtr_dist = dist - eth_dist

# Bottoms by component balance
eth_botts = eth_feed - eth_dist
wtr_botts = wtr_feed - wtr_dist

# Column data for the flow table (Feed, Distillate, Bottoms order)
names = ['Feed','Distillate','Bottoms']
ethanol = [round(eth_feed, 2), round(eth_dist, 2) , round(eth_botts, 4)]
water = [round(wtr_feed, 2), round(wtr_dist, 3), round(wtr_botts, 2)]
totals = [feed,dist,botts]

# Tabulate the component flows for readability
df_col = pd.DataFrame({
    'Flow Type': names,
    'Ethanol': ethanol,
    'Water': water,
    'Total': totals,
})
display(df_col)

In [None]:
# Re-do the component balance for a specified distillate purity (~80% ethanol)
# Could add a user input with a limit?

eth_d_pty = 0.805  # ethanol mole fraction specified for the distillate

# Distillate split at the specified purity
n_eth_dist = eth_d_pty*dist
n_wtr_dist = dist - n_eth_dist

# Bottoms by component balance against the feed computed earlier
n_eth_botts = eth_feed - n_eth_dist
n_wtr_botts = wtr_feed - n_wtr_dist

# Column data (Feed, Distillate, Bottoms order)
n_ethanol = [round(eth_feed, 2), round(n_eth_dist, 2) , round(n_eth_botts, 4)]
n_water = [round(wtr_feed, 2), round(n_wtr_dist, 3), round(n_wtr_botts, 2)]

# Tabulate the new flows, reusing the row labels and totals from the previous table
df_n_col = pd.DataFrame({
    'Flow Type': names,
    'Ethanol': n_ethanol,
    'Water': n_water,
    'Total': totals,
})
display(df_n_col)


In [None]:
# Equations & useful info for McCabe-Thiele Construction
xD = n_eth_dist/dist      # ethanol mole fraction in the distillate
zF = eth_feed/feed        # ethanol mole fraction in the feed
xB = n_eth_botts/botts    # ethanol mole fraction in the bottoms


def Eqcurve(x):
    """Equilibrium curve y(x) for constant relative volatility alpha."""
    return ((alpha*x)/(1 + x*(alpha - 1)))


def TOL(x):
    """Top operating line y(x), built from the reflux ratio RR and xD."""
    return ((RR/(RR + 1))*x + (1/(RR + 1))*xD)


def BOL(y):
    """Bottom operating line solved for x: returns x for a given y (uses RB and xB)."""
    return (RB/(RB + 1))*y + (1/(RB + 1))*xB


def BOL_2(x):
    """Bottom operating line in the usual y(x) form (inverse of BOL)."""
    return ((RB + 1)/RB)*x - (1/(RB))*xB


# Intersection of the top operating line with the q-line
xq = ((1/(RR + 1))*xD + (1/(q - 1))*zF)/((q/(q - 1)) - (RR/(RR + 1)))
yq = (q/(q - 1))*xq - (1/(q - 1))*zF  # q-line/ Feed line


def yq_2(x):
    """q-line / feed line y(x), for evaluating at arbitrary x."""
    return (q/(q - 1))*x - (1/(q - 1))*zF

# This is to generate random numbers between 0 & 1 for the McCabe-Thiele and then
#   ensure specific values in this generated list
def generate_numbers(num_values, specific_numbers=None, seed=None):
    """Return a sorted list of x values that always contains `specific_numbers`.

    Args:
        num_values: target length of the result. If `specific_numbers` is
            already at least this long, no random values are added.
        specific_numbers: optional sequence (list, tuple, ...) of values that
            must appear in the result. It is copied, never modified.
        seed: optional seed. When given, a private `random.Random(seed)` is used
            so the same call always returns the same list; when None (default)
            the shared `random` module generator is used, as before.

    Returns:
        A new list, sorted ascending, holding the required values plus uniform
        random values from [0, 1).
    """
    required = [] if specific_numbers is None else list(specific_numbers)

    # Private generator only when a seed is requested, so the default path
    # still responds to a global random.seed(...) call.
    rng = random if seed is None else random.Random(seed)

    # Generate random numbers to fill up to num_values
    n_random = max(num_values - len(required), 0)
    numbers = [rng.random() for _ in range(n_random)] + required

    # Sort the list
    numbers.sort()

    return numbers

# Use of the function generate_numbers
# Fix the seed first: the x grid is drawn from the `random` module, so without this
# every "Restart & Run All" would produce different tables and curve sample points.
random.seed(42)
num_values = 19  # Total number of x values to generate (random + required)
n_xB = xB + 0.0000005  # point just above xB, so the strict `x > xB` filters used later keep it
spcfc_nums = [0, zF, xD, n_xB, xq, 1]  # Numbers to be in the result

gen_val = generate_numbers(num_values, spcfc_nums)
# print(gen_val)

# Function to create a dictionary with x values and corresponding y values
def create_xy_dict(x_values,function):
    """Map every x in `x_values` to `function(x)`.

    Keys keep the order of `x_values`; a repeated x keeps a single entry.
    """
    return {x: function(x) for x in x_values}


# Evaluate each curve on the shared x grid: equilibrium curve, top operating line,
# bottom operating line (y(x) form) and q-line, in that order
eqcurve_dict, tol_dict, bol_dict, q_dict = (
    create_xy_dict(gen_val, curve) for curve in (Eqcurve, TOL, BOL_2, yq_2)
)

In [None]:
# Build the tables straight from the (x, y) pairs stored in each dictionary.
# Pairing gen_val with dict.values() would silently misalign x and y if gen_val
# ever contained a repeated x (the dictionary keeps a single entry per key).

# Bottom operating line, kept only between xB (exclusive) and xq (inclusive)
df_bol = pd.DataFrame(list(bol_dict.items()), columns = ['x\u2099', 'BOL'])
df_bol = df_bol[(df_bol['x\u2099'] > xB) & (df_bol['x\u2099'] <= xq)]
ndf_bol_dict = {key: value for key, value in bol_dict.items() if xB < key <= xq}

# Top operating line: the table keeps xq <= x < xD, the dictionary keeps xq <= x <= xD
df_tol = pd.DataFrame(list(tol_dict.items()), columns = ['x\u2099', 'TOL'])
df_tol = df_tol[(df_tol['x\u2099'] >= xq) & (df_tol['x\u2099'] < xD)]
ndf_tol_dict = {key: value for key, value in tol_dict.items() if xq <= key <= xD}

# Combined table; all three dictionaries were built from the same x grid, so they share keys
df_mct = pd.DataFrame(
    [(x_n, eqcurve_dict[x_n], tol_dict[x_n], bol_dict[x_n]) for x_n in eqcurve_dict],
    columns = ['x\u2099', 'EQ Curve', 'TOL', 'BOL'],
)

# q-line is drawn through just two points: x = zF and x = xq
df_q_dict = {key: value for key, value in q_dict.items() if key in (xq, zF)}
display(df_bol)
#print(df_q_dict)

In [None]:
# Show the top-operating-line table (rows with xq <= x < xD)
display(df_tol)

In [None]:
# Show the combined table: equilibrium curve, TOL and BOL evaluated on the full x grid
display(df_mct)

In [None]:
def create_stairs_plot_B(xb, xq, yq, ui, max_steps=1000):
    """Step off the stripping-section staircase starting from the bottoms composition.

    Starting at x = xb, the construction alternates between the equilibrium
    curve (`Eqcurve`, x -> y) and the bottom operating line (`BOL`, y -> x)
    until the vapour composition rises above `yq`. The first point that
    overshoots `yq` is kept as the last point.

    Args:
        xb: liquid composition to start from.
        xq: unused; kept so the signature mirrors create_stairs_plot_D.
        yq: vapour composition at which stepping stops.
        ui: output selector --
            'x_y' -> [x_list, y_list]
            'dct' -> {x: y} dictionary
            'N'   -> number of staircase points (int)
        max_steps: safety limit on the number of points; exceeded only when
            the staircase does not progress towards yq (pinch).

    Raises:
        RuntimeError: if more than `max_steps` points would be needed.
        ValueError: if `ui` is not one of the three selectors (this used to
            return None silently).
    """
    yb = Eqcurve(xb)
    bol_lst_x = [xb]
    bol_lst_y = [yb]
    while yb <= yq:
        if len(bol_lst_x) > max_steps:
            raise RuntimeError(
                'create_stairs_plot_B: yq not reached after {} steps'.format(max_steps)
            )
        # operating line gives the next x, the equilibrium curve the matching y
        x = BOL(yb)
        yb = Eqcurve(x)
        bol_lst_x.append(x)
        bol_lst_y.append(yb)

    if ui == 'x_y':
        return [bol_lst_x, bol_lst_y]
    if ui == 'dct':
        return dict(zip(bol_lst_x, bol_lst_y))
    if ui == 'N':
        # One count per stored point. The previous running counter ended with
        # `n =+ 1`, which reset it to 1 on the final step.
        return len(bol_lst_x)
    raise ValueError("ui must be 'x_y', 'dct' or 'N', got {!r}".format(ui))
        

    

In [None]:

# Inspect the stripping-section staircase as an {x: y} dictionary
print(create_stairs_plot_B(xB, xq, yq, 'dct'))

In [None]:
def create_stairs_plot_D(xd, xq, yq, ui, max_steps=1000):
    """Step off the rectifying-section staircase starting from the distillate composition.

    Starting at x = xd on the top operating line (`TOL`), each step inverts the
    constant-`alpha` equilibrium relation to get the next x, then reads y from
    `TOL` again, until y drops below `yq`. For the final point (the one whose
    y fell below yq) the previous y is repeated, so the staircase ends with a
    horizontal segment.

    Args:
        xd: liquid composition to start from.
        xq: unused; kept for interface compatibility.
        yq: vapour composition at which stepping stops.
        ui: output selector --
            'x_y' -> (x_list, y_list) tuple
            'dct' -> {x: y} dictionary
            'N'   -> number of staircase points (int)
        max_steps: safety limit on the number of points; exceeded only when
            the staircase does not progress towards yq (pinch).

    Raises:
        RuntimeError: if more than `max_steps` points would be needed.
        ValueError: if `ui` is not one of the three selectors (this used to
            return None silently).
    """
    yd = TOL(xd)
    tol_lst_x = [xd]
    tol_lst_y = [yd]
    while yd >= yq:
        if len(tol_lst_x) > max_steps:
            raise RuntimeError(
                'create_stairs_plot_D: yq not reached after {} steps'.format(max_steps)
            )
        # inverse of y = alpha*x / (1 + x*(alpha - 1))
        x = yd/(alpha - yd*(alpha - 1))
        yd = TOL(x)
        tol_lst_x.append(x)
        # past the stopping point: hold the previous y instead of the new one
        tol_lst_y.append(yd if yd >= yq else tol_lst_y[-1])

    if ui == 'x_y':
        return tol_lst_x, tol_lst_y
    if ui == 'dct':
        return dict(zip(tol_lst_x, tol_lst_y))
    if ui == 'N':
        # One count per stored point. The previous running counter ended with
        # `n =+ 1`, which reset it to 1 on the final step.
        return len(tol_lst_x)
    raise ValueError("ui must be 'x_y', 'dct' or 'N', got {!r}".format(ui))

In [None]:
# Inspect the rectifying-section staircase as an {x: y} dictionary
print(create_stairs_plot_D(xD, xq, yq, 'dct'))

In [None]:
# 'x_y' returns the pair (x_list, y_list); print the first entry of each list,
# i.e. the starting point of the rectifying staircase
for x in create_stairs_plot_D(xD, xq, yq, 'x_y'):
    print(x[0])

In [None]:
# Create a figure and axis
fig, ax = plt.subplots(figsize = (10, 6))
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

# The staircase constructions depend only on the column specification, so each
# section is built once here and reused for both the step lines and the stage labels.
strip_steps = create_stairs_plot_B(xB, xq, yq, 'dct')
rect_steps = create_stairs_plot_D(xD, xq, yq, 'dct')

# Adding the Equilibrium Curve & diagonal line
# 45-degree line range
x = np.linspace(0, 1, num_values)
y = x  # 45-degree line (y = x)
ax.plot(x, y, linestyle = '-', color = 'black', label = 'y = x line')

x = list(eqcurve_dict.keys())
y = list(eqcurve_dict.values())
ax.plot(x, y, linestyle = '-', color = 'orange', label = 'V-L EQ line')


# Adding BOL, TOL & q-lines
# BOL
xb = list(ndf_bol_dict.keys())
yb = list(ndf_bol_dict.values())
ax.plot(xb, yb, linestyle = '-', color = 'deepskyblue', label = 'Stripping Section')
# TOL
xd = list(ndf_tol_dict.keys())
yd = list(ndf_tol_dict.values())
ax.plot(xd, yd, linestyle = '-', color = 'green', label = 'Rectifying Section')
# q-line
x = list(df_q_dict.keys())
y = list(df_q_dict.values())
ax.plot(x, y, linestyle = '-', color = 'gold', label = 'q line')

# Create stairs plot using a step plot (each line represents a step)
#   Starting with Stripping Section
x = list(strip_steps.keys())
y = list(strip_steps.values())
ax.step(x, y, where = 'post', linestyle = '-', color = 'rebeccapurple', label = 'Stripping Section Trays')

#   Then the Rectifying Section
x = list(rect_steps.keys())
y = list(rect_steps.values())
ax.step(x, y, where = 'post', linestyle = '-', color = 'rebeccapurple', label = 'Rectifying Section Trays')

# Short extra segment from the equilibrium curve at xB to the operating line just above xB
x = [xB, n_xB]
y = [Eqcurve(xB), BOL_2(n_xB)]
ax.plot(x, y, linestyle = '-', color = 'rebeccapurple', label = None)


# Composition markers: dashed drop line, point on the diagonal and text label
ax.plot([xB,xB],[0,xB], linestyle = '--', color = 'slategray')
ax.scatter(xB,xB, color = 'lightsteelblue', zorder = 1.8)
ax.text(xB + 0.01,0.02,'$x_B$ = {:0.3f}'.format(float(xB)))

ax.plot([zF,zF,zF],[0,zF,zF], linestyle = '--', color = 'slategray')
ax.scatter([zF,zF],[zF,zF], color = 'lightsteelblue', zorder = 1.9)
ax.text(zF + 0.01,0.02,'$z_F$ = {:0.3f}'.format(float(zF)))

ax.plot([xD,xD],[0,xD], linestyle = '--', color = 'slategray')
ax.scatter(xD,xD, color = 'lightsteelblue', zorder = 1.9)
ax.text(xD - 0.11,0.02,'$x_D$ = {:0.3f}'.format(float(xD)))

#   Rectifying Section stage numbering
#   Each label sits on the equilibrium curve at the step's y (x from the inverse
#   equilibrium relation). The last staircase point is skipped (len(y) - 1).
x = list(rect_steps.keys())
y = list(rect_steps.values())
ex = len(y)
for i in range(len(y) - 1):
    n_x = y[i]/(alpha - y[i]*(alpha - 1))
    ax.scatter(n_x, y[i], color = 'plum', zorder = 1.9)
    ax.text(n_x - 0.06, y[i] + 0.01,'N = {:0.0f}'.format(float(i+1)))

#   Stripping Section stage numbering, from the highest y downwards;
#   the numbering continues from the rectifying count (ex + i - 1)
x = list(strip_steps.keys())
y = sorted(strip_steps.values(), reverse = True)
for i in range(len(y)):
    n_x = y[i]/(alpha - y[i]*(alpha - 1))
    ax.scatter(n_x, y[i], color = 'plum', zorder = 1.9)
    ax.text(n_x - 0.06, y[i] + 0.01,'N = {:0.0f}'.format(float(ex + i - 1)))


# Title previously carried a stray closing parenthesis
plt.title('Ethanol-Water McCabe-Thiele Diagram')
plt.xlabel('x = mol frac. of ethanol in liquid phase')
plt.ylabel('y = mol. frac. of ethanol in vapour phase')

# Show the plot
ax.legend()
ax.grid(True, linestyle='--', alpha = 0.7)
plt.show()

In [None]:
# Column operating pressure converted from bar to pascals (1 bar = 100000 Pa)
PA_PER_BAR = 100000
feed_P_pa = feed_P * PA_PER_BAR

# Function to solve for temperature using Raoult's Law
def solve_temperature(x_ethanol, P, T_guess=273.15 + 78):
    """Return the temperature (K) at which a liquid of the given composition boils at P.

    Solves  x_e * Psat_e(T) / P  +  (1 - x_e) * Psat_w(T) / P  =  1  for T with
    `fsolve`, using the `Psat` correlations (bar, converted to Pa here).

    Args:
        x_ethanol: ethanol mole fraction in the liquid.
        P: total pressure in Pa.
        T_guess: starting temperature for the solver in K. Defaults to 78 degC
            expressed in kelvin (the earlier 272.15 offset was a typo for 273.15).

    Returns:
        The temperature in K as a scalar (first element of the fsolve result).
    """
    def equation(T):
        y_ethanol = x_ethanol * (Psat['ethanol'](T) * 100000) / P
        y_water = (1 - x_ethanol) * (Psat['water'](T) * 100000) / P
        return y_ethanol + y_water - 1  # vapour mole fractions must sum to one
    return fsolve(equation, T_guess)[0]  # Return temperature in K

# Liquid compositions of every staircase point, sorted by descending ethanol fraction.
# Built once here; previously the staircase was recomputed and re-sorted on every
# loop iteration, and these names were first bound to dictionaries and then to lists.
x_y_rectifying = sorted(create_stairs_plot_D(xD, xq, yq, 'dct').keys(), reverse = True)
x_y_stripping = sorted(create_stairs_plot_B(xB, xq, yq, 'dct').keys(), reverse = True)

# Calculate total number of stages
num_stages_rectifying = len(x_y_rectifying)
num_stages_stripping = len(x_y_stripping)
total_stages = num_stages_rectifying + num_stages_stripping

# Dictionary of stage temperatures: one single-element list per stage, so that the
# DataFrame below gets one column per stage and a single row
stage_temperatures = {}

# Rectifying stages are numbered first, followed by the stripping stages
for stage, x_ethanol in enumerate(x_y_rectifying + x_y_stripping, start = 1):
    # Solve for temperature at the current stage
    T_stage = solve_temperature(x_ethanol, feed_P_pa)
    stage_temperatures[f"Stage {stage}"] = [T_stage]

# Convert to DataFrame
df_temperatures = pd.DataFrame(stage_temperatures)

# Output DataFrame
print("Stage Temperatures (K):")
display(df_temperatures)

In [None]:
# Single-row frame describing the simulated column: stage temperatures plus operating data
df_col_cmp = df_temperatures.copy()

# Round the first 11 columns (the stage temperatures) to two decimals
df_col_cmp.iloc[:, 0:11] = df_col_cmp.iloc[:, 0:11].round(2)

# Append the flows and the specified distillate purity, in this column order
for column_name, value in (
    ('L', T_liq),
    ('V', T_vap),
    ('D', dist),
    ('B', botts),
    ('F', feed),
    ('Ethanol concentration', eth_d_pty),
):
    df_col_cmp[column_name] = value

# Display the resulting DataFrame
display(df_col_cmp)

In [None]:
# Column names assigned on load: temperatures T3..T14, then flows and the target
columns = [f'T{k}' for k in range(3, 15)] + ['L', 'V', 'D', 'B', 'F', 'Ethanol concentration']

# Location of the semicolon-delimited CSV dataset
filename = "/kaggle/input/distillation-column/dataset_distill.csv"

# Load the file into a DataFrame
df_data_test = pd.read_csv(
    filename,
    delimiter = ';',          # fields are separated by semicolons
    skiprows = 1,             # skip the first row of the file
    names = columns,          # use the names defined above instead
    skipinitialspace = True,  # ignore spaces that follow a delimiter
    engine = 'python',        # Python parsing engine
)

# Round-trip T3 through the index and back to a regular column
df_data_test.set_index('T3', inplace=True)
df_data_test.reset_index(inplace=True)
df_data_test.head()

In [None]:
# Relabel the temperature columns: T3 -> Stage 1, T4 -> Stage 2, ..., T14 -> Stage 12
stage_labels = {f'T{k + 2}': f'Stage {k}' for k in range(1, 13)}
df_data_test.rename(columns = stage_labels, inplace = True)
display(df_data_test)

In [None]:
# Summary statistics of the dataset's numeric columns
df_data_test.describe()

In [None]:
# Remove the original 'Stage 11' column and let 'Stage 12' take its name.
# Guarded so that re-running this cell is harmless: without the check a second run
# would drop the renamed column as well, because 'Stage 12' no longer exists then.
# NOTE(review): presumably done to end up with 11 stage columns like the simulated column - confirm
if 'Stage 12' in df_data_test.columns:
    df_data_test.drop('Stage 11', axis = 1, inplace = True)
    df_data_test.rename(columns = {'Stage 12':'Stage 11'}, inplace = True)
display(df_data_test)

In [None]:
# Columns that pandas loaded with dtype object, i.e. not parsed as numbers
non_numeric_columns = df_data_test.select_dtypes(include=['object']).columns

# For each such column print its name, its distinct values and a blank separator line
for col in non_numeric_columns:
    print(f"Column: {col}", df_data_test[col].unique(), "", sep = "\n")

In [None]:
# Columns that are still stored as strings (dtype object)
non_numeric_columns = df_data_test.select_dtypes(include=['object']).columns

# Strip commas from the text and convert each column to float, then show the result
# NOTE(review): this treats ',' as a thousands separator; if the file uses decimal
#   commas, the values are scaled incorrectly - confirm against the raw data
for col in non_numeric_columns:
    without_commas = df_data_test[col].str.replace(',', '')
    df_data_test[col] = without_commas.astype(float)
    print(f"Column: {col}", df_data_test[col].unique(), "", sep = "\n")

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Features: the eleven stage temperatures followed by the five flow columns
feature_cols = [f'Stage {k}' for k in range(1, 12)] + ['L', 'V', 'D', 'B', 'F']
X = df_data_test[feature_cols]
# Target: ethanol concentration
y = df_data_test['Ethanol concentration']

# Hold out 5% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.05, random_state = 42)

# Fit a random forest with default hyper-parameters and a fixed seed
rf = RandomForestRegressor(random_state = 42)
rf.fit(X_train, y_train)

# Rank the features by the forest's importance scores, most important first
importance_table = pd.DataFrame({
    'Variable': X.columns,
    'Importance': rf.feature_importances_
})
feature_importance = importance_table.sort_values(by='Importance', ascending = False)

print(feature_importance)

In [None]:
# Show the held-out feature rows used for testing
display(X_test)

In [None]:
# Predict the ethanol concentration for the held-out rows and show it as a one-column table
# (the table gets a fresh 0..n-1 index, not the index of X_test)
y_pred = rf.predict(X_test)
df_y_pred = pd.DataFrame({'Predicted Ethanol concentration': y_pred})
display(df_y_pred)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Error metrics on the held-out split
mae = mean_absolute_error(y_test, y_pred)   # Mean Absolute Error (MAE)
mse = mean_squared_error(y_test, y_pred)    # Mean Squared Error (MSE)
rmse = np.sqrt(mse)                         # Root Mean Squared Error (RMSE)
r2 = r2_score(y_test, y_pred)               # R² (Coefficient of Determination)

# Report the four scores, one per line
for metric_label, metric_value in (
    ('Mean Absolute Error', mae),
    ('Mean Squared Error', mse),
    ('Root Mean Squared Error', rmse),
    ('R-squared', r2),
):
    print(f'{metric_label}: {metric_value}')

In [None]:
# Feed the simulated column (single row) to the trained forest.
# Features are selected by name, in the order used for training, instead of
# "every column except the last", so an extra or reordered column cannot be
# passed to the model by accident.
df_col_cmp_tst = df_col_cmp[list(X.columns)]

# Kept in its own variable so the test-split predictions in `y_pred` are not overwritten
y_pred_col = rf.predict(df_col_cmp_tst)

df_col_cmp_2 = df_col_cmp.copy()
df_col_cmp_2['Predicted Ethanol concentration'] = y_pred_col
display(df_col_cmp_2)

In [None]:
# Compare the forest's prediction with the specified distillate purity for the simulated column
predicted_conc = df_col_cmp_2['Predicted Ethanol concentration']
specified_conc = df_col_cmp_2['Ethanol concentration']

# Mean Absolute Error (MAE)
mae = mean_absolute_error(predicted_conc, specified_conc)
print(f'Mean Absolute Error: {mae}')

# Mean Squared Error (MSE)
mse = mean_squared_error(predicted_conc, specified_conc)
print(f'Mean Squared Error: {mse}')

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print(f'Root Mean Squared Error: {rmse}')
