# Load Libraries

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import plotly.graph_objects as go
import plotly.express as px

# Load Data

First, the data prepared in Notebook 1 are loaded (data combined.csv), and any rows with missing values are dropped (just in case any crept in!).

Next, we specify the inputs and outputs. Note that to_drop contains any columns that are not to be used in inputs. Inputs will only contain molecular descriptors, M, T and p.
Outputs will be density values.

Output is divided by 1000 to convert the unit from kg/m3 to g/cm3 (this will be important in PINN).

In [2]:
# Read the combined dataset (first CSV column becomes the index) and discard
# every row that contains a missing value.
df = pd.read_csv('./Data/data combined.csv', index_col=0)
df = df.dropna(axis=0)
print('Loaded dataset:')
display(df)

# Non-descriptor inputs that are appended after the molecular descriptors
extra_tags = ['M g/mol', 'T / K', 'p / MPa']

# Every column that is NOT a molecular descriptor. The extra tags are listed
# here too, so that they are removed first and then re-appended at the end.
to_drop = [
    'Dataset ID', 'IL ID', 'Cation', 'Anion', 'Cationic family', 'Anionic family',
    'Excluded IL', 'Accepted dataset', 'T / K', 'p / MPa', 'ρ / kg/m3',
    'SWMLR (v0) + FFANN (f)', 'SWMLR (v0) + FFANN (f).1',
    'FFANN (v0) + FFANN (f)', 'FFANN (v0) + FFANN (f).1',
    'LSSVM (v0) + FFANN (f)', 'LSSVM (v0) + FFANN (f).1',
    'M g/mol',
]

# Whatever remains after dropping the columns above is a molecular descriptor
molecular_descriptors = df.drop(columns=to_drop).columns
inputs = np.hstack((molecular_descriptors, extra_tags))
outputs = ['ρ / kg/m3']

print(f'\n\nInputs {len(inputs)}:\n{inputs}')
print(f'\n\nOutputs {len(outputs)}:\n{outputs}')

# Inputs are used as stored; the output column is divided by 1000
# (kg/m3 -> g/cm3), so it keeps its original 'ρ / kg/m3' label.
X = df[inputs]
y = df[outputs] / 1000

print('\n\nInput data:')
display(X)

print('\n\nOutput data:')
display(y)

Loaded dataset:


Unnamed: 0,Dataset ID,IL ID,Cation,Anion,Cationic family,Anionic family,Excluded IL,Accepted dataset,T / K,p / MPa,...,aNCHBr,aNCD3,aNCD2,aNNH2,cycNCH3,cycNCH2,cycNCH,cycNC(O),cycSCH3,cycSCH2
10,3,1,"azp-2o1,1",ntf2,azepanium,NTf2 derivatives,no,yes,293.15,0.1,...,0,0,0,0,1,1,0,0,0,0
11,3,1,"azp-2o1,1",ntf2,azepanium,NTf2 derivatives,no,yes,298.15,0.1,...,0,0,0,0,1,1,0,0,0,0
12,3,1,"azp-2o1,1",ntf2,azepanium,NTf2 derivatives,no,yes,303.15,0.1,...,0,0,0,0,1,1,0,0,0,0
13,3,1,"azp-2o1,1",ntf2,azepanium,NTf2 derivatives,no,yes,313.15,0.1,...,0,0,0,0,1,1,0,0,0,0
14,3,1,"azp-2o1,1",ntf2,azepanium,NTf2 derivatives,no,yes,323.15,0.1,...,0,0,0,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41245,4318,2267,"turea-8,(2,0),(2,0)",ntf2,thiouronium,NTf2 derivatives,no,yes,318.15,0.1,...,0,0,0,0,0,0,0,0,0,0
41246,4318,2267,"turea-8,(2,0),(2,0)",ntf2,thiouronium,NTf2 derivatives,no,yes,328.15,0.1,...,0,0,0,0,0,0,0,0,0,0
41247,4318,2267,"turea-8,(2,0),(2,0)",ntf2,thiouronium,NTf2 derivatives,no,yes,338.15,0.1,...,0,0,0,0,0,0,0,0,0,0
41248,4318,2267,"turea-8,(2,0),(2,0)",ntf2,thiouronium,NTf2 derivatives,no,yes,348.15,0.1,...,0,0,0,0,0,0,0,0,0,0




Inputs 323:
['Im' 'Im1' 'Im12' 'Im13' 'Im123' 'Im135' 'Im1345' 'Pz12' 'Pz124' 'Pz1235'
 'Trz123_13' 'Trz123_13(4,5)' 'Trz124_14' 'Tetraz15' 'BThz2' 'Quin1'
 'Quini1' 'Qu23' 'Quinuc1' 'Py' 'Py4' 'Py1' 'Py12' 'Py13' 'Py14' 'Py123'
 'Py124' 'Py125' 'Py134' 'Py135' 'Py1235' 'Pdz1' 'Py(D)' 'Pyr' 'Pyr1'
 'Pyr11' 'Pyrrl12' 'Ox11' 'Pip' 'Pip1' 'Pip11' 'Pip113' 'Pipz1' 'Azp11'
 'DABCO1' 'DBU' 'DBU1' 'TBDH1' 'Mor' 'Mor1' 'Mor11' 'Pyrro' 'Pyrro1'
 'Pyrro11' 'Epslac' 'GUAD' 'GUADH' 'GUADH2' 'CGUAD' 'THT1' 'THTP1' 'TUREA'
 'TUREAH2' 'TUREAH4' 'CPropN3' 'N1' 'N2' 'N3' 'N4' 'NdbCH' 'NdbC' 'P4'
 'S3' 'BF4' 'PF6' 'OHm' 'Cl' 'Br' 'I' 'HF2' 'HBr2' 'NO3' 'ClO4'
 'N(SO2CH3)2' 'NTf2' 'N(SO2CF2)Tf' 'N(SO2CF2)2' 'N(SO2F)SO2CF2' 'N(SO2F)2'
 'N(Tf)SO2Fc' 'TSAC' 'CTf3' 'DCA' 'NCNNO2' 'CCN3' 'SCN' 'TCB' 'BFCN3'
 'BF2CN2' 'BF3CN' 'BF2CNCF3' 'BF2CNCF2' 'BFCN2CF3' 'BFCN2CF2' 'BCN3CF3'
 'BCN3CF2' 'CH3BF3' 'CH2BF3' 'CH2CHBF3' 'aCBF3' 'CF3BF3' 'CF2BF3' 'FAP'
 'FPI' 'CH3SO4' 'CH2SO4' 'HSO4' 'TEMPOSO4' 'CS' 'CD3SO4' 'C

Unnamed: 0,Im,Im1,Im12,Im13,Im123,Im135,Im1345,Pz12,Pz124,Pz1235,...,aNNH2,cycNCH3,cycNCH2,cycNCH,cycNC(O),cycSCH3,cycSCH2,M g/mol,T / K,p / MPa
10,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,452.435,293.15,0.1
11,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,452.435,298.15,0.1
12,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,452.435,303.15,0.1
13,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,452.435,313.15,0.1
14,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,452.435,323.15,0.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41245,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,525.589,318.15,0.1
41246,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,525.589,328.15,0.1
41247,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,525.589,338.15,0.1
41248,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,525.589,348.15,0.1




Output data:


Unnamed: 0,ρ / kg/m3
10,1.4226
11,1.4179
12,1.4134
13,1.4042
14,1.3952
...,...
41245,1.2296
41246,1.2208
41247,1.2137
41248,1.2053


# Scale and Split Data

The code defines two functions, scale and descale, for normalising and denormalising data. The scale function adjusts the input data by subtracting a shift value and dividing by a factor for each column, effectively normalising the data. The descale function reverses this process, restoring the original data values by multiplying by the factor and adding the shift.

In the preparation section, a dictionary named scaler is created to store the shift and factor values for both input features (X) and output values (y). These values are used to scale the inputs and outputs into a range defined by scaler_range.

The scaled data is then stored in the dataset dictionary. Input columns that are constant across the whole dataset have a factor of zero, so scaling turns them entirely into NaN (0/0); these all-NaN columns are dropped. No train/test split is performed in this notebook; the shapes of the scaled inputs and outputs are printed as a check.

In [3]:
def scale(data_in, shift, factor):
    """
    Normalise data column by column as ``(value - shift) / factor``.

    Parameters:
    data_in (array-like or DataFrame): Data to normalise; it is wrapped in a DataFrame first.
    shift (Series or DataFrame): Per-column offsets, selected by the column labels of the data.
    factor (Series or DataFrame): Per-column divisors, selected by the column labels of the data.

    Returns:
    DataFrame: The normalised data. A column whose values all equal its shift and whose
    factor is zero comes out as NaN (0/0).
    """
    frame = pd.DataFrame(data_in)
    columns = frame.columns

    # Only the shift/factor entries for the columns actually present are used
    centred = frame - shift[columns]
    return centred / factor[columns]

def descale(data_in, shift, factor):
    """
    Undo the normalisation column by column as ``value * factor + shift``.

    Parameters:
    data_in (array-like or DataFrame): Normalised data; it is wrapped in a DataFrame first.
    shift (Series or DataFrame): Per-column offsets, selected by the column labels of the data.
    factor (Series or DataFrame): Per-column multipliers, selected by the column labels of the data.

    Returns:
    DataFrame: The data mapped back to its original scale.
    """
    frame = pd.DataFrame(data_in)
    columns = frame.columns

    # Only the shift/factor entries for the columns actually present are used
    stretched = frame * factor[columns]
    return stretched + shift[columns]

# Min-max scaling parameters: the shift is each column's minimum and the
# factor is each column's range (max - min).
scaler_range = (0, 1)
X_min, X_max = X.min(axis=0), X.max(axis=0)
y_min, y_max = y.min(axis=0), y.max(axis=0)
scaler = {
    'X_shift': X_min,
    'X_factor': X_max - X_min,
    'y_shift': y_min,
    'y_factor': y_max - y_min,
}

# Scale inputs and outputs. Input columns that come out entirely NaN
# (constant columns: zero range gives 0/0) are removed.
X_scaled_all = scale(X, scaler['X_shift'], scaler['X_factor'])
dataset = {
    'X_scaled': X_scaled_all.dropna(axis=1, how='all'),
    'Y_scaled': scale(y, scaler['y_shift'], scaler['y_factor']),
}
print('Scaled input data size:', dataset['X_scaled'].shape)
print('Scaled outputs data size:', dataset['Y_scaled'].shape)

Scaled input data size: (23767, 309)
Scaled outputs data size: (23767, 1)


# Define Model and Other Functions

This section defines two neural network models using PyTorch: a simple Multi-Layer Perceptron (MLP) and a Physics-Informed Neural Network (PINN). Additionally, it includes a function to calculate and print error statistics between two dataframes.

The MLPModel_torch class defines a straightforward multi-layer perceptron with two hidden layers, each containing 100 neurons, and an output layer with one neuron for regression tasks. The forward pass method applies the ReLU activation function to the outputs of the hidden layers and computes the final predicted value.

The PINNModel_torch class defines a physics-informed neural network, also with two hidden layers of 100 neurons each. This model has multiple output layers to predict various physical parameters: density (predicted_density), base density (rho_0_pred), compressibility factor (k_pred), and specific volume (v_pred). The forward pass method similarly uses ReLU activations and produces multiple predicted values.

The getErrorStats function calculates and prints various error statistics between two dataframes. It first applies unit conversions if specified, then computes the difference between the two dataframes. The function calculates several error metrics, including Average Absolute Relative Deviation (AARD), Mean Absolute Error (MAE), standard deviation of the differences, and the minimum and maximum errors. These statistics are then printed to provide a detailed assessment of the discrepancies between the dataframes.

In [4]:
class MLPModel_torch(nn.Module):
    """
    Feed-forward regressor: two 100-unit ReLU hidden layers followed by a
    single linear output unit.

    Parameters:
    input_dim (int): The number of input features.
    """
    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 100
        # The attribute names are also the state_dict keys used by saved
        # checkpoints, and the creation order fixes the random initialisation.
        self.dense1 = nn.Linear(input_dim, hidden_units)
        self.dense2 = nn.Linear(hidden_units, hidden_units)
        self.dense_density = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """
        Compute the network output for a batch of inputs.

        Parameters:
        x (Tensor): Input tensor whose last dimension is input_dim.

        Returns:
        Tensor: Output tensor with a last dimension of size 1.
        """
        hidden = nn.functional.relu(self.dense1(x))
        hidden = nn.functional.relu(self.dense2(hidden))
        # No activation on the regression output
        return self.dense_density(hidden)
    
class PINNModel_torch(nn.Module):
    """
    Physics-informed network: a shared trunk of two 100-unit ReLU hidden
    layers feeding four separate single-unit linear heads.

    Parameters:
    input_dim (int): The number of input features.
    """
    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 100
        # The attribute names are also the state_dict keys used by saved
        # checkpoints, and the creation order fixes the random initialisation.
        self.dense1 = nn.Linear(input_dim, hidden_units)
        self.dense2 = nn.Linear(hidden_units, hidden_units)
        # One linear head per predicted quantity
        self.dense_density = nn.Linear(hidden_units, 1)  # Predicted density
        self.dense_rho0 = nn.Linear(hidden_units, 1)     # Predicted rho_0
        self.dense_k = nn.Linear(hidden_units, 1)        # Predicted k
        self.dense_v = nn.Linear(hidden_units, 1)        # Predicted v

    def forward(self, x):
        """
        Compute all four head outputs for a batch of inputs.

        Parameters:
        x (Tensor): Input tensor whose last dimension is input_dim.

        Returns:
        Tuple[Tensor, Tensor, Tensor, Tensor]: (predicted density, rho_0, k, v),
        in that order; every head reads the same hidden representation.
        """
        hidden = nn.functional.relu(self.dense1(x))
        hidden = nn.functional.relu(self.dense2(hidden))
        heads = (self.dense_density, self.dense_rho0, self.dense_k, self.dense_v)
        predicted_density, rho_0_pred, k_pred, v_pred = (head(hidden) for head in heads)
        return predicted_density, rho_0_pred, k_pred, v_pred

def getErrorStats(df_1, df_2, unit_conversion = 1):
    """
    Calculate and print error statistics between two sets of values.

    The signed error is ``df_1 - df_2`` and the relative error is taken with
    respect to ``df_1``, so ``df_1`` should hold the reference (actual) values.
    Both Series and DataFrames are accepted; the values of a DataFrame are
    pooled across all of its columns so that every statistic is one number.

    Parameters:
    df_1 (Series or DataFrame): Reference values.
    df_2 (Series or DataFrame): Values to compare against df_1; matched to df_1
        by the usual pandas label alignment.
    unit_conversion (float): Factor by which both inputs are divided before the
        comparison (default is 1, meaning no conversion). AARD is a ratio and is
        therefore unaffected by it.

    Returns:
    None
    """
    # Apply unit conversion to both inputs
    df_1 = df_1 / unit_conversion
    df_2 = df_2 / unit_conversion

    # Signed error and percentage error relative to the reference values
    diff = df_1 - df_2
    relative = diff / df_1 * 100

    # Flatten to plain 1-D Series. Reducing a DataFrame would give one value per
    # column, which cannot be formatted with '{:.4f}' and raised a TypeError.
    diff = pd.Series(np.asarray(diff).ravel())
    relative = pd.Series(np.asarray(relative).ravel())

    # Average Absolute Relative Deviation (AARD), in percent
    AARD = relative.abs().mean()

    # Mean Absolute Error (MAE)
    MAE = diff.abs().mean()

    # Sample standard deviation of the signed errors
    std = diff.std()

    # Most negative and most positive signed errors
    min_error = diff.min()
    max_error = diff.max()

    # Print the calculated error statistics
    print('AARD: {:.4f}%'.format(AARD))
    print('MAE: {:.4f}'.format(MAE))
    print('std: {:.4f}'.format(std))
    print('Minimum error: {:.4f}'.format(min_error))
    print('Maximum error: {:.4f}'.format(max_error))

# Load models and Compare Performance

This code demonstrates the process of loading pre-trained models, making predictions on a scaled dataset, and rescaling the predicted values to their original scale.

First, the Multi-Layer Perceptron (MLP) model is initialized with the number of input features from the scaled dataset. The pre-trained model parameters are loaded from a saved file. Predictions are made using the MLP model on the scaled dataset, and the predicted values are stored in a DataFrame. These predictions are then rescaled to their original scale using the descale function, which applies the inverse of the scaling transformations.

Similarly, the Physics-Informed Neural Network (PINN) model is initialized with the same number of input features. The pre-trained PINN model parameters are loaded from a saved file. Predictions are made using the PINN model on the scaled dataset, and the predicted values are also stored in a DataFrame. These predictions are then rescaled to their original scale using the same descale function.

In [5]:
def _predict_scaled_density(model, features):
    """
    Run a trained model on scaled features and return its scaled density predictions.

    Parameters:
    model (nn.Module): Model returning either a density tensor or a tuple whose
        first element is the density tensor (as PINNModel_torch does).
    features (DataFrame): Scaled input features, one row per sample.

    Returns:
    DataFrame: One column named 'ρ / kg/m3', indexed like ``features``.
    """
    # Inference only: switch to evaluation mode and skip autograd bookkeeping
    model.eval()
    with torch.no_grad():
        output = model(torch.tensor(features.values).float())
    if isinstance(output, tuple):
        output = output[0]
    # The column label must match the label carried by the y scaler so that
    # descale() can look up the right shift and factor.
    return pd.DataFrame(
        index=features.index,
        data=output.cpu().numpy().flatten(),
        columns=['ρ / kg/m3'])

# Initialize the MLP model with the number of input features from the scaled dataset
model_MLP = MLPModel_torch(dataset['X_scaled'].shape[1])

# Load the pre-trained MLP parameters; map_location lets a checkpoint saved on
# a GPU be loaded on a CPU-only machine
model_MLP.load_state_dict(torch.load('./Models/model_MLP.pt', map_location='cpu'))

# Predict with the MLP model and descale the predictions to their original scale
y_pred_MLP = descale(_predict_scaled_density(model_MLP, dataset['X_scaled']),
                     scaler['y_shift'], scaler['y_factor'])

# Initialize the PINN model with the number of input features from the scaled dataset
model_PINN = PINNModel_torch(dataset['X_scaled'].shape[1])

# Load the pre-trained PINN parameters (same CPU mapping as above)
model_PINN.load_state_dict(torch.load('./Models/model_PINN.pt', map_location='cpu'))

# Predict with the PINN model (first output only) and descale the predictions
y_pred_PINN = descale(_predict_scaled_density(model_PINN, dataset['X_scaled']),
                      scaler['y_shift'], scaler['y_factor'])

print('Prediction errors for MLP')
getErrorStats(y['ρ / kg/m3'], y_pred_MLP['ρ / kg/m3'])

print('\nPrediction errors for PINN')
getErrorStats(y['ρ / kg/m3'], y_pred_PINN['ρ / kg/m3'])

Prediction errors for MLP
AARD: 0.3390%
MAE: 0.0042
std: 0.0133
Minimum error: -0.4179
Maximum error: 0.9949

Prediction errors for PINN
AARD: 0.1728%
MAE: 0.0021
std: 0.0126
Minimum error: -0.4154
Maximum error: 1.1822


# Visualisation of Model Predictions

This section uses Plotly to create an interactive figure that visualizes the actual versus predicted values for two models: a Multi-Layer Perceptron (MLP) and a Physics-Informed Neural Network (PINN).

First, a new Plotly figure is created. Hover text for both MLP and PINN predictions is generated, providing detailed information about each data point, including the index, dataset ID, pressure (p), temperature (T), and molecular weight (M). This detailed hover text enhances the interactivity and informational content of the plot.

Scatter plot traces are then added to the figure for the MLP and PINN predictions. Each trace compares the actual values against the predicted values, with markers representing each data point. The hover template is customized to show the actual and predicted values along with the detailed hover text.

Additionally, a line trace representing the perfect prediction line (where the predicted values exactly match the actual values) is added to the plot. This line serves as a reference to evaluate the performance of the models visually.

The layout of the figure is updated to include axis titles and a plot title, making the plot more informative and easier to understand. Finally, the figure is saved as an HTML file, allowing for interactive exploration of the visualization.

This visualization helps in comparing the performance of the MLP and PINN models by providing a clear graphical representation of how well the predicted values match the actual values.

In [6]:
# Create a new figure using Plotly
figure = go.Figure()

# Hover text with details about each data point. It does not depend on the
# model, so it is built once from whole columns (instead of four scalar .loc
# lookups per row, repeated for each model) and shared by both traces.
hover_text = [
    "Index: {}<br>Dataset ID: {}<br>p: {}<br>T: {}<br>M: {}".format(
        index, dataset_id, pressure, temperature, molar_mass)
    for index, dataset_id, pressure, temperature, molar_mass in zip(
        y.index,
        df.loc[y.index, 'Dataset ID'],
        X.loc[y.index, 'p / MPa'],
        X.loc[y.index, 'T / K'],
        X.loc[y.index, 'M g/mol'])
]
text_MLP = hover_text
text_PINN = hover_text

# Add a scatter plot trace for MLP predictions vs actual values
figure.add_trace(go.Scatter(x=y.values.flatten(), y=y_pred_MLP.values.flatten(),
                            mode='markers', name='MLP',
                            hovertemplate="Actual: %{x:.4f}<br>Predicted: %{y:.4f}<br>%{text}",
                            text=text_MLP))

# Add a scatter plot trace for PINN predictions vs actual values
figure.add_trace(go.Scatter(x=y.values.flatten(), y=y_pred_PINN.values.flatten(),
                            mode='markers', name='PINN',
                            hovertemplate="Actual: %{x:.4f}<br>Predicted: %{y:.4f}<br>%{text}",
                            text=text_PINN))

# Add a line trace representing the perfect prediction line (predicted == actual)
figure.add_trace(go.Scatter(x=[0.8, 2.8], y=[0.8, 2.8], mode='lines', name='Perfect prediction line'))

# Update the layout of the figure with axis titles and plot title
figure.update_layout(xaxis_title='Actual values', yaxis_title='Predicted values',
                     title='Actual versus predicted values for PINN and MLP')

# Save the figure as an HTML file
figure.write_html('./Figures/pred_versus_actual.html')

# Visualization of Model Predictions for Specific Dataset

This code visualizes the actual and predicted density values from MLP and PINN models for a specific dataset identified by dataset_ID.

First, the code selects data corresponding to a specific dataset ID and extracts unique pressure values within that dataset. A new Plotly figure is created for the visualization, and a set of colors is defined to distinguish different pressure values.

For each unique pressure value, the code retrieves the corresponding indices and temperature values. It then adds scatter plot traces to the figure for the actual density values, and the predicted density values from the MLP and PINN models. The actual density values are represented as markers, while the predicted values are represented as lines with different dash styles to distinguish between the models.

Each trace is grouped by pressure value, allowing for a clear comparison of actual and predicted values across different pressures. The legend group titles and colors are used to maintain clarity in the plot.

The layout of the figure is updated to include appropriate axis titles and a plot title, which specifies the dataset ID. Finally, the figure is saved as an HTML file, which can be opened in a browser for an interactive visualization of the model predictions compared to the actual data for the selected dataset.

In [7]:
# Select a specific dataset by its ID
dataset_ID = 3931
df_sel = df.loc[df['Dataset ID'] == dataset_ID]

# Get the unique pressure values in the selected dataset
unique_p = np.unique(df_sel['p / MPa'])

# Create a new Plotly figure
figure = go.Figure()

# Define a set of colors for different pressure values
colors = px.colors.qualitative.Dark24

# Loop through each unique pressure value and create traces for actual, MLP, and PINN data
for pdx, p in enumerate(unique_p):
    # Get the indices for the current pressure value
    indices = df_sel.loc[df_sel['p / MPa'] == p].index
    # Get the corresponding temperature values
    T = df_sel.loc[indices, 'T / K']

    # All three traces of one pressure share a legend group, its title and a color.
    # The group id is passed as a string, and the palette is cycled so that more
    # pressures than palette entries do not raise an IndexError.
    legendgroup = str(p)
    legendgrouptitle_text = f'p = {p} MPa'
    color = colors[pdx % len(colors)]

    # Add a scatter plot trace for actual density values
    # (divided by 1000 to match the g/cm3 scale of the predictions)
    figure.add_trace(go.Scatter(
        x=T,
        y=df_sel.loc[indices, 'ρ / kg/m3'] / 1000,
        name='Actual',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='markers',
        marker_color=color
    ))

    # Add a scatter plot trace for MLP predicted density values
    figure.add_trace(go.Scatter(
        x=T,
        y=y_pred_MLP.loc[indices, 'ρ / kg/m3'],
        name='MLP',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='lines',
        line=dict(color=color, dash='dash')
    ))

    # Add a scatter plot trace for PINN predicted density values
    figure.add_trace(go.Scatter(
        x=T,
        y=y_pred_PINN.loc[indices, 'ρ / kg/m3'],
        name='PINN',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='lines',
        line=dict(color=color, dash='dot')
    ))

# Update the layout of the figure with axis titles and plot title.
# The plotted densities are in g/cm3 (kg/m3 divided by 1000), so the axis says so.
figure.update_layout(
    xaxis_title='T (K)',
    yaxis_title='ρ / g/cm3',
    title=f'MLP and PINN predictions for dataset: {dataset_ID}'
)

# Save the figure as an HTML file
# NOTE(review): the ':' in this file name is not a valid file-name character on Windows.
figure.write_html(f'./Figures/Dataset: {dataset_ID}.html')

In [8]:
# Select a specific dataset by its ID
dataset_ID = 1186
df_sel = df.loc[df['Dataset ID'] == dataset_ID]

# Get the unique pressure values in the selected dataset
unique_p = np.unique(df_sel['p / MPa'])

# Create a new Plotly figure
figure = go.Figure()

# Define a set of colors for different pressure values
colors = px.colors.qualitative.Dark24

# Loop through each unique pressure value and create traces for actual, MLP, and PINN data
for pdx, p in enumerate(unique_p):
    # Get the indices for the current pressure value
    indices = df_sel.loc[df_sel['p / MPa'] == p].index
    # Get the corresponding temperature values
    T = df_sel.loc[indices, 'T / K']

    # All three traces of one pressure share a legend group, its title and a color.
    # The group id is passed as a string, and the palette is cycled so that more
    # pressures than palette entries do not raise an IndexError.
    legendgroup = str(p)
    legendgrouptitle_text = f'p = {p} MPa'
    color = colors[pdx % len(colors)]

    # Add a scatter plot trace for actual density values
    # (divided by 1000 to match the g/cm3 scale of the predictions)
    figure.add_trace(go.Scatter(
        x=T,
        y=df_sel.loc[indices, 'ρ / kg/m3'] / 1000,
        name='Actual',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='markers',
        marker_color=color
    ))

    # Add a scatter plot trace for MLP predicted density values
    figure.add_trace(go.Scatter(
        x=T,
        y=y_pred_MLP.loc[indices, 'ρ / kg/m3'],
        name='MLP',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='lines',
        line=dict(color=color, dash='dash')
    ))

    # Add a scatter plot trace for PINN predicted density values
    figure.add_trace(go.Scatter(
        x=T,
        y=y_pred_PINN.loc[indices, 'ρ / kg/m3'],
        name='PINN',
        legendgroup=legendgroup,
        legendgrouptitle_text=legendgrouptitle_text,
        mode='lines',
        line=dict(color=color, dash='dot')
    ))

# Update the layout of the figure with axis titles and plot title.
# The plotted densities are in g/cm3 (kg/m3 divided by 1000), so the axis says so.
figure.update_layout(
    xaxis_title='T (K)',
    yaxis_title='ρ / g/cm3',
    title=f'MLP and PINN predictions for dataset: {dataset_ID}'
)

# Save the figure as an HTML file
# NOTE(review): the ':' in this file name is not a valid file-name character on Windows.
figure.write_html(f'./Figures/Dataset: {dataset_ID}.html')