# Generate Fluid Properties Data

In [1]:
import pandas as pd
import os

from generate_data import generate_data

# Sampling grid passed to generate_data. Units are presumably degrees C and
# bar, matching the 'Temperature (C)' / 'Pressure (bar)' columns read later
# in this notebook -- TODO confirm against generate_data.
TEMP_MIN = 5
TEMP_MAX = 30
TEMP_INCREMENT = 0.1
PRESS_MIN = 3
PRESS_MAX = 46
PRESS_INCREMENT = 0.1
FLUID = "CO2"

# Cache location for the property table, relative to the working directory.
path = f'data/{FLUID}_nist_data.csv'

if os.path.isfile(path):
    # Reuse the cached table instead of regenerating it.
    print(f"Existing data found: {os.getcwd()}/{path}")
    df = pd.read_csv(path)
else:
    df = generate_data(FLUID, TEMP_MIN, TEMP_MAX, TEMP_INCREMENT,
                       PRESS_MIN, PRESS_MAX, PRESS_INCREMENT)
    # to_csv does not create missing parent directories, so make sure the
    # cache directory exists on a first run.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Write to the same `path` the cache check above reads from, so the two
    # can never drift apart.
    df.to_csv(path, index=False)

Existing data found: c:\Users\josep\OneDrive\Documents\GitHub\nel\fluid_properties/data/CO2_nist_data.csv


# Pre-Process Data

In [2]:
# Model inputs (temperature, pressure) and target (density) as NumPy arrays.
feature_columns = ['Temperature (C)', 'Pressure (bar)']
X_unscaled = df[feature_columns].to_numpy()
Y_unscaled = df['Density (kg/m3)'].to_numpy()

# Standardise to zero mean and unit standard deviation: column-wise for the
# inputs, over the whole vector for the target.
# NOTE(review): the statistics come from the full dataset, i.e. before the
# train/test split performed in the later cells.
X = X_unscaled - X_unscaled.mean(axis=0)
X = X / X_unscaled.std(axis=0)
Y = Y_unscaled - Y_unscaled.mean()
Y = Y / Y_unscaled.std()

# Linear $\rho$-$PT$ Regression

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Hold out 20% of the standardised samples for evaluation; the fixed seed
# keeps the split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2)

# Fit a linear model of standardised density on standardised (T, P).
# fit() returns the estimator itself, so construction and fitting chain.
model = LinearRegression().fit(X_train, Y_train)

# Standardised density predicted for the held-out samples.
predictions = model.predict(X_test)

In [4]:
# Statistics used to undo the standardisation applied during pre-processing.
temp_mean, temp_std = X_unscaled[:, 0].mean(), X_unscaled[:, 0].std()
press_mean, press_std = X_unscaled[:, 1].mean(), X_unscaled[:, 1].std()
rho_mean, rho_std = Y_unscaled.mean(), Y_unscaled.std()

# Test-set inputs, true density and predicted density, mapped back to the
# original (unscaled) values.
df_test = pd.DataFrame({
    'Temperature (C)': X_test[:, 0] * temp_std + temp_mean,
    'Pressure (bar)': X_test[:, 1] * press_std + press_mean,
    'Density (kg/m3)': Y_test * rho_std + rho_mean,
    'Predicted Density (kg/m3)': predictions * rho_std + rho_mean,
})

# Per-sample squared residual of the density prediction.
residual = df_test['Predicted Density (kg/m3)'] - df_test['Density (kg/m3)']
df_test['Square Error, Density'] = residual ** 2

In [5]:
import plotly.express as px

# Scatter the held-out samples in the temperature-pressure plane, coloured by
# the squared density error of the linear model. The title reports the worst
# squared error, in the same format as the polynomial-regression plots.
fig = px.scatter(
    df_test,
    x='Temperature (C)',
    y='Pressure (bar)',
    color='Square Error, Density',
    title=f"Linear Regression; Max Square Error, Density: {df_test['Square Error, Density'].max(): .2e}"
)
# Dark theme; as the cell's last expression this also displays the figure.
fig.update_layout(
    template='plotly_dark',
    paper_bgcolor='#1F1F1F'
)

# Polynomial $\rho$-$PT$ Regression

In [6]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# Highest polynomial degree generated from the two standardised inputs.
POLY_DEGREE = 11

# Re-split the standardised data for the polynomial model.
# NOTE(review): this seed (123) differs from the one used for the linear
# model (42), so the two models are scored on different held-out samples.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=123)

# Expand (T, P) into polynomial features up to POLY_DEGREE. The expansion is
# fitted on the training inputs and then applied unchanged to the test inputs.
poly = PolynomialFeatures(degree=POLY_DEGREE)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Linear regression on the expanded features.
model = LinearRegression()
model.fit(X_train_poly, Y_train)

# Persist the fitted coefficients. The file name is derived from FLUID so it
# follows the configured fluid instead of a hard-coded 'co2' (for
# FLUID = "CO2" the resulting name is unchanged).
# NOTE(review): only model.coef_ is written; model.intercept_ and the
# standardisation statistics are not saved here -- confirm that whatever
# consumes this file obtains them elsewhere.
np.savetxt(f'data/{FLUID.lower()}_coef_{POLY_DEGREE}poly', model.coef_)

# Standardised density predicted for the held-out samples.
predictions = model.predict(X_test_poly)

In [7]:
# Test-set results for the polynomial model, mapped back from standardised
# to original (unscaled) values.
df_test = pd.DataFrame()

df_test['Temperature (C)'] = X_test[:, 0] * X_unscaled[:, 0].std() + \
    X_unscaled[:, 0].mean()
df_test['Pressure (bar)'] = X_test[:, 1] * X_unscaled[:, 1].std() + \
    X_unscaled[:, 1].mean()

df_test['Density (kg/m3)'] = Y_test * Y_unscaled.std() + Y_unscaled.mean()
df_test['Predicted Density (kg/m3)'] = predictions * \
    Y_unscaled.std() + Y_unscaled.mean()

# Per-sample residual of the density prediction.
residual = df_test['Predicted Density (kg/m3)'] - df_test['Density (kg/m3)']

df_test['Square Error, Density'] = residual ** 2
# The column label (and the plot title that reports its maximum) state
# percent, so scale the relative error by 100 rather than storing a fraction.
df_test['Absolute Relative Error, Density (%)'] = (
    100 * residual.abs() / df_test['Density (kg/m3)'])


In [8]:
# The title reports the largest squared density error over the test samples.
sq_err_title = f"Poly Regression Degree: {POLY_DEGREE}; Max Square Error, Density: {df_test['Square Error, Density'].max(): .2e}"

# Test samples in the temperature-pressure plane, coloured by squared error.
fig = px.scatter(
    data_frame=df_test,
    x='Temperature (C)',
    y='Pressure (bar)',
    color='Square Error, Density',
    title=sq_err_title,
)
# Dark theme; as the cell's last expression this also displays the figure.
fig.update_layout(paper_bgcolor='#1F1F1F', template='plotly_dark')

In [9]:
# The title reports the largest value of the relative-error column.
rel_err_title = f"Poly Regression Degree: {POLY_DEGREE}; Max Density Rel Error: {df_test['Absolute Relative Error, Density (%)'].max(): .2e} %"

# Test samples in the temperature-pressure plane, coloured by relative error.
fig = px.scatter(
    data_frame=df_test,
    x='Temperature (C)',
    y='Pressure (bar)',
    color='Absolute Relative Error, Density (%)',
    title=rel_err_title,
)
# Dark theme; as the cell's last expression this also displays the figure.
fig.update_layout(paper_bgcolor='#1F1F1F', template='plotly_dark')