In [1]:
import pandas as pd

# Location of the dataset CSV inside the Kaggle input directory.
DATA = '/kaggle/input/nanofluid-density-prediction/Density_Prediction_Dataset.csv'

# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(DATA)
df.head(n=5)

Unnamed: 0,Nano Particle,Base Fluid,Temperature (°C),Volume Concentration (ϕ),Density of Nano Particle 1 (ρnp),Density of Nano Particle 2 (ρnp),Density of Base Fluid (ρbf),Volume Mixture of Particle 1,Volume Mixture of Particle 2,Density (ρ)
0,Al₂O₃/SiO₂,Water,20,0.05,3890,2220,998.29,20,80,1004.814815
1,Al₂O₃/SiO₂,Water,30,0.05,3890,2220,995.71,20,80,1002.222222
2,Al₂O₃/SiO₂,Water,40,0.05,3890,2220,992.25,20,80,997.037037
3,Al₂O₃/SiO₂,Water,50,0.05,3890,2220,988.02,20,80,994.444444
4,Al₂O₃/SiO₂,Water,60,0.05,3890,2220,983.13,20,80,990.0


In [2]:
# Number of distinct values per column, shown as a one-column table.
pd.DataFrame(df.nunique())

Unnamed: 0,0
Nano Particle,14
Base Fluid,4
Temperature (°C),18
Volume Concentration (ϕ),23
Density of Nano Particle 1 (ρnp),10
Density of Nano Particle 2 (ρnp),6
Density of Base Fluid (ρbf),30
Volume Mixture of Particle 1,6
Volume Mixture of Particle 2,6
Density (ρ),413


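Do we have any duplicate rows? We know some density values repeat (only 413 unique values), so let's check whether any whole rows are duplicated once the nanoparticle label is ignored.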

In [3]:
# (total rows, rows left after dropping the 'Nano Particle' label and de-duplicating)
(
    df.shape[0],
    df.drop(columns=['Nano Particle']).drop_duplicates().shape[0],
)

(436, 436)

Will t-SNE find any clusters in our data?

In [4]:
from sklearn.manifold import TSNE

RANDOM_STATE = 2025

# Embed every column except the two label columns into the two plot
# coordinates 'x' and 'y'; the fixed seed keeps the layout reproducible.
reducer = TSNE(random_state=RANDOM_STATE)
plot_df = pd.DataFrame(
    data=reducer.fit_transform(X=df.drop(columns=['Nano Particle', 'Base Fluid'])),
    columns=['x', 'y'],
)

# Carry over the columns used to colour the scatter plots. Plain lists are
# assigned by position, so no index alignment takes place.
for column in (
    'Base Fluid',
    'Temperature (°C)',
    'Volume Concentration (ϕ)',
    'Density of Nano Particle 1 (ρnp)',
    'Density of Nano Particle 2 (ρnp)',
    'Density of Base Fluid (ρbf)',
    'Volume Mixture of Particle 1',
    'Volume Mixture of Particle 2',
):
    plot_df[column] = df[column].to_list()

In [5]:
from plotly import express
from plotly import io

# Render plotly figures through the 'iframe' renderer.
io.renderers.default = 'iframe'

# t-SNE embedding coloured by base fluid; the title lets the figure stand on
# its own when the notebook is skimmed.
express.scatter(
    data_frame=plot_df,
    x='x',
    y='y',
    color='Base Fluid',
    title='t-SNE embedding coloured by Base Fluid',
)

We definitely have t-SNE clusters, but only some of them can be explained by our base fluid.

In [6]:
# Same t-SNE embedding, coloured by temperature; the title lets the figure
# stand on its own when the notebook is skimmed.
express.scatter(
    data_frame=plot_df,
    x='x',
    y='y',
    color='Temperature (°C)',
    title='t-SNE embedding coloured by Temperature (°C)',
)

Variation within clusters seems to be determined by temperature.

In [7]:
# Same t-SNE embedding, coloured by the density of the first nanoparticle;
# the title lets the figure stand on its own when the notebook is skimmed.
express.scatter(
    data_frame=plot_df,
    x='x',
    y='y',
    color='Density of Nano Particle 1 (ρnp)',
    title='t-SNE embedding coloured by Density of Nano Particle 1 (ρnp)',
)

The density of the first nanoparticle explains our water clusters.

In [8]:
# Same t-SNE embedding, coloured by the density of the second nanoparticle;
# the title lets the figure stand on its own when the notebook is skimmed.
express.scatter(
    data_frame=plot_df,
    x='x',
    y='y',
    color='Density of Nano Particle 2 (ρnp)',
    title='t-SNE embedding coloured by Density of Nano Particle 2 (ρnp)',
)

In [9]:
# List the DataFrame's column labels (handy for copying exact names into the plotting calls).
df.columns

Index(['Nano Particle', 'Base Fluid', 'Temperature (°C)',
       'Volume Concentration (ϕ)', 'Density of Nano Particle 1 (ρnp)',
       'Density of Nano Particle 2 (ρnp)', 'Density of Base Fluid (ρbf)',
       'Volume Mixture of Particle 1', 'Volume Mixture of Particle 2',
       'Density (ρ)'],
      dtype='object')

In [10]:
# Same t-SNE embedding, coloured by the density of the base fluid; the title
# lets the figure stand on its own when the notebook is skimmed.
express.scatter(
    data_frame=plot_df,
    x='x',
    y='y',
    color='Density of Base Fluid (ρbf)',
    title='t-SNE embedding coloured by Density of Base Fluid (ρbf)',
)