In [27]:
%load_ext autoreload
%autoreload 2
import os
from pathlib import Path
import numpy as np
import pandas as pd
import pyvista as pv
from tqdm.auto import tqdm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
import tensorflow as tf
import imageio
import plotly.express as px
import umap
print("TensorFlow version:", tf.__version__)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
TensorFlow version: 2.12.1


In [2]:
# Root of the SimJEB data on disk. The location can be overridden per machine
# through the SIMJEB_ROOT environment variable; when it is unset the original
# hard-coded path is used, so existing setups keep working unchanged.
root_dir = Path(os.environ.get("SIMJEB_ROOT", "/home/max/Desktop/python_projects/SimJeb/"))
# Category CSVs are read from here.
metadata_dir = root_dir / "SimJEB_metadata"
meshes_dir = root_dir / "SimJEB_volmesh"

In [3]:
# Load the train/test bracket-category tables from the metadata directory.
train_csv = metadata_dir / "train_bracket_category.csv"
test_csv = metadata_dir / "test_bracket_category.csv"
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

In [4]:
# Positional columns 2..200 (199 columns) are used as model inputs; the same
# slice is applied to both splits so train and test features line up.
feature_cols = slice(2, 201)
X_train = train_df.iloc[:, feature_cols].to_numpy()
X_test = test_df.iloc[:, feature_cols].to_numpy()

In [5]:
# Turn the `category` column into integer class ids. The encoder learns its
# classes from the training labels only and is then reused for the test labels.
le = preprocessing.LabelEncoder()
y_train = le.fit_transform(train_df.category)
y_test = le.transform(test_df.category)

In [6]:
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# and the subsequent training run are repeatable after "Restart & Run All".
# Note that this seeds the global RNGs for the rest of the notebook as well.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Small MLP classifier: 128 -> 16 -> dropout -> one output unit per class.
# The final Dense layer has no activation, so the model emits raw logits.
n_classes = len(np.unique(y_train))
model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(128, activation='relu'),
  # 16-unit hidden layer; its activations are what later cells extract as
  # the per-sample embedding.
  tf.keras.layers.Dense(16, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(n_classes)
])

2024-02-22 21:58:55.538768: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [7]:
# Run the still-untrained model on the first training row to obtain one
# vector of raw per-class outputs (training only happens in a later cell).
first_row = X_train[:1]
predictions = model(first_row).numpy()
predictions

array([[ 0.05635617,  0.03180658,  0.03963365,  0.09407528,  0.20209105,
        -0.34132394]], dtype=float32)

In [8]:
# from_logits=True because the model's last Dense layer has no activation and
# therefore outputs unnormalised scores; "sparse" because the targets are
# integer class ids rather than one-hot vectors.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [9]:
# Sanity check: loss of the untrained model's output for the first training
# sample against its true label.
loss_fn(y_train[:1], predictions).numpy()

1.6166596

In [10]:
# Attach the optimiser, the logits-based loss and the accuracy metric before
# any training takes place.
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [11]:
# Print each layer's output shape and parameter count for the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (1, 128)                  25600     
                                                                 
 dense_1 (Dense)             (1, 16)                   2064      
                                                                 
 dropout (Dropout)           (1, 16)                   0         
                                                                 
 dense_2 (Dense)             (1, 6)                    102       
                                                                 
Total params: 27,766
Trainable params: 27,766
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Directory that receives the hidden-layer activation arrays saved during
# training (one .npy file per loop iteration).
embeddings_dir = metadata_dir / "mlp_embeddings"
# parents/exist_ok make this cell safe to re-run.
embeddings_dir.mkdir(parents=True, exist_ok=True)

In [13]:
EPOCHS = 200

# Sub-model exposing the output of the 16-unit hidden Dense layer. It is built
# once, outside the loop: it shares its layers (and therefore its weights)
# with `model`, so it always reflects the current training state.
# The layer is picked by position instead of by its auto-generated name
# ('dense_1' on a fresh kernel) because Keras bumps the numeric suffix every
# time the model-definition cell is re-run, which breaks a lookup by name.
embedding_layer = model.layers[1]
intermediate_layer_model = tf.keras.Model(
    inputs=model.input,
    outputs=embedding_layer.output
)

for i in tqdm(range(EPOCHS)):
    # The snapshot is taken *before* this iteration's update: file 0000 holds
    # the untrained network's activations, file NNNN the state after NNNN epochs.
    intermediate_output = intermediate_layer_model(X_train).numpy()
    np.save(embeddings_dir / f"{str(i).zfill(4)}.npy", intermediate_output)
    # verbose=0: tqdm already reports progress; per-epoch Keras logs would
    # add hundreds of lines of output.
    model.fit(X_train, y_train, epochs=1, verbose=0)

  0%|          | 0/200 [00:00<?, ?it/s]







In [50]:
# Evaluate on the held-out test split; the returned list is [loss, accuracy],
# i.e. the compiled loss followed by the compiled metric.
model.evaluate(X_test,  y_test, verbose=2)

3/3 - 0s - loss: 5.3000 - accuracy: 0.7349 - 24ms/epoch - 8ms/step


[5.299956798553467, 0.7349397540092468]

In [43]:
# Wrap the classifier with a softmax layer so that its logits are returned
# as class probabilities.
softmax_head = tf.keras.layers.Softmax()
probability_model = tf.keras.Sequential([model, softmax_head])

In [44]:
# Class probabilities for the first five test samples (one row per sample,
# one column per encoded category).
probability_model(X_test[:5])

<tf.Tensor: shape=(5, 6), dtype=float32, numpy=
array([[1.51246890e-10, 3.11009656e-11, 9.99996781e-01, 2.31875177e-07,
        2.80550148e-06, 6.22688958e-08],
       [9.50063989e-02, 5.81629336e-01, 1.29224523e-03, 3.13605726e-01,
        6.82642590e-03, 1.63984590e-03],
       [1.90493956e-01, 1.24345481e-01, 1.17606260e-02, 6.35109246e-01,
        3.23987864e-02, 5.89181762e-03],
       [6.41778111e-01, 2.38964632e-01, 3.34232673e-02, 7.27518946e-02,
        6.91814395e-03, 6.16393145e-03],
       [8.42930198e-01, 1.60081629e-02, 9.88573115e-03, 1.13700494e-01,
        1.67239588e-02, 7.51469575e-04]], dtype=float32)>

In [46]:
# Sub-model that returns the activations of the 16-unit hidden Dense layer.
# The layer is selected by position rather than by name: the previous lookup
# of 'dense_5' only resolved in a kernel where Dense layers had been created
# several times (Keras auto-increments the name suffix). In a freshly built
# model the layers are named dense, dense_1, dropout, dense_2, so that lookup
# raised an error under "Restart & Run All".
embedding_layer = model.layers[1]
layer_name = embedding_layer.name
intermediate_layer_model = tf.keras.Model(
    inputs=model.input,
    outputs=embedding_layer.output
)
intermediate_output = intermediate_layer_model(X_train)

In [47]:
# Expected: (number of training rows, 16) - one 16-d activation vector per row.
intermediate_output.shape

TensorShape([247, 16])

In [14]:
# Last snapshot written by the training loop (loop index 199). Snapshots are
# saved before each iteration's fit call, so this holds the activations after
# 199 completed epochs, not after the final (200th) one.
embedding_last = np.load(embeddings_dir / "0199.npy")

In [18]:
# Fit the 2-D UMAP projection on the last saved activations; the same fitted
# `mapping` is reused later to project earlier snapshots into this space.
# random_state pins the layout: without it every run produces a different
# embedding, which invalidates any axis limits derived from a previous run.
mapping = umap.UMAP(random_state=42).fit(embedding_last)
embedding_umap_last = mapping.transform(embedding_last)

In [19]:
# Per-axis extent (minimum, maximum) of the 2-D UMAP layout; useful when
# choosing fixed axis limits for plots of this projection.
umap_lo = embedding_umap_last.min(axis=0)
umap_hi = embedding_umap_last.max(axis=0)
umap_lo, umap_hi

(array([-3.3294551, -5.5773387], dtype=float32),
 array([24.625387, 22.008465], dtype=float32))

In [54]:
# Attach the 2-D UMAP coordinates of the last snapshot to the training table
# for plotting. This previously read from `embedding`, a name that is never
# assigned anywhere in the notebook and therefore raised NameError on a fresh
# kernel; `embedding_umap_last` is the UMAP projection computed above.
train_df["component_0"] = embedding_umap_last[:,0]
train_df["component_1"] = embedding_umap_last[:,1]

In [56]:
# Interactive scatter of the UMAP projection, coloured by bracket category.
fig = px.scatter(
    train_df,
    x='component_0',
    y='component_1',
    color='category',
    hover_data=['id'],
    title='UMAP of SimJEB geometries based on MLP embeddings',
    # `labels` must be keyed by DataFrame column names. The previous keys
    # ('X', 'Y', 'Type', 'ID') matched no column, so the mapping was ignored.
    labels={
        'component_0': 'UMAP component 0',
        'component_1': 'UMAP component 1',
        'id': 'ID',
    }
)

# Save the plot to an HTML file
fig.write_html(metadata_dir / "MLP_umap_categories.html")

In [34]:
embeddings_images_dir = metadata_dir / "mlp_embeddings_images"
embeddings_images_dir.mkdir(parents=True, exist_ok=True)

# Number of leading epoch snapshots rendered as animation frames.
N_FRAMES = 120
# Margin (in UMAP units) added around the reference layout on every side.
AXIS_PAD = 1.0

# All frames share the same axis limits so the animation does not jump.
# The limits are derived from the reference projection instead of being
# hard-coded, because constants copied from one run stop fitting as soon as
# the UMAP layout changes.
umap_min = embedding_umap_last.min(axis=0) - AXIS_PAD
umap_max = embedding_umap_last.max(axis=0) + AXIS_PAD
x_range = [float(umap_min[0]), float(umap_max[0])]
y_range = [float(umap_min[1]), float(umap_max[1])]

for i in tqdm(range(N_FRAMES)):
    embeddings = np.load(embeddings_dir / f"{str(i).zfill(4)}.npy")
    # Project this epoch's activations with the UMAP model fitted earlier.
    embeddings_umap = mapping.transform(embeddings)
    # Build a per-frame table rather than overwriting columns of train_df on
    # every iteration.
    frame_df = train_df.assign(
        component_0=embeddings_umap[:,0],
        component_1=embeddings_umap[:,1],
    )
    fig = px.scatter(
        frame_df,
        x='component_0',
        y='component_1',
        color='category',
        hover_data=['id'],
        title=f'UMAP of SimJEB geometries based on MLP embeddings: epoch {i}',
        # Keys must be DataFrame column names; the previous keys
        # ('X', 'Y', 'Type', 'ID') matched nothing and were ignored.
        labels={
            'component_0': 'umap_component_0',
            'component_1': 'umap_component_1',
            'id': 'ID',
        },
        range_x=x_range,
        range_y=y_range,
    )
    fig.write_image(embeddings_images_dir / f"{str(i).zfill(4)}.png")

  0%|          | 0/120 [00:00<?, ?it/s]

In [35]:
# Assemble the rendered frames into an animated GIF.
# Frames are discovered on disk instead of via `range(i)`: `i` was the leaked
# loop variable of the rendering cell (119 after 120 iterations), so the
# previous loop silently dropped the final frame and broke whenever that cell
# had not been run in the same kernel session.
# Note: every four-digit PNG in the directory is included, in filename order,
# so clear out frames from older runs first if fewer frames were rendered.
frame_paths = sorted(embeddings_images_dir.glob("[0-9][0-9][0-9][0-9].png"))
if not frame_paths:
    raise FileNotFoundError(f"No frame images found in {embeddings_images_dir}")
images = [imageio.imread(frame_path) for frame_path in tqdm(frame_paths)]
imageio.mimsave(embeddings_images_dir / "umap_embeddings_evolution.gif", images)

  0%|          | 0/119 [00:00<?, ?it/s]





In [58]:
# Linear 2-D projection (PCA) of the hidden-layer activations, as a
# counterpart to the UMAP view.
pca = PCA(n_components=2)
# np.asarray accepts both forms `intermediate_output` takes in this notebook:
# a NumPy array (left behind by the training loop) and a TensorFlow tensor
# (produced by the standalone extraction cell). Calling `.numpy()` only
# worked for the latter.
embeddings_pca = pca.fit_transform(np.asarray(intermediate_output))
train_df["component_0"] = embeddings_pca[:,0]
train_df["component_1"] = embeddings_pca[:,1]
fig = px.scatter(
    train_df,
    x='component_0',
    y='component_1',
    color='category',
    hover_data=['id'],
    title='PCA of SimJEB geometries based on MLP embeddings',
    # `labels` must be keyed by DataFrame column names. The previous keys
    # ('X', 'Y', 'Type', 'ID') matched no column, so the mapping was ignored.
    labels={
        'component_0': 'PCA component 0',
        'component_1': 'PCA component 1',
        'id': 'ID',
    }
)

# Save the plot to an HTML file
fig.write_html(metadata_dir / "MLP_pca_categories.html")