In [1]:
import os
import sys

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture

# Extend the import path before the project-local imports below
# (presumably `model` and `input_pipeline` live in the parent directory — confirm).
sys.path.append('..')
import input_pipeline
from model import VAE, plus_encode_data

2024-04-17 08:16:43.731013: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Gaussian mixture fitting

In [4]:
def load_model_at_T(t, folder, latent_dim=200, input_shape=(32, 32, 1)):
    """Build a VAE and load the weights stored for temperature ``t``.

    Args:
        t: Temperature; formatted with one decimal to select the weight file.
        folder: Directory that contains the ``vae{t:.1f}.h5`` weight files.
        latent_dim: Latent dimension passed to the ``VAE`` constructor.
        input_shape: Per-sample input shape (without the batch axis) used for
            the dummy forward pass. Defaults to ``(32, 32, 1)``, the value
            that used to be hard-coded.

    Returns:
        The ``VAE`` instance after ``load_weights`` was called on
        ``{folder}/vae{t:.1f}.h5``.
    """
    model = VAE(latent_dim)
    # Run one dummy batch through the model before loading the weights
    # (presumably this creates the variables that load_weights fills — confirm).
    model(tf.zeros((1, *input_shape)))
    model.load_weights(f'{folder}/vae{t:.1f}.h5')
    return model

In [5]:
# Temperatures 2.0, 2.1, ..., 2.9.
# np.arange with a float step carries rounding error (e.g. 2.3000000000000003),
# which makes the values unreliable as dict keys: models[2.3] would raise
# KeyError. Rounding to the single decimal used in the file names fixes that.
temps = np.round(np.arange(2.0, 3.0, 0.1), 1)
folder = "Results/16-04-2024"
titles = [f"Temperature: {t:.1f}" for t in temps]
# One loaded VAE per temperature, keyed by the (rounded) temperature value.
models = {t: load_model_at_T(t, folder) for t in temps}

In [7]:
# Fit Gaussian mixtures to the encoder outputs of every temperature and
# persist them next to the corresponding model weights.
batch_size = 100  # loop-invariant, so set once

for temp in temps:
    model = models[temp]

    trainset_path = f"../../GetData/Python/Data/Data{temp:.1f}.tfrecord"
    train_set = input_pipeline.dataset_tfrecord_pipeline(trainset_path, flatten=False, batch_size=batch_size)

    # Encode the whole data set batch by batch, collecting both encoder outputs.
    data_var = []
    data_mean = []
    for data_mc in train_set:
        mean, var = model.encode(plus_encode_data(data_mc))
        data_var.append(var)
        data_mean.append(mean)

    # Fail with a readable message instead of np.concatenate's generic
    # "need at least one array to concatenate".
    if not data_mean:
        raise ValueError(f"No batches were read from {trainset_path}")

    # Named latent_* rather than `vars` to avoid shadowing the builtin.
    latent_vars = np.concatenate(data_var, axis=0)
    latent_means = np.concatenate(data_mean, axis=0)

    # A fresh estimator per temperature makes it explicit that each saved
    # model is independent of the previous fits.
    gm_vars = GaussianMixture(n_components=2, random_state=0).fit(latent_vars)
    gm_means = GaussianMixture(n_components=1, random_state=0).fit(latent_means)

    # Save the GMM models; create the per-temperature directory first so a
    # fresh checkout does not fail with FileNotFoundError.
    out_dir = f"{folder}/{temp:.1f}"
    os.makedirs(out_dir, exist_ok=True)
    joblib.dump(gm_vars, f"{out_dir}/gm_vars.pkl")
    joblib.dump(gm_means, f"{out_dir}/gm_means.pkl")

    print(f"Temperature: {temp:.1f}")

Temperature: 2e+00
Temperature: 2e+00
Temperature: 2e+00
Temperature: 2e+00
Temperature: 2e+00
Temperature: 3e+00
Temperature: 3e+00
Temperature: 3e+00
Temperature: 3e+00
Temperature: 3e+00


# t-SNE

In [11]:
# Encode the data set of a single temperature; the resulting `means` and
# `vars` arrays feed the t-SNE and GMM cells further down.
temp = 2.3
batch_size = 100
model = load_model_at_T(temp, folder)

trainset_path = f"../../GetData/Python/Data/Data{temp:.1f}.tfrecord"
train_set = input_pipeline.dataset_tfrecord_pipeline(
    trainset_path,
    flatten=False,
    batch_size=batch_size,
)

# Collect both encoder outputs for every batch of the data set.
data_mean, data_var = [], []
for data_mc in train_set:
    latent_mean, latent_var = model.encode(plus_encode_data(data_mc))
    data_mean.append(latent_mean)
    data_var.append(latent_var)

# Stack the per-batch results along the sample axis.
# NOTE: `vars` shadows the builtin, but later cells read it under this name.
means = np.concatenate(data_mean, axis=0)
vars = np.concatenate(data_var, axis=0)

In [None]:
# Project the encoder means to two dimensions with t-SNE.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne_mean = TSNE(
    n_components=2,
    perplexity=50,
    n_iter=10000,
    verbose=1,
    random_state=123,
)
v = tsne_mean.fit_transform(means)

# One column per embedding axis, ready for seaborn.
df_mean = pd.DataFrame({"x": v[:, 0], "y": v[:, 1]})

[t-SNE] Computing 151 nearest neighbors...
[t-SNE] Indexed 50000 samples in 0.008s...
[t-SNE] Computed neighbors for 50000 samples in 34.188s...
[t-SNE] Computed conditional probabilities for sample 1000 / 50000
[t-SNE] Computed conditional probabilities for sample 2000 / 50000
[t-SNE] Computed conditional probabilities for sample 3000 / 50000
[t-SNE] Computed conditional probabilities for sample 4000 / 50000
[t-SNE] Computed conditional probabilities for sample 5000 / 50000
[t-SNE] Computed conditional probabilities for sample 6000 / 50000
[t-SNE] Computed conditional probabilities for sample 7000 / 50000
[t-SNE] Computed conditional probabilities for sample 8000 / 50000
[t-SNE] Computed conditional probabilities for sample 9000 / 50000
[t-SNE] Computed conditional probabilities for sample 10000 / 50000
[t-SNE] Computed conditional probabilities for sample 11000 / 50000
[t-SNE] Computed conditional probabilities for sample 12000 / 50000
[t-SNE] Computed conditional probabilities for s

In [None]:
# Filled 2-D density of the t-SNE embedding of the encoder means.
# Explicit figure/axes plus title and labels so the plot stands on its own.
fig, ax = plt.subplots()
sns.kdeplot(data=df_mean, x='x', y='y', cmap='viridis', fill=True, cbar=True, ax=ax)
ax.set(
    title=f"t-SNE of encoder means (T = {temp:.1f})",
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
);

In [None]:
# Project the encoder variances to two dimensions with t-SNE.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne_var = TSNE(
    n_components=2,
    perplexity=50,
    n_iter=10000,
    verbose=1,
    random_state=123,
)
v = tsne_var.fit_transform(vars)

# One column per embedding axis, ready for seaborn.
df_var = pd.DataFrame({"x": v[:, 0], "y": v[:, 1]})

In [None]:
# Filled 2-D density of the t-SNE embedding of the encoder variances.
# Explicit figure/axes plus title and labels so the plot stands on its own.
fig, ax = plt.subplots()
sns.kdeplot(data=df_var, x='x', y='y', cmap='viridis', fill=True, cbar=True, ax=ax)
ax.set(
    title=f"t-SNE of encoder variances (T = {temp:.1f})",
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
);

In [None]:
# Cluster the encoder variances with a two-component Gaussian mixture and
# colour the t-SNE density by the assigned component.
gm_vars = GaussianMixture(n_components=2, random_state=0)
# fit_predict fits the mixture and returns the labels for the same data.
labels = gm_vars.fit_predict(vars)
df_var['label'] = labels

fig, ax = plt.subplots()
# `cmap` is dropped on purpose: seaborn ignores it (with a warning) whenever
# `hue` is given, because the colours then come from the hue mapping.
sns.kdeplot(data=df_var, x='x', y='y', fill=True, cbar=True, hue='label', ax=ax)
ax.set(
    title=f"t-SNE of encoder variances by GMM component (T = {temp:.1f})",
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
);