In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Lambda
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K

In [8]:
data = pd.read_csv("post_componator_spectra.csv")
samples = data['Sample'].values
spectra = data.drop(columns=['Device', 'Material', 'Temperature', 'Sample']).values

spectra = (spectra - spectra.mean(axis=0)) / spectra.std(axis=0)

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(samples)

In [9]:
num_classes = len(np.unique(encoded_labels))

inputs = Input(shape=(spectra.shape[1],))
x = Dense(512, activation='relu')(inputs)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
X_train, X_val, y_train, y_val = train_test_split(spectra, encoded_labels, test_size=2/11)
model.fit(X_train, y_train, 
          validation_data=(X_val, y_val),
          epochs=100,
          batch_size=32)
        #   callbacks=[EarlyStopping(patience=5)])

Epoch 1/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0265 - loss: 6.0053 - val_accuracy: 0.1810 - val_loss: 3.8309
Epoch 2/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2720 - loss: 3.0349 - val_accuracy: 0.3621 - val_loss: 2.2069
Epoch 3/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4468 - loss: 1.8568 - val_accuracy: 0.4418 - val_loss: 1.7387
Epoch 4/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5338 - loss: 1.4922 - val_accuracy: 0.5174 - val_loss: 1.5128
Epoch 5/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5963 - loss: 1.2543 - val_accuracy: 0.5545 - val_loss: 1.3258
Epoch 6/100
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6474 - loss: 1.0735 - val_accuracy: 0.5747 - val_loss: 1.2467
Epoch 7/100
[1m307/30

<keras.src.callbacks.history.History at 0x7a76ce7c9f90>

In [11]:
def find_similar_classifier(model, query_spectrum, top_n=3):
    query = (query_spectrum - spectra.mean(axis=0)) / spectra.std(axis=0)
    probs = model.predict(np.array([query]))[0]
    top_indices = np.argsort(probs)[-top_n:][::-1]
    return label_encoder.inverse_transform(top_indices), probs[top_indices]

In [12]:
spec_id = 2048
my_spectrum = X_val[spec_id]
top_n = 3

finded_classes = find_similar_classifier(model, my_spectrum, top_n)

print(f"Sample <{data["Sample"].iloc[spec_id]}>({spec_id}) test for {top_n} classes:")
for i in range(top_n):
    print(f"    <{finded_classes[0][i]}> by {finded_classes[1][i]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Sample <AISI 1010>(2048) test for 3 classes:
    <ep164> by 0.9999797344207764
    <08x15h24b4tp> by 1.880094532680232e-05
    <lrg5> by 8.505230084665527e-07
