In [32]:
import ramanspy as rp

# Load training and testing datasets
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")

# Load the names of the species and antibiotics corresponding to the 30 classes
y_labels, antibiotics_labels = rp.datasets.bacteria("labels")

In [None]:
# !pip3 install torch torchvision torchaudio
# !pip install tensorflow



In [34]:
import numpy as np
import ramanspy as rp
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load training and testing datasets
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")

# Load the names of the species and antibiotics corresponding to the 30 classes
y_labels, antibiotics_labels = rp.datasets.bacteria("labels")

# Extract spectral data from SpectralContainer objects
X_train = X_train.spectral_data
X_test = X_test.spectral_data

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Randomize the training data
indices = np.arange(X_train.shape[0])
np.random.shuffle(indices)
X_train = X_train[indices]
y_train = y_train[indices]

# Encode the labels to integers
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Convert labels to one-hot encoding
num_classes = 30
y_train_one_hot = to_categorical(y_train_encoded, num_classes)
y_test_one_hot = to_categorical(y_test_encoded, num_classes)

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_encoded), y=y_train_encoded)
class_weights = dict(enumerate(class_weights))

# Define the neural network model with regularization and batch normalization
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Early stopping and learning rate scheduling
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

# Train the model
history = model.fit(X_train, y_train_one_hot, epochs=100, batch_size=32,
                    validation_split=0.2, class_weight=class_weights,
                    callbacks=[early_stopping, reduce_lr])

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
print(f"Test accuracy: {test_acc:.4f}")

# Save the model
model.save("bacteria_classifier_model.h5")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.2331 - loss: 6.8644 - val_accuracy: 0.7595 - val_loss: 1.7226 - learning_rate: 0.0010
Epoch 2/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.5762 - loss: 2.1294 - val_accuracy: 0.8061 - val_loss: 1.4910 - learning_rate: 0.0010
Epoch 3/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6376 - loss: 1.9873 - val_accuracy: 0.8302 - val_loss: 1.4587 - learning_rate: 0.0010
Epoch 4/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.6474 - loss: 1.9415 - val_accuracy: 0.8363 - val_loss: 1.4197 - learning_rate: 0.0010
Epoch 5/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6607 - loss: 1.9167 - val_accuracy: 0.8291 - val_loss: 1.4011 - learning_rate: 0.0010
Epoch 6/100
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



Test accuracy: 0.4077
