In [5]:
# ===============================================================
# Imports
# ===============================================================
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical

# ===============================================================
# Load and preprocess dataset
# ===============================================================
# Read the raw table; header=None means the first row is treated as data, not column names.
df = pd.read_csv("gsalc.csv", header=None)

# Column 0 supplies the gas label (forced to str); columns 2 onward are the sensor features.
# NOTE(review): column 1 is never read -- presumably a non-feature field; confirm.
y = df.iloc[:, 0].astype(str).values
X = df.iloc[:, 2:].values

# Text labels -> integer ids -> one-hot rows (the target format for the softmax output).
le = LabelEncoder().fit(y)
y_int = le.transform(y)
y_cat = to_categorical(y_int)

# Hold out 20% of the rows for testing. Stratification uses the integer ids rather than
# the one-hot matrix, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    random_state=42,
    stratify=y_int,
)

# Learn the scaling statistics from the training rows only, then apply them to both
# splits so no information from the test rows leaks into the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ===============================================================
# Build and Train the Model
# ===============================================================

# Reset Keras' global state so nothing from a previously built model carries over
tf.keras.backend.clear_session()

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Create the Layers
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# History: sigmoid was tried for the hidden layers first; relu gave better accuracy.
# NOTE(review): an earlier note said 2 hidden layers matched 3 in accuracy, yet three
# hidden layers (128/64/32) are built below -- confirm which depth is intended.
hidden_units = (128, 64, 32)

# Input width follows the feature matrix.
# NOTE(review): the original note claims 9000 features -- TODO confirm against the CSV.
network_layers = [layers.Input(shape=(X_train.shape[1],))]

# Fully connected relu stack, widest layer first
network_layers += [layers.Dense(units, activation='relu') for units in hidden_units]

# One softmax unit per one-hot column.
# NOTE(review): the original note claims 6 classes -- TODO confirm against the labels.
network_layers.append(layers.Dense(y_cat.shape[1], activation='softmax'))

model = keras.Sequential(network_layers)

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Compile the Model
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Plain SGD with a fixed learning rate of 0.01
sgd = keras.optimizers.SGD(learning_rate=0.01)

# categorical_crossentropy matches the one-hot targets; accuracy is tracked as a metric
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Check Model Summary
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Print the layer-by-layer overview of the compiled model
model.summary()

# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Train the Model
# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
print('\n Beginning training... \n')

# Training configuration: 50 passes over the data in mini-batches of 8, with 20% of
# the training rows set aside by Keras for per-epoch validation metrics.
fit_options = {
    'epochs': 50,
    'batch_size': 8,
    'validation_split': 0.2,
    'verbose': 1,
}

# history keeps the object returned by fit() so it stays available after training
history = model.fit(X_train, y_train, **fit_options)

print('\n Training complete!\n')

# ===============================================================
# Initial Evaluation
# ===============================================================
# Score the trained model on the held-out split; with the compiled loss and the single
# 'accuracy' metric, evaluate() returns the pair (loss, accuracy).
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)

# ===============================================================
# Make Predictions
# ===============================================================
# Softmax outputs per test row; argmax over axis 1 picks the winning class index.
probabilities = model.predict(X_test)
pred_classes = np.argmax(probabilities, axis=1)
# y_test is one-hot, so argmax recovers the integer class id as well
true_classes = np.argmax(y_test, axis=1)

print("\nFirst 10 predictions vs ground truth:\n")
# Slicing (rather than indexing 0..9) keeps this safe when the test split has fewer
# than 10 rows, and each array is decoded back to its text labels in a single call.
pred_labels = le.inverse_transform(pred_classes[:10])
true_labels = le.inverse_transform(true_classes[:10])
for pred_label, true_label in zip(pred_labels, true_labels):
    print(f"Pred: {pred_label}, True: {true_label}")



 Beginning training... 

Epoch 1/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 1s 29ms/step - accuracy: 0.3383 - loss: 1.8157 - val_accuracy: 0.3333 - val_loss: 2.1973
Epoch 2/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.4873 - loss: 1.5722 - val_accuracy: 0.8000 - val_loss: 0.7541
Epoch 3/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.7567 - loss: 0.5844 - val_accuracy: 0.8667 - val_loss: 0.4117
Epoch 4/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step - accuracy: 0.8611 - loss: 0.3448 - val_accuracy: 0.9333 - val_loss: 0.2901
Epoch 5/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.9053 - loss: 0.3090 - val_accuracy: 0.8000 - val_loss: 0.3555
Epoch 6/50
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step - accuracy: 0.9236 - loss: 0.2448 - val_accuracy: 0.8000 - val_loss: 0.4191
Epoch 7/50
[1m8/8[