In [1]:
import os, glob
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

def load_and_split_sensor_data(
    csv_path: str,
    test_size: float = 0.1,
    val_size: float = 0.2,
    random_state: int = 42,
    batch_size: int = 32,
):
    """
    Load sensor CSV, preprocess (drop columns, encode labels, scale), and split into train/val/test.

    The feature scaler is fitted on the training split only and then applied to
    the validation and test splits, so no statistics from held-out rows leak
    into training.

    Args:
        csv_path: Path to the Gas Sensors Measurements CSV file.
        test_size: Fraction of the full dataset reserved for the final test set.
        val_size: Fraction of the full dataset reserved for validation (it is
            rescaled internally to a fraction of the non-test remainder).
        random_state: Random seed for the splits and the training shuffle.
        batch_size: Number of samples per batch in the returned datasets.

    Returns:
        train_ds: tf.data.Dataset for training (features, labels), shuffled.
        val_ds: tf.data.Dataset for validation.
        test_ds: tf.data.Dataset for testing.

    Raises:
        ValueError: If the requested split fractions leave no training data.
    """
    # Fail early with a readable message instead of a downstream sklearn error.
    if not (0.0 < test_size < 1.0 and val_size > 0.0 and test_size + val_size < 1.0):
        raise ValueError(
            "test_size and val_size must be positive fractions whose sum is "
            f"below 1; got test_size={test_size}, val_size={val_size}"
        )

    # 1) Load and clean
    df = pd.read_csv(csv_path)
    # drop unused columns (ignored when they are absent)
    df = df.drop(columns=["Serial Number", "Corresponding Image Name"], errors='ignore')
    # encode string labels as integer category codes
    df['Gas'] = df['Gas'].astype('category').cat.codes

    # 2) Extract features and labels
    feature_cols = [c for c in df.columns if c != 'Gas']
    X = df[feature_cols].values.astype('float32')
    y = df['Gas'].values.astype('int32')

    # 3) Split into train+val and test (stratified on the label)
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )
    # val_size is expressed relative to the full dataset, so convert it to a
    # fraction of what is left after removing the test rows.
    val_fraction = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=val_fraction,
        stratify=y_temp, random_state=random_state
    )

    # 4) Scale features: fit on the training rows only, then reuse the same
    #    mean/std for validation and test to avoid data leakage.
    # NOTE(review): the fitted scaler is not returned, so it cannot be reused
    # at inference time without refitting.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    # 5) Build tf.data datasets
    def make_ds(features, labels, shuffle=False):
        ds = tf.data.Dataset.from_tensor_slices((features, labels))
        if shuffle:
            # Buffer covers the whole split, giving a full shuffle.
            ds = ds.shuffle(buffer_size=len(features), seed=random_state)
        return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    train_ds = make_ds(X_train, y_train, shuffle=True)
    val_ds   = make_ds(X_val,   y_val,   shuffle=False)
    test_ds  = make_ds(X_test,  y_test,  shuffle=False)

    return train_ds, val_ds, test_ds


2025-05-05 00:04:42.336633: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746403482.658573      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746403482.773912      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Build the train/validation/test pipelines from the Kaggle-mounted CSV,
# using the loader's default split fractions and seed.
(train_ds, val_ds, test_ds) = load_and_split_sensor_data(
    csv_path="/kaggle/input/gas-dataset/zkwgkjkjn9-2/Gas Sensors Measurements/Gas_Sensors_Measurements.csv"
)

2025-05-05 00:04:59.711109: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
class RandomSensorDropout(tf.keras.layers.Layer):
    """
    Layer that randomly zeros input values with a given rate during training.

    An independent mask is drawn for every element of the input, so each
    sensor reading of each sample is dropped with probability ``rate``.
    Kept values are passed through unchanged (no ``1 / (1 - rate)`` rescaling).
    Outside of training the layer is the identity.

    Supports serialization through ``get_config``.

    Args:
        rate: Probability in [0, 1] of zeroing each input value.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.

    Raises:
        ValueError: If ``rate`` is outside [0, 1].
    """
    def __init__(self, rate=0.3, **kwargs):
        super().__init__(**kwargs)
        # A negative rate would silently disable the layer and a rate above 1
        # would silently zero every input, so reject both up front.
        if not 0.0 <= rate <= 1.0:
            raise ValueError(
                f"rate must be in the range [0, 1]; got {rate!r}"
            )
        self.rate = rate

    def call(self, inputs, training=False):
        """Return ``inputs`` with random elements zeroed when ``training`` is true."""
        if training and self.rate > 0.0:
            # Keep an element when its uniform draw exceeds the drop rate.
            mask = tf.cast(tf.random.uniform(tf.shape(inputs)) > self.rate, inputs.dtype)
            return inputs * mask
        return inputs

    def get_config(self):
        """Return the layer config including ``rate`` so the layer can be re-created."""
        config = super().get_config()
        config.update({"rate": self.rate})
        return config
        
def build_sensor_model(
    input_dim=7,
    sensor_dropout_rate=0.3,
    layer_dropout = 0.5,
    hidden_units=(64, 32),
    output_units=4,
    lr=1e-4
):
    """
    Sensor-only MLP with RandomSensorDropout option for ablation.

    Architecture: input -> RandomSensorDropout -> [Dense(relu) -> Dropout] for
    every entry of ``hidden_units`` -> Dense(softmax). The model is compiled
    with Adam, sparse categorical cross-entropy and an accuracy metric.

    Args:
        input_dim: Number of sensor features per sample.
        sensor_dropout_rate: Drop rate passed to RandomSensorDropout.
        layer_dropout: Drop rate of the Dropout layer after each hidden layer.
        hidden_units: Sizes of the hidden Dense layers, in order. Any number
            of layers is supported (an empty sequence yields no hidden layer).
        output_units: Number of output classes.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Model`` named "sensor_only".
    """
    inp = tf.keras.Input(shape=(input_dim,), name="sensor_input")
    x = RandomSensorDropout(sensor_dropout_rate, name="sensor_dropout")(inp)

    # One Dense + Dropout pair per requested hidden layer, rather than
    # indexing exactly two entries of hidden_units.
    for units in hidden_units:
        x = tf.keras.layers.Dense(units, activation="relu")(x)
        x = tf.keras.layers.Dropout(layer_dropout)(x)

    out = tf.keras.layers.Dense(output_units, activation="softmax", name="output")(x)

    model = tf.keras.Model(inp, out, name="sensor_only")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        # Labels are integer class ids, hence the sparse loss.
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

In [7]:
# Train the sensor-only model, keeping on disk only the weights of the epoch
# with the highest validation accuracy.
model = build_sensor_model()
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_sensor_model.keras', monitor='val_accuracy',
    save_best_only=True, mode='max', verbose=1
)
# train_ds / val_ds are already batched tf.data pipelines, so batch_size must
# not be passed to fit(): the dataset itself determines the batch size.
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=100,
                    callbacks=[checkpoint])

Epoch 1/100
[1m114/140[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 2ms/step - accuracy: 0.2760 - loss: 1.5917
Epoch 1: val_accuracy improved from -inf to 0.48359, saving model to best_sensor_model.keras
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.2775 - loss: 1.5854 - val_accuracy: 0.4836 - val_loss: 1.4004
Epoch 2/100
[1m129/140[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.3309 - loss: 1.4363
Epoch 2: val_accuracy improved from 0.48359 to 0.50156, saving model to best_sensor_model.keras
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3316 - loss: 1.4344 - val_accuracy: 0.5016 - val_loss: 1.2210
Epoch 3/100
[1m125/140[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 2ms/step - accuracy: 0.3967 - loss: 1.3206
Epoch 3: val_accuracy improved from 0.50156 to 0.59062, saving model to best_sensor_model.keras
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [9]:
# Reload the best checkpoint; the custom layer has to be supplied explicitly
# so Keras can rebuild it from the saved config.
model = tf.keras.models.load_model(
    filepath='best_sensor_model.keras',
    custom_objects={"RandomSensorDropout": RandomSensorDropout},
)
# evaluate() yields the loss followed by the compiled accuracy metric.
test_loss, test_accuracy = model.evaluate(x=test_ds, verbose=0)
print("test accuracy is ", test_accuracy)

test accuracy is  0.871874988079071
