In [1]:
import os

# Point CUDA_VISIBLE_DEVICES at the index -1 for this process. This runs before
# TensorFlow is imported further down; presumably the intent is to force a
# CPU-only run (the later device check in this notebook reports 0 GPUs).
os.environ.update({'CUDA_VISIBLE_DEVICES': '-1'})

In [2]:
# Cell 1: Upload & auto-detect train/test filenames

from google.colab import files
import io, pandas as pd

# Open the Colab upload prompt. The result is indexed by saved filename and
# each value is the raw file content (it is wrapped in BytesIO below).
uploaded = files.upload()

# see exactly what got uploaded
fnames = list(uploaded.keys())
print("Uploaded files:", fnames)


def _pick_upload(candidates, keyword):
    """Return the first name in `candidates` that contains `keyword`.

    The match is case-insensitive on the filename.

    Args:
        candidates: iterable of uploaded filenames, in upload order.
        keyword: lowercase substring to look for (e.g. 'train').

    Returns:
        The first matching filename.

    Raises:
        ValueError: if no filename contains `keyword`. This replaces the bare
            StopIteration that an unguarded next() would raise.
    """
    match = next((f for f in candidates if keyword in f.lower()), None)
    if match is None:
        raise ValueError(
            f"No uploaded file has '{keyword}' in its name; got: {list(candidates)}"
        )
    return match


# pick the ones with "train" / "test" in their name (case-insensitive)
train_fname = _pick_upload(fnames, 'train')
test_fname = _pick_upload(fnames, 'test')

# A single name can contain both keywords (e.g. "latest_train.csv"); using the
# same file for both splits would silently evaluate on the training data.
if train_fname == test_fname:
    raise ValueError(
        f"'{train_fname}' matched both 'train' and 'test'; "
        "upload two distinctly named files."
    )

print("Using →", train_fname, "as TRAIN and", test_fname, "as TEST")

# read into DataFrames
train_df = pd.read_csv(io.BytesIO(uploaded[train_fname]))
test_df = pd.read_csv(io.BytesIO(uploaded[test_fname]))

print("Train shape:", train_df.shape)
print("Test  shape:", test_df.shape)


Saving FINALtestUpdated.csv to FINALtestUpdated (7).csv
Saving FINALtrainUpdated.csv to FINALtrainUpdated (5).csv
Uploaded files: ['FINALtestUpdated (7).csv', 'FINALtrainUpdated (5).csv']
Using → FINALtrainUpdated (5).csv as TRAIN and FINALtestUpdated (7).csv as TEST
Train shape: (120000, 25)
Test  shape: (7600, 25)


In [3]:
# Cell 2: Preprocessing (with LabelEncoder)

import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Target column, present in both the train and test DataFrames.
label_col = 'Class Index'

# Features are every numeric column of the training frame except the label;
# non-numeric columns are excluded by the dtype filter. remove() raises
# ValueError if the label column is not numeric.
numeric_cols = list(train_df.select_dtypes(include='number').columns)
numeric_cols.remove(label_col)
feature_cols = numeric_cols

# Raw targets exactly as stored in the CSVs.
y_train_raw = train_df[label_col].to_numpy()
y_test_raw = test_df[label_col].to_numpy()

# Fit the encoder on the training labels only, then apply it to both splits so
# that train and test share one label -> index mapping.
le = LabelEncoder().fit(y_train_raw)
y_train_enc = le.transform(y_train_raw)
y_test_enc = le.transform(y_test_raw)

# One-hot targets of shape (n_samples, n_classes).
num_classes = len(le.classes_)
y_train = to_categorical(y_train_enc, num_classes=num_classes)
y_test = to_categorical(y_test_enc, num_classes=num_classes)

# Conv1D expects (samples, timesteps, channels): append a trailing channel
# axis of size 1 to the 2-D feature matrix.
X_train = train_df[feature_cols].to_numpy()[..., np.newaxis]
X_test = test_df[feature_cols].to_numpy()[..., np.newaxis]

print("Classes found:", le.classes_)
print("X_train shape:", X_train.shape)
print("X_test  shape:", X_test.shape)


Classes found: [1 2 3 4]
X_train shape: (120000, 21, 1)
X_test  shape: (7600, 21, 1)


In [4]:
import tensorflow as tf

# Query the GPUs TensorFlow can see once and reuse the result below.
gpu_devices = tf.config.list_physical_devices('GPU')

print("Num GPUs Available: ", len(gpu_devices))
print("Built with CUDA:", tf.test.is_built_with_cuda())
# tf.test.is_gpu_available() is deprecated and prints an "Instructions for
# updating" warning; its recommended replacement is list_physical_devices('GPU').
print("GPU supported:", bool(gpu_devices))


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Num GPUs Available:  0
Built with CUDA: True
GPU supported: False


In [5]:
# Cell 3: Build & compile the deeper 1D-CNN (with same-padding)

import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Prevent TensorFlow from allocating all memory at once.
        # Memory growth has to be configured identically on every GPU, so
        # apply it to each visible device rather than only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Enabled memory growth on GPU")
    except Exception as e:
        # Best effort: report the failure and continue with default allocation.
        print("Could not set memory growth:", e)

from tensorflow.keras import layers, models

input_shape = X_train.shape[1:]  # (timesteps, channels)

# Three Conv1D -> BatchNorm -> MaxPool -> Dropout blocks with 32/64/128
# filters, followed by a small dense classifier. 'same' padding keeps the
# time axis length through each convolution; each MaxPooling1D(2) halves it.
model = models.Sequential([
    # Explicit Input layer: passing input_shape= to the first layer of a
    # Sequential model makes Keras emit a UserWarning.
    layers.Input(shape=input_shape),

    # Block 1
    layers.Conv1D(32, 3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(2),
    layers.Dropout(0.25),

    # Block 2
    layers.Conv1D(64, 3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(2),
    layers.Dropout(0.25),

    # Block 3
    layers.Conv1D(128, 3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(2),
    layers.Dropout(0.25),

    # Classifier head
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    # One softmax unit per class; pairs with the one-hot targets and the
    # categorical cross-entropy loss below.
    layers.Dense(num_classes, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Cell 4: Train the model

from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has failed to improve for 3 epochs in a row, then roll
# the model back to the weights from its best epoch.
# ('val_accuracy' is the alternative quantity to monitor.)
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# 20% of the training arrays are held out for validation by Keras itself.
# NOTE(review): validation_split is documented to take the *last* samples
# before shuffling - confirm the CSV rows are not ordered by class.
# NOTE(review): fit() continues from the model's current weights, so re-running
# this cell without rebuilding the model resumes training instead of restarting.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    callbacks=[early_stop],
)


Epoch 1/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7792 - loss: 0.5862 - val_accuracy: 0.7897 - val_loss: 0.5411
Epoch 2/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7802 - loss: 0.5822 - val_accuracy: 0.7895 - val_loss: 0.5318
Epoch 3/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7786 - loss: 0.5823 - val_accuracy: 0.7901 - val_loss: 0.5332
Epoch 4/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7823 - loss: 0.5788 - val_accuracy: 0.7938 - val_loss: 0.5302
Epoch 5/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7831 - loss: 0.5780 - val_accuracy: 0.7933 - val_loss: 0.5262
Epoch 6/20
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.7823 - loss: 0.5787 - val_accuracy: 0.7907 - val_loss: 0.5423
Epoch 7/20

In [11]:
# Cell 5: Evaluate on the test set

# evaluate() yields the loss followed by the compiled metric (accuracy);
# verbose=0 suppresses the progress bar.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_acc = test_metrics

# Both figures are reported to four decimal places, one per line.
print(
    f"Test loss:     {test_loss:.4f}",
    f"Test accuracy: {test_acc:.4f}",
    sep="\n",
)


Test loss:     0.5372
Test accuracy: 0.7911


In [12]:
# Cell X: Gather slide stats

import numpy as np
import time
from sklearn.metrics import f1_score
from tensorflow.keras.layers import Conv1D

# 1) Total trainable parameters: sum of the element counts of every trainable
#    weight tensor in the model.
trainable_params = np.sum([np.prod(w.shape) for w in model.trainable_weights])
print("Total trainable parameters:", trainable_params)

# 2) Number of epochs actually run before training ended. This counts every
#    recorded epoch, including the patience epochs after the best one.
epochs_ran = len(history.history['loss'])
print("Epochs run:", epochs_ran)

# 3) Macro-averaged F1 score on the test set. argmax turns both the softmax
#    outputs and the one-hot targets back into class indices.
y_pred = model.predict(X_test, verbose=0).argmax(axis=1)
y_true = y_test.argmax(axis=1)
macro_f1 = f1_score(y_true, y_pred, average='macro')
print("Macro F1 score:", round(macro_f1, 4))

# 4) Avg inference time per sample (ms).
#    perf_counter() is a monotonic, high-resolution clock intended for timing
#    intervals; time.time() can jump if the system clock is adjusted.
#    The predict() call in step 3 has already run once, so this measurement
#    is taken after that first call.
start = time.perf_counter()
_ = model.predict(X_test, batch_size=32, verbose=0)
elapsed = time.perf_counter() - start
print("Avg inference time:", round(elapsed / X_test.shape[0] * 1000, 2), "ms/sample")

# 5) Number of Conv1D layers (depth)
conv_layers = sum(isinstance(l, Conv1D) for l in model.layers)
print("Conv1D layers:", conv_layers)


Total trainable parameters: 48196
Epochs run: 11
Macro F1 score: 0.789
Avg inference time: 0.09 ms/sample
Conv1D layers: 3
