In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization


In [3]:
# Paths
base_dir = "/content/drive/MyDrive/carsounds-sm"
AUDIO_DIRS = [os.path.join(base_dir, d) for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]



In [4]:
# Constants
SAMPLE_RATE = 16000
DURATION = 1  # in seconds
SAMPLES_PER_CLIP = SAMPLE_RATE * DURATION
CLASS_NAMES = [os.path.basename(d) for d in AUDIO_DIRS]

def load_audio_data(dirs):
    X, y = [], []
    for folder in dirs:
        label = os.path.basename(folder)
        for fname in os.listdir(folder):
            if fname.endswith(".wav"):
                path = os.path.join(folder, fname)
                try:
                    audio, _ = librosa.load(path, sr=SAMPLE_RATE)
                    if len(audio) < SAMPLES_PER_CLIP:
                        audio = np.pad(audio, (0, SAMPLES_PER_CLIP - len(audio)))
                    else:
                        audio = audio[:SAMPLES_PER_CLIP]
                    X.append(audio)
                    y.append(label)
                except Exception as e:
                    print(f"Skipping {path}: {e}")
    return np.array(X), np.array(y)

print("Loading raw audio data...")
X, y = load_audio_data(AUDIO_DIRS)
print(f"Loaded {len(X)} samples.")


Loading raw audio data...
Loaded 401 samples.


In [5]:
# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=42)

# Add channel dim: [samples, time] -> [samples, time, 1]
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]


In [6]:
# Model
model = Sequential([
    Conv1D(16, kernel_size=9, activation='relu', input_shape=(SAMPLES_PER_CLIP, 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=4),

    Conv1D(32, kernel_size=9, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=4),

    Conv1D(64, kernel_size=9, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=4),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(CLASS_NAMES), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

print("Training raw audio classifier...")
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training raw audio classifier...
Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 198ms/step - accuracy: 0.2276 - loss: 7.2558 - val_accuracy: 0.2812 - val_loss: 5.0378
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 183ms/step - accuracy: 0.2587 - loss: 1.3794 - val_accuracy: 0.2812 - val_loss: 9.1966
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 204ms/step - accuracy: 0.2330 - loss: 1.3784 - val_accuracy: 0.2812 - val_loss: 13.1523
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 188ms/step - accuracy: 0.3046 - loss: 1.3242 - val_accuracy: 0.2812 - val_loss: 17.2889
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 177ms/step - accuracy: 0.2986 - loss: 1.2939 - val_accuracy: 0.2812 - val_loss: 20.4000
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 219ms/step - accuracy: 0.4170 - loss: 1.1823 - val_accuracy: 0.2812 - val_loss:

In [7]:
# Evaluate
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {acc:.4f}")

# Save as .keras and convert to TFLite
model.save("raw_audio_classifier.keras")

print(" Converting to TFLite...")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("raw_audio_model.tflite", "wb") as f:
    f.write(tflite_model)

# Save class names
with open("labels.txt", "w") as f:
    for name in le.classes_:
        f.write(f"{name}\n")

print(" Model and labels saved!")

Test Accuracy: 0.2716
 Converting to TFLite...
Saved artifact at '/tmp/tmptk6rio4i'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 16000, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 4), dtype=tf.float32, name=None)
Captures:
  138774134627920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134620240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134632144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134627152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134624848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134631184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134631760: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134633296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134633680: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138774134631952: TensorSpec(shape=(), dt