## Malayalam

In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Setting parameters
test_dim = 431  # Dimension to pad/truncate MFCC features
batch_size = 24  # Batch size for training
nb_filter = 16  # Number of filters in Conv1D layers
filter_length_1 = 50  # Length of the first filter
filter_length_2 = 25  # Length of the second filter
nb_epoch = 50  # Number of epochs for training
hidden_dims = 64  # Number of hidden units in Dense layer
n_mfcc = 39  # Number of MFCC coefficients
hop_length = 512  # Hop length for MFCC extraction
n_fft = 480  # FFT window size

# Function to load audio files and extract MFCC features
def load_data(data_dir):
    X = []
    y = []
    class_labels = {}
    class_index = 0

    for subdir, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(subdir, file)
                label = os.path.basename(subdir)
                if label not in class_labels:
                    class_labels[label] = class_index
                    class_index += 1
                
                # Load audio file and extract MFCC features
                audio, sr = librosa.load(file_path, sr=None)
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)
                mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)  # Normalize MFCC features
                if mfcc.shape[1] < test_dim:
                    mfcc = np.pad(mfcc, ((0, 0), (0, test_dim - mfcc.shape[1])), mode='constant')
                else:
                    mfcc = mfcc[:, :test_dim]
                X.append(mfcc.T)
                y.append(class_labels[label])
    
    return np.array(X), np.array(y), class_labels

# Load data
data_dir = '/dist_home/jairam/Malayalam/'
print('Loading data...')
X, y, class_labels = load_data(data_dir)

# Ensure X has the correct shape for Conv1D input (batch_size, steps, input_dim)
X = np.expand_dims(X, -1)  # Add a new axis to match (batch_size, steps, input_dim, 1)

# Shuffle the data before splitting
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

nb_classes = len(class_labels)  # Update number of classes based on data
Y_train = to_categorical(y_train, nb_classes)
Y_test = to_categorical(y_test, nb_classes)

# Define the model with additional regularization and learning rate scheduling
model = Sequential()

# Add convolutional layers with BatchNormalization and Dropout
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_1,
                 input_shape=(test_dim, n_mfcc),  # Adjusted input shape for Conv1D
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

# Flatten the output of the conv layer
model.add(Flatten())

# Add dropout for regularization
model.add(Dropout(0.6))

# Add output layer with softmax activation for multiclass classification
model.add(Dense(nb_classes, activation='softmax'))

# Compile the model with a learning rate schedule
initial_learning_rate = 0.0005
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True)

optimizer = Adam(learning_rate=lr_schedule)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

model.summary()

# Train the model with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

history = model.fit(X_train, Y_train, epochs=50, batch_size=batch_size, validation_data=(X_test, Y_test), callbacks=[early_stopping])

# Evaluate the model
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Generate classification report
y_preds = model.predict(X_test)
y_pred_classes = np.argmax(y_preds, axis=1)
y_true_classes = np.argmax(Y_test, axis=1)
print(classification_report(y_true_classes, y_pred_classes, target_names=list(class_labels.keys())))


2025-03-29 18:10:05.005417: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-29 18:10:05.017116: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-29 18:10:05.029778: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-29 18:10:05.033570: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-29 18:10:05.044464: I tensorflow/core/platform/cpu_feature_guar

Loading data...
6455 train sequences
1614 test sequences


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-03-29 18:10:53.214427: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79078 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:41:00.0, compute capability: 8.0


Epoch 1/50


I0000 00:00:1743252057.168880 1318606 service.cc:146] XLA service 0x778bc40055e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743252057.168920 1318606 service.cc:154]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2025-03-29 18:10:57.235483: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-03-29 18:10:57.424650: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m 63/269[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step - accuracy: 0.3430 - loss: 4.4072

I0000 00:00:1743252060.503959 1318606 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - accuracy: 0.4194 - loss: 3.6060 - val_accuracy: 0.5496 - val_loss: 1.6871
Epoch 2/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5185 - loss: 2.2102 - val_accuracy: 0.5700 - val_loss: 1.6998
Epoch 3/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5525 - loss: 1.6399 - val_accuracy: 0.5081 - val_loss: 1.7818
Epoch 4/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6343 - loss: 1.3202 - val_accuracy: 0.7107 - val_loss: 1.1094
Epoch 5/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6900 - loss: 1.1585 - val_accuracy: 0.6939 - val_loss: 1.1787
Epoch 6/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7151 - loss: 1.0579 - val_accuracy: 0.6388 - val_loss: 1.0931
Epoch 7/50
[1m269/269[0m [32m━━━━━

## 1D-CNN for Tamil

In [3]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Setting parameters
test_dim = 431  # Dimension to pad/truncate MFCC features
batch_size = 24  # Batch size for training
nb_filter = 16  # Number of filters in Conv1D layers
filter_length_1 = 50  # Length of the first filter
filter_length_2 = 25  # Length of the second filter
nb_epoch = 50  # Number of epochs for training
hidden_dims = 64  # Number of hidden units in Dense layer
n_mfcc = 39  # Number of MFCC coefficients
hop_length = 512  # Hop length for MFCC extraction
n_fft = 480  # FFT window size

# Function to load audio files and extract MFCC features
def load_data(data_dir):
    X = []
    y = []
    class_labels = {}
    class_index = 0

    for subdir, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(subdir, file)
                label = os.path.basename(subdir)
                if label not in class_labels:
                    class_labels[label] = class_index
                    class_index += 1
                
                # Load audio file and extract MFCC features
                audio, sr = librosa.load(file_path, sr=None)
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)
                mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)  # Normalize MFCC features
                if mfcc.shape[1] < test_dim:
                    mfcc = np.pad(mfcc, ((0, 0), (0, test_dim - mfcc.shape[1])), mode='constant')
                else:
                    mfcc = mfcc[:, :test_dim]
                X.append(mfcc.T)
                y.append(class_labels[label])
    
    return np.array(X), np.array(y), class_labels

# Load data
data_dir = '/dist_home/jairam/Tamil/'
print('Loading data...')
X, y, class_labels = load_data(data_dir)

# Ensure X has the correct shape for Conv1D input (batch_size, steps, input_dim)
X = np.expand_dims(X, -1)  # Add a new axis to match (batch_size, steps, input_dim, 1)

# Shuffle the data before splitting
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

nb_classes = len(class_labels)  # Update number of classes based on data
Y_train = to_categorical(y_train, nb_classes)
Y_test = to_categorical(y_test, nb_classes)

# Define the model with additional regularization and learning rate scheduling
model = Sequential()

# Add convolutional layers with BatchNormalization and Dropout
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_1,
                 input_shape=(test_dim, n_mfcc),  # Adjusted input shape for Conv1D
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

# Flatten the output of the conv layer
model.add(Flatten())

# Add dropout for regularization
model.add(Dropout(0.6))

# Add output layer with softmax activation for multiclass classification
model.add(Dense(nb_classes, activation='softmax'))

# Compile the model with a learning rate schedule
initial_learning_rate = 0.0005
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True)

optimizer = Adam(learning_rate=lr_schedule)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

model.summary()

# Train the model with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

history = model.fit(X_train, Y_train, epochs=50, batch_size=batch_size, validation_data=(X_test, Y_test), callbacks=[early_stopping])

# Evaluate the model
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Generate classification report
y_preds = model.predict(X_test)
y_pred_classes = np.argmax(y_preds, axis=1)
y_true_classes = np.argmax(Y_test, axis=1)
print(classification_report(y_true_classes, y_pred_classes, target_names=list(class_labels.keys())))


Loading data...
5948 train sequences
1487 test sequences


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.4515 - loss: 3.1356 - val_accuracy: 0.7337 - val_loss: 1.0693
Epoch 2/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6804 - loss: 1.6622 - val_accuracy: 0.5931 - val_loss: 1.6879
Epoch 3/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7430 - loss: 1.2636 - val_accuracy: 0.6785 - val_loss: 1.1636
Epoch 4/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7620 - loss: 1.0732 - val_accuracy: 0.7068 - val_loss: 1.3377
Epoch 5/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7821 - loss: 0.9431 - val_accuracy: 0.6295 - val_loss: 1.3549
Epoch 6/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8051 - loss: 0.8511 - val_accuracy: 0.7754 - val_loss: 0.7857
Epoch 7/50
[1m248/248[0m

## Kannada

In [5]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Setting parameters
test_dim = 431  # Dimension to pad/truncate MFCC features
batch_size = 24  # Batch size for training
nb_filter = 16  # Number of filters in Conv1D layers
filter_length_1 = 50  # Length of the first filter
filter_length_2 = 25  # Length of the second filter
nb_epoch = 50  # Number of epochs for training
hidden_dims = 64  # Number of hidden units in Dense layer
n_mfcc = 39  # Number of MFCC coefficients
hop_length = 512  # Hop length for MFCC extraction
n_fft = 480  # FFT window size

# Function to load audio files and extract MFCC features
def load_data(data_dir):
    X = []
    y = []
    class_labels = {}
    class_index = 0

    for subdir, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(subdir, file)
                label = os.path.basename(subdir)
                if label not in class_labels:
                    class_labels[label] = class_index
                    class_index += 1
                
                # Load audio file and extract MFCC features
                audio, sr = librosa.load(file_path, sr=None)
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)
                mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)  # Normalize MFCC features
                if mfcc.shape[1] < test_dim:
                    mfcc = np.pad(mfcc, ((0, 0), (0, test_dim - mfcc.shape[1])), mode='constant')
                else:
                    mfcc = mfcc[:, :test_dim]
                X.append(mfcc.T)
                y.append(class_labels[label])
    
    return np.array(X), np.array(y), class_labels

# Load data
data_dir = '/dist_home/jairam/Kannada/'
print('Loading data...')
X, y, class_labels = load_data(data_dir)

# Ensure X has the correct shape for Conv1D input (batch_size, steps, input_dim)
X = np.expand_dims(X, -1)  # Add a new axis to match (batch_size, steps, input_dim, 1)

# Shuffle the data before splitting
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

nb_classes = len(class_labels)  # Update number of classes based on data
Y_train = to_categorical(y_train, nb_classes)
Y_test = to_categorical(y_test, nb_classes)

# Define the model with additional regularization and learning rate scheduling
model = Sequential()

# Add convolutional layers with BatchNormalization and Dropout
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_1,
                 input_shape=(test_dim, n_mfcc),  # Adjusted input shape for Conv1D
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

model.add(Conv1D(filters=nb_filter,
                 kernel_size=filter_length_2,
                 padding='same',
                 activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.6))

# Flatten the output of the conv layer
model.add(Flatten())

# Add dropout for regularization
model.add(Dropout(0.6))

# Add output layer with softmax activation for multiclass classification
model.add(Dense(nb_classes, activation='softmax'))

# Compile the model with a learning rate schedule
initial_learning_rate = 0.0005
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True)

optimizer = Adam(learning_rate=lr_schedule)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

model.summary()

# Train the model with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)

history = model.fit(X_train, Y_train, epochs=50, batch_size=batch_size, validation_data=(X_test, Y_test), callbacks=[early_stopping])

# Evaluate the model
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Generate classification report
y_preds = model.predict(X_test)
y_pred_classes = np.argmax(y_preds, axis=1)
y_true_classes = np.argmax(Y_test, axis=1)
print(classification_report(y_true_classes, y_pred_classes, target_names=list(class_labels.keys())))


Loading data...
6456 train sequences
1615 test sequences


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.2466 - loss: 4.5016 - val_accuracy: 0.3152 - val_loss: 2.0169
Epoch 2/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3924 - loss: 2.3598 - val_accuracy: 0.5858 - val_loss: 1.4249
Epoch 3/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4961 - loss: 1.6922 - val_accuracy: 0.5975 - val_loss: 1.3461
Epoch 4/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5628 - loss: 1.4869 - val_accuracy: 0.5895 - val_loss: 1.3440
Epoch 5/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6559 - loss: 1.2932 - val_accuracy: 0.6105 - val_loss: 1.3416
Epoch 6/50
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6753 - loss: 1.2237 - val_accuracy: 0.5492 - val_loss: 1.6433
Epoch 7/50
[1m269/269[0m 