## Malayalam

In [15]:
import pickle

# Inspect one saved feature batch to confirm its structure before training.
# NOTE(review): pickle.load can execute arbitrary code; only open files written by this notebook.
with open('Malayalam-eGeMAPS-Features/features_batch_1.pkl', 'rb') as file:
    data = pickle.load(file)

# Display the content of the loaded data
# NOTE(review): print(data) dumps every feature vector in the batch; consider showing a single entry instead.
print(type(data))  # Check the type of the data
print(data)  # Print the actual content


<class 'dict'>
{'/home/ubuntu/Jairam/DialectClassification/Comparison-Study/Malayalam/Kottayam/kottayam_1083.wav': {'label': 'Kottayam', 'egemaps_features': array([ 4.85885773e+01,  1.43769413e-01,  4.03474541e+01,  5.04012146e+01,
        5.45559845e+01,  1.42085304e+01,  1.92815948e+02,  1.24873405e+02,
        1.41092148e+02,  1.28403793e+02,  4.75490540e-01,  7.13055789e-01,
        2.07283482e-01,  3.38676095e-01,  7.52748191e-01,  5.45464694e-01,
        9.04355907e+00,  5.19115543e+00,  7.81002092e+00,  5.29999781e+00,
        1.78301558e-01,  1.18629622e+00, -3.24554825e+01, -3.29270899e-01,
       -5.69578476e+01, -1.75663516e-01, -1.88619938e+01, -6.66414440e-01,
        2.49369907e+00,  3.85421133e+00,  7.79156759e-02,  1.12973464e+00,
        2.41948652e+00,  5.92031002e-01, -3.07648039e+00, -1.68421197e+00,
       -8.87751865e+00, -1.85675550e+00, -1.24536428e+01, -1.66140330e+00,
        1.29558325e+03,  5.30387014e-02,  7.28718933e+02,  1.71551481e-01,
        1.43788586

In [3]:
import os
import pickle
import numpy as np
import torch
import opensmile
from sklearn.preprocessing import LabelEncoder

# Check if GPU is available
# NOTE(review): `device` is not referenced anywhere else in this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize OpenSMILE for eGeMAPS feature extraction
# With the Functionals level each file yields one vector; the cell output reports shape (88,).
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Function to clear GPU memory
def clear_gpu_memory():
    """Release cached CUDA memory and wait for outstanding GPU work.

    Does nothing on machines without a usable CUDA device, where
    ``torch.cuda.synchronize()`` would otherwise raise.
    """
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

# Function to extract eGeMAPS features
def extract_egemaps_features(audio_file):
    """Run OpenSMILE on one audio file and return its features as a flat array.

    Args:
        audio_file: Path of the audio file passed to ``smile.process_file``.

    Returns:
        The values of the resulting frame, flattened to one dimension.
    """
    flat_vector = smile.process_file(audio_file).values.flatten()
    # Debugging aid: report the vector shape for every processed file.
    print(f"eGeMAPS features shape: {flat_vector.shape}")
    return flat_vector

# Function to process a single audio file
def process_audio_file(audio_file):
    """Extract the features of one audio file and pair them with its path.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        A ``(audio_file, features)`` tuple.
    """
    extracted = extract_egemaps_features(audio_file)
    # GPU memory is cleared once per file, right after extraction.
    clear_gpu_memory()
    return (audio_file, extracted)

# Function to process audio files in parallel and batch-wise
def process_audio_files_parallel(main_folder, batch_size=50, output_dir='Malayalam-eGeMAPS-Features'):
    """Extract eGeMAPS features for every ``.wav`` file under ``main_folder``, batch by batch.

    ``main_folder`` is expected to hold one sub-directory per class; the name of
    the sub-directory is stored as the label of each file inside it. Despite the
    function name, files are processed one after another. Every batch is pickled
    to ``<output_dir>/features_batch_<i>.pkl`` and the set of finished batch
    indices is kept in ``<output_dir>/processed_batches.pkl`` so that an
    interrupted run skips the batches it already completed.

    Args:
        main_folder: Directory containing the per-class sub-directories.
        batch_size: Number of audio files written to each batch file.
        output_dir: Directory receiving the batch and progress pickles.

    Returns:
        The total number of batches (finished earlier or in this call).
    """
    audio_files = []
    # Sorted traversal keeps batch membership identical between runs; resuming
    # by batch index is only meaningful if batch i always holds the same files.
    for class_label in sorted(os.listdir(main_folder)):
        class_path = os.path.join(main_folder, class_label)
        if not os.path.isdir(class_path):
            continue
        for audio_file in sorted(os.listdir(class_path)):
            if audio_file.endswith('.wav'):
                audio_files.append(os.path.join(class_path, audio_file))

    os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
    progress_path = os.path.join(output_dir, 'processed_batches.pkl')

    processed_batches = set()
    if os.path.exists(progress_path):
        with open(progress_path, 'rb') as f:
            processed_batches = pickle.load(f)
        print(f"Resuming from batch {len(processed_batches) + 1}")
    else:
        print("Starting from the beginning")

    # Ceiling division: an exact multiple of batch_size (or zero files) must not
    # produce a trailing empty batch.
    num_batches = (len(audio_files) + batch_size - 1) // batch_size
    for i in range(num_batches):
        if i in processed_batches:
            continue

        clear_gpu_memory()  # Clear GPU memory before each batch

        batch_files = audio_files[i * batch_size:(i + 1) * batch_size]
        features_dict = {}
        for audio_file in batch_files:
            audio_path, egemaps_features = process_audio_file(audio_file)
            # The parent directory name is the class label.
            class_label = os.path.basename(os.path.dirname(audio_path))
            features_dict[audio_path] = {
                'label': class_label,
                'egemaps_features': egemaps_features
            }

        # Save intermediate results
        with open(os.path.join(output_dir, f'features_batch_{i}.pkl'), 'wb') as f:
            pickle.dump(features_dict, f)

        # Record progress only after the batch file has been written.
        processed_batches.add(i)
        with open(progress_path, 'wb') as f:
            pickle.dump(processed_batches, f)

        print(f"Processed batch {i+1}/{num_batches}")

        # Clear memory after each batch
        clear_gpu_memory()

    return num_batches

# Load and concatenate results
def load_and_concatenate_batches(num_batches):
    """Merge the per-batch feature pickles into one dictionary.

    Batch files are read from ``Malayalam-eGeMAPS-Features`` for indices
    ``0 .. num_batches - 1``. A missing file produces a warning and is skipped.

    Args:
        num_batches: Number of batch indices to look for.

    Returns:
        A dict combining the entries of every batch file that was found.
    """
    merged = {}
    for batch_idx in range(num_batches):
        batch_path = f'Malayalam-eGeMAPS-Features/features_batch_{batch_idx}.pkl'
        try:
            with open(batch_path, 'rb') as handle:
                loaded = pickle.load(handle)
        except FileNotFoundError:
            print(f"Warning: Malayalam-eGeMAPS-Features/features_batch_{batch_idx}.pkl not found. Skipping this batch.")
            continue
        merged.update(loaded)
    return merged

# Pooling function
def pooling_function(features_dict, pool_type='mean'):
    """Pool each file's features over the frame axis (axis 0).

    A one-dimensional feature vector is treated as a single frame, so it is
    returned with its length intact instead of being collapsed to one scalar.
    Inputs with two or more dimensions are pooled over axis 0 as before.

    Args:
        features_dict: Maps an audio path to a dict with the keys
            ``'label'`` and ``'egemaps_features'``.
        pool_type: One of ``'mean'``, ``'max'`` or ``'min'``.

    Returns:
        A dict mapping each audio path to ``{'label', 'pooled_features'}``.

    Raises:
        ValueError: If ``pool_type`` is not one of the supported names.
    """
    reducers = {'mean': np.mean, 'max': np.max, 'min': np.min}
    # Validate once, up front, so a bad pool_type fails even for an empty input.
    if pool_type not in reducers:
        raise ValueError("Invalid pool_type. Choose 'mean', 'max', or 'min'.")
    reduce_frames = reducers[pool_type]

    pooled_features_dict = {}
    for audio_path, data in features_dict.items():
        # View a single vector as one frame so that pooling keeps every feature.
        frames = np.atleast_2d(data['egemaps_features'])
        pooled_features_dict[audio_path] = {
            'label': data['label'],
            'pooled_features': reduce_frames(frames, axis=0)
        }

    return pooled_features_dict

# Prepare dataset
# def prepare_dataset(features_dict):
#     X = []
#     y = []
#     label_encoder = LabelEncoder()
    
#     for audio_path, features in features_dict.items():
#         egemaps_features = features['pooled_features']
#         label = features['label']
        
#         X.append(egemaps_features)
#         y.append(label)
    
#     X = np.array(X)
#     y = np.array(y)
#     y = label_encoder.fit_transform(y)
    
#     num_classes = len(label_encoder.classes_)
    
#     print(f"Features shape: {X.shape}, Labels shape: {y.shape}, Number of classes: {num_classes}")
    
#     return X, y, num_classes

# Example usage:
# Process and save features in batches
# NOTE(review): absolute, machine-specific dataset path; consider a configurable data directory.
num_batches = process_audio_files_parallel(main_folder='/dist_home/jairam/Malayalam/', batch_size=100)

# Load and concatenate all features
features_egemaps = load_and_concatenate_batches(num_batches)

# Apply pooling to the features
pooled_features = pooling_function(features_egemaps, pool_type='mean')

# Prepare the dataset for training
# X, y, num_classes = prepare_dataset(pooled_features)

Starting from the beginning
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS feat

In [4]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Function to Load Pickle Files from a Directory
def load_data_from_directory(directory):
    """Collect feature vectors and labels from every pickle file under ``directory``.

    Each pickle is expected to hold a dict whose values are dicts with the keys
    ``'egemaps_features'`` and ``'label'``. Files or entries with another shape
    are reported on stdout and skipped. Directories and files are visited in
    sorted order so the returned sample order, and therefore any seeded split
    made from it, is the same on every run.

    Args:
        directory: Root directory searched recursively for ``.pkl`` files.

    Returns:
        ``(X, y)`` as NumPy arrays of features and labels, in matching order.
    """
    X = []
    y = []

    # Iterate through all files in the directory and its subdirectories
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort fixes the order in which os.walk descends
        for file in sorted(files):
            if not file.endswith('.pkl'):
                continue
            file_path = os.path.join(root, file)
            # NOTE: pickle.load can run arbitrary code; use trusted directories only.
            with open(file_path, 'rb') as f:
                data = pickle.load(f)

            # Check if the data is a dictionary
            if not isinstance(data, dict):
                print(f"Unexpected data type in file: {file_path}")
                continue

            for value in data.values():
                # Guard against non-dict entries, for which `in` would raise TypeError.
                if isinstance(value, dict) and 'egemaps_features' in value and 'label' in value:
                    X.append(value['egemaps_features'])
                    y.append(value['label'])
                else:
                    print(f"Unexpected structure in file: {file_path}")

    return np.array(X), np.array(y)

# Directory holding the feature pickles written by the extraction cell
directory = 'Malayalam-eGeMAPS-Features/'

X, y = load_data_from_directory(directory)

# Step 2: Encode labels
# String class names become integer codes; num_classes sizes the model's output layer.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# Step 3: Split data into training, validation, and test sets
# 70% train; the held-out 30% is halved into validation and test (15% each).
# NOTE(review): neither split passes `stratify`, so class proportions may differ between subsets.
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Step 4: Create DataLoaders
# NOTE(review): features are converted to tensors without any scaling step in this cell; consider standardising them.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Define your DNN model
class AudioClassifier(nn.Module):
    """Five-layer fully connected classifier over fixed-length feature vectors.

    Layer widths are ``input_dim -> 256 -> 128 -> 64 -> 32 -> num_classes``
    with ReLU activations and dropout (p=0.1) after the first three layers.

    Args:
        num_classes: Size of the output layer (one logit per class).
        input_dim: Length of each input vector. When omitted, it is read from
            the notebook-global ``X_train`` (``X_train.shape[1]``), which keeps
            existing ``AudioClassifier(num_classes)`` calls working.
    """

    def __init__(self, num_classes, input_dim=None):
        super(AudioClassifier, self).__init__()
        if input_dim is None:
            # Backward-compatible fallback to the training matrix defined in the notebook.
            input_dim = X_train.shape[1]
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, num_classes)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for a batch ``x`` of feature vectors."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.relu(self.fc4(x))
        # No activation on the last layer: CrossEntropyLoss expects logits.
        x = self.fc5(x)
        return x

# Step 5: Define the training function
def train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4):
    """Train ``model`` with Adam and cross-entropy, logging validation accuracy.

    Args:
        model: Network mapping a batch of inputs to class logits.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        val_loader: Loader yielding ``(inputs, targets)`` validation batches.
        num_epochs: Number of passes over the training data.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The same ``model`` object, trained in place.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch_idx in range(num_epochs):
        # Optimisation pass over the training batches.
        model.train()
        weighted_loss_sum = 0.0
        for inputs, targets in train_loader:
            adam.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            adam.step()
            # Weight by batch size so the epoch figure is a per-sample average.
            weighted_loss_sum += batch_loss.item() * inputs.size(0)
        epoch_loss = weighted_loss_sum / len(train_loader.dataset)

        # Validation pass, without gradient tracking.
        model.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                predicted = torch.max(model(inputs), 1).indices
                seen += targets.size(0)
                hits += (predicted == targets).sum().item()
        val_accuracy = hits / seen

        print(f"Epoch {epoch_idx + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    return model

# Step 6: Initialize and Train the Model
model = AudioClassifier(num_classes)
model = train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4)

# Step 7: Save the Trained Model
# Only the parameters (state_dict) are written, so reloading requires constructing AudioClassifier first.
# NOTE(review): the fitted label_encoder is not saved in this cell; the class-index mapping would be needed at inference time.
torch.save(model.state_dict(), 'eGeMAPS_Malayalam_classifier_50.pth')


Unexpected data type in file: Malayalam-eGeMAPS-Features/processed_batches.pkl
Epoch 1/50, Loss: 3.3761, Validation Accuracy: 0.3545
Epoch 2/50, Loss: 1.5518, Validation Accuracy: 0.5421
Epoch 3/50, Loss: 1.3612, Validation Accuracy: 0.5438
Epoch 4/50, Loss: 1.2246, Validation Accuracy: 0.5339
Epoch 5/50, Loss: 1.1559, Validation Accuracy: 0.5702
Epoch 6/50, Loss: 1.0996, Validation Accuracy: 0.5719
Epoch 7/50, Loss: 1.0172, Validation Accuracy: 0.5719
Epoch 8/50, Loss: 0.9800, Validation Accuracy: 0.5694
Epoch 9/50, Loss: 0.9530, Validation Accuracy: 0.5736
Epoch 10/50, Loss: 0.9329, Validation Accuracy: 0.5785
Epoch 11/50, Loss: 0.9336, Validation Accuracy: 0.5818
Epoch 12/50, Loss: 0.9250, Validation Accuracy: 0.5826
Epoch 13/50, Loss: 0.9070, Validation Accuracy: 0.5860
Epoch 14/50, Loss: 0.9170, Validation Accuracy: 0.5917
Epoch 15/50, Loss: 0.9047, Validation Accuracy: 0.5785
Epoch 16/50, Loss: 0.8723, Validation Accuracy: 0.5843
Epoch 17/50, Loss: 0.8822, Validation Accuracy: 0.

In [6]:
from sklearn.metrics import classification_report

# Evaluate the model
# NOTE(review): relies on `model` and `test_loader` left in the kernel by the training cell.
model.eval()
all_labels = []
all_preds = []

# Gradients are not needed for scoring.
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # The index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        
        all_labels.extend(batch_y.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Calculate the accuracy
correct = sum(p == l for p, l in zip(all_preds, all_labels))
total = len(all_labels)
test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")

# Generate and print the classification report
# NOTE(review): rows are keyed by integer class codes; presumably label_encoder.classes_ from the training cell maps them back to dialect names — confirm.
report = classification_report(all_labels, all_preds)
print("Classification Report:")
print(report)

Test Accuracy: 0.6251
Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.77      0.62       279
           1       0.72      0.99      0.83       379
           2       0.62      0.50      0.55       323
           3       0.50      0.01      0.03       230

    accuracy                           0.63      1211
   macro avg       0.59      0.57      0.51      1211
weighted avg       0.60      0.63      0.56      1211



## Kannada

In [2]:
import os
import pickle
import numpy as np
import torch
import opensmile
from sklearn.preprocessing import LabelEncoder

# Check if GPU is available
# NOTE(review): `device` is not referenced anywhere else in this cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize OpenSMILE for eGeMAPS feature extraction
# With the Functionals level each file yields one vector; the cell output reports shape (88,).
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Function to clear GPU memory
def clear_gpu_memory():
    """Release cached CUDA memory and wait for outstanding GPU work.

    Does nothing on machines without a usable CUDA device, where
    ``torch.cuda.synchronize()`` would otherwise raise.
    """
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

# Function to extract eGeMAPS features
def extract_egemaps_features(audio_file):
    """Run OpenSMILE on one audio file and return its features as a flat array.

    Args:
        audio_file: Path of the audio file passed to ``smile.process_file``.

    Returns:
        The values of the resulting frame, flattened to one dimension.
    """
    flat_vector = smile.process_file(audio_file).values.flatten()
    # Debugging aid: report the vector shape for every processed file.
    print(f"eGeMAPS features shape: {flat_vector.shape}")
    return flat_vector

# Function to process a single audio file
def process_audio_file(audio_file):
    """Extract the features of one audio file and pair them with its path.

    Args:
        audio_file: Path of the audio file to process.

    Returns:
        A ``(audio_file, features)`` tuple.
    """
    extracted = extract_egemaps_features(audio_file)
    # GPU memory is cleared once per file, right after extraction.
    clear_gpu_memory()
    return (audio_file, extracted)

# Function to process audio files in parallel and batch-wise
def process_audio_files_parallel(main_folder, batch_size=50, output_dir='Kannada-eGeMAPS-Features'):
    """Extract eGeMAPS features for every ``.wav`` file under ``main_folder``, batch by batch.

    ``main_folder`` is expected to hold one sub-directory per class; the name of
    the sub-directory is stored as the label of each file inside it. Despite the
    function name, files are processed one after another. Every batch is pickled
    to ``<output_dir>/features_batch_<i>.pkl`` and the set of finished batch
    indices is kept in ``<output_dir>/processed_batches.pkl`` so that an
    interrupted run skips the batches it already completed.

    Args:
        main_folder: Directory containing the per-class sub-directories.
        batch_size: Number of audio files written to each batch file.
        output_dir: Directory receiving the batch and progress pickles.

    Returns:
        The total number of batches (finished earlier or in this call).
    """
    audio_files = []
    # Sorted traversal keeps batch membership identical between runs; resuming
    # by batch index is only meaningful if batch i always holds the same files.
    for class_label in sorted(os.listdir(main_folder)):
        class_path = os.path.join(main_folder, class_label)
        if not os.path.isdir(class_path):
            continue
        for audio_file in sorted(os.listdir(class_path)):
            if audio_file.endswith('.wav'):
                audio_files.append(os.path.join(class_path, audio_file))

    os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
    progress_path = os.path.join(output_dir, 'processed_batches.pkl')

    processed_batches = set()
    if os.path.exists(progress_path):
        with open(progress_path, 'rb') as f:
            processed_batches = pickle.load(f)
        print(f"Resuming from batch {len(processed_batches) + 1}")
    else:
        print("Starting from the beginning")

    # Ceiling division: an exact multiple of batch_size (or zero files) must not
    # produce a trailing empty batch.
    num_batches = (len(audio_files) + batch_size - 1) // batch_size
    for i in range(num_batches):
        if i in processed_batches:
            continue

        clear_gpu_memory()  # Clear GPU memory before each batch

        batch_files = audio_files[i * batch_size:(i + 1) * batch_size]
        features_dict = {}
        for audio_file in batch_files:
            audio_path, egemaps_features = process_audio_file(audio_file)
            # The parent directory name is the class label.
            class_label = os.path.basename(os.path.dirname(audio_path))
            features_dict[audio_path] = {
                'label': class_label,
                'egemaps_features': egemaps_features
            }

        # Save intermediate results
        with open(os.path.join(output_dir, f'features_batch_{i}.pkl'), 'wb') as f:
            pickle.dump(features_dict, f)

        # Record progress only after the batch file has been written.
        processed_batches.add(i)
        with open(progress_path, 'wb') as f:
            pickle.dump(processed_batches, f)

        print(f"Processed batch {i+1}/{num_batches}")

        # Clear memory after each batch
        clear_gpu_memory()

    return num_batches

# Load and concatenate results
def load_and_concatenate_batches(num_batches):
    """Merge the per-batch feature pickles into one dictionary.

    Batch files are read from ``Kannada-eGeMAPS-Features`` for indices
    ``0 .. num_batches - 1``. A missing file produces a warning and is skipped.

    Args:
        num_batches: Number of batch indices to look for.

    Returns:
        A dict combining the entries of every batch file that was found.
    """
    merged = {}
    for batch_idx in range(num_batches):
        batch_path = f'Kannada-eGeMAPS-Features/features_batch_{batch_idx}.pkl'
        try:
            with open(batch_path, 'rb') as handle:
                loaded = pickle.load(handle)
        except FileNotFoundError:
            print(f"Warning: Kannada-eGeMAPS-Features/features_batch_{batch_idx}.pkl not found. Skipping this batch.")
            continue
        merged.update(loaded)
    return merged

# Pooling function
def pooling_function(features_dict, pool_type='mean'):
    """Pool each file's features over the frame axis (axis 0).

    A one-dimensional feature vector is treated as a single frame, so it is
    returned with its length intact instead of being collapsed to one scalar.
    Inputs with two or more dimensions are pooled over axis 0 as before.

    Args:
        features_dict: Maps an audio path to a dict with the keys
            ``'label'`` and ``'egemaps_features'``.
        pool_type: One of ``'mean'``, ``'max'`` or ``'min'``.

    Returns:
        A dict mapping each audio path to ``{'label', 'pooled_features'}``.

    Raises:
        ValueError: If ``pool_type`` is not one of the supported names.
    """
    reducers = {'mean': np.mean, 'max': np.max, 'min': np.min}
    # Validate once, up front, so a bad pool_type fails even for an empty input.
    if pool_type not in reducers:
        raise ValueError("Invalid pool_type. Choose 'mean', 'max', or 'min'.")
    reduce_frames = reducers[pool_type]

    pooled_features_dict = {}
    for audio_path, data in features_dict.items():
        # View a single vector as one frame so that pooling keeps every feature.
        frames = np.atleast_2d(data['egemaps_features'])
        pooled_features_dict[audio_path] = {
            'label': data['label'],
            'pooled_features': reduce_frames(frames, axis=0)
        }

    return pooled_features_dict

# Prepare dataset
# def prepare_dataset(features_dict):
#     X = []
#     y = []
#     label_encoder = LabelEncoder()
    
#     for audio_path, features in features_dict.items():
#         egemaps_features = features['pooled_features']
#         label = features['label']
        
#         X.append(egemaps_features)
#         y.append(label)
    
#     X = np.array(X)
#     y = np.array(y)
#     y = label_encoder.fit_transform(y)
    
#     num_classes = len(label_encoder.classes_)
    
#     print(f"Features shape: {X.shape}, Labels shape: {y.shape}, Number of classes: {num_classes}")
    
#     return X, y, num_classes

# Example usage:
# Process and save features in batches
# NOTE(review): absolute, machine-specific dataset path; consider a configurable data directory.
num_batches = process_audio_files_parallel(main_folder='/home/ubuntu/Jairam/Dialects/Kannada/', batch_size=100)

# Load and concatenate all features
features_egemaps = load_and_concatenate_batches(num_batches)

# Apply pooling to the features
pooled_features = pooling_function(features_egemaps, pool_type='mean')

# Prepare the dataset for training
# X, y, num_classes = prepare_dataset(pooled_features)

Starting from the beginning
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS features shape: (88,)
eGeMAPS feat

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Function to Load Pickle Files from a Directory
def load_data_from_directory(directory):
    """Collect feature vectors and labels from every pickle file under ``directory``.

    Each pickle is expected to hold a dict whose values are dicts with the keys
    ``'egemaps_features'`` and ``'label'``. Files or entries with another shape
    are reported on stdout and skipped. Directories and files are visited in
    sorted order so the returned sample order, and therefore any seeded split
    made from it, is the same on every run.

    Args:
        directory: Root directory searched recursively for ``.pkl`` files.

    Returns:
        ``(X, y)`` as NumPy arrays of features and labels, in matching order.
    """
    X = []
    y = []

    # Iterate through all files in the directory and its subdirectories
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort fixes the order in which os.walk descends
        for file in sorted(files):
            if not file.endswith('.pkl'):
                continue
            file_path = os.path.join(root, file)
            # NOTE: pickle.load can run arbitrary code; use trusted directories only.
            with open(file_path, 'rb') as f:
                data = pickle.load(f)

            # Check if the data is a dictionary
            if not isinstance(data, dict):
                print(f"Unexpected data type in file: {file_path}")
                continue

            for value in data.values():
                # Guard against non-dict entries, for which `in` would raise TypeError.
                if isinstance(value, dict) and 'egemaps_features' in value and 'label' in value:
                    X.append(value['egemaps_features'])
                    y.append(value['label'])
                else:
                    print(f"Unexpected structure in file: {file_path}")

    return np.array(X), np.array(y)

# Directory holding the feature pickles written by the extraction cell
directory = 'Kannada-eGeMAPS-Features/'

X, y = load_data_from_directory(directory)

# Step 2: Encode labels
# String class names become integer codes; num_classes sizes the model's output layer.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# Step 3: Split data into training, validation, and test sets
# 70% train; the held-out 30% is halved into validation and test (15% each).
# NOTE(review): neither split passes `stratify`, so class proportions may differ between subsets.
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Step 4: Create DataLoaders
# NOTE(review): features are converted to tensors without any scaling step in this cell; consider standardising them.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Define your DNN model
class AudioClassifier(nn.Module):
    """Five-layer fully connected classifier over fixed-length feature vectors.

    Layer widths are ``input_dim -> 256 -> 128 -> 64 -> 32 -> num_classes``
    with ReLU activations and dropout (p=0.1) after the first three layers.

    Args:
        num_classes: Size of the output layer (one logit per class).
        input_dim: Length of each input vector. When omitted, it is read from
            the notebook-global ``X_train`` (``X_train.shape[1]``), which keeps
            existing ``AudioClassifier(num_classes)`` calls working.
    """

    def __init__(self, num_classes, input_dim=None):
        super(AudioClassifier, self).__init__()
        if input_dim is None:
            # Backward-compatible fallback to the training matrix defined in the notebook.
            input_dim = X_train.shape[1]
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, num_classes)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for a batch ``x`` of feature vectors."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.relu(self.fc4(x))
        # No activation on the last layer: CrossEntropyLoss expects logits.
        x = self.fc5(x)
        return x

# Step 5: Define the training function
def train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4):
    """Train ``model`` with Adam and cross-entropy, logging validation accuracy.

    Args:
        model: Network mapping a batch of inputs to class logits.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        val_loader: Loader yielding ``(inputs, targets)`` validation batches.
        num_epochs: Number of passes over the training data.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The same ``model`` object, trained in place.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch_idx in range(num_epochs):
        # Optimisation pass over the training batches.
        model.train()
        weighted_loss_sum = 0.0
        for inputs, targets in train_loader:
            adam.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            adam.step()
            # Weight by batch size so the epoch figure is a per-sample average.
            weighted_loss_sum += batch_loss.item() * inputs.size(0)
        epoch_loss = weighted_loss_sum / len(train_loader.dataset)

        # Validation pass, without gradient tracking.
        model.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                predicted = torch.max(model(inputs), 1).indices
                seen += targets.size(0)
                hits += (predicted == targets).sum().item()
        val_accuracy = hits / seen

        print(f"Epoch {epoch_idx + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    return model

# Step 6: Initialize and Train the Model
model = AudioClassifier(num_classes)
model = train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4)

# Step 7: Save the Trained Model
# Only the parameters (state_dict) are written, so reloading requires constructing AudioClassifier first.
# NOTE(review): the fitted label_encoder is not saved in this cell; the class-index mapping would be needed at inference time.
torch.save(model.state_dict(), 'eGeMAPS_Kannada_classifier_50.pth')

Unexpected data type in file: Kannada-eGeMAPS-Features/processed_batches.pkl
Epoch 1/50, Loss: 2.3953, Validation Accuracy: 0.3088
Epoch 2/50, Loss: 1.6146, Validation Accuracy: 0.2618
Epoch 3/50, Loss: 1.4779, Validation Accuracy: 0.2667
Epoch 4/50, Loss: 1.4473, Validation Accuracy: 0.2576
Epoch 5/50, Loss: 1.4295, Validation Accuracy: 0.2510
Epoch 6/50, Loss: 1.4118, Validation Accuracy: 0.2502
Epoch 7/50, Loss: 1.4054, Validation Accuracy: 0.2510
Epoch 8/50, Loss: 1.3972, Validation Accuracy: 0.2510
Epoch 9/50, Loss: 1.3962, Validation Accuracy: 0.2510
Epoch 10/50, Loss: 1.3956, Validation Accuracy: 0.2535
Epoch 11/50, Loss: 1.3924, Validation Accuracy: 0.2560
Epoch 12/50, Loss: 1.3950, Validation Accuracy: 0.2519
Epoch 13/50, Loss: 1.3913, Validation Accuracy: 0.2519
Epoch 14/50, Loss: 1.3931, Validation Accuracy: 0.2519
Epoch 15/50, Loss: 1.3911, Validation Accuracy: 0.2527
Epoch 16/50, Loss: 1.3917, Validation Accuracy: 0.2519
Epoch 17/50, Loss: 1.3874, Validation Accuracy: 0.25

In [4]:
from sklearn.metrics import classification_report

# Evaluate the model
# NOTE(review): relies on `model` and `test_loader` left in the kernel by the training cell.
model.eval()
all_labels = []
all_preds = []

# Gradients are not needed for scoring.
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # The index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        
        all_labels.extend(batch_y.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Calculate the accuracy
correct = sum(p == l for p, l in zip(all_preds, all_labels))
total = len(all_labels)
test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")

# Generate and print the classification report
# NOTE(review): rows are keyed by integer class codes; presumably label_encoder.classes_ from the training cell maps them back to dialect names — confirm.
report = classification_report(all_labels, all_preds)
print("Classification Report:")
print(report)

Test Accuracy: 0.4310
Classification Report:
              precision    recall  f1-score   support

           0       0.37      0.19      0.25       312
           1       0.61      0.77      0.68       321
           2       0.35      0.53      0.42       292
           3       0.29      0.22      0.25       286

    accuracy                           0.43      1211
   macro avg       0.41      0.43      0.40      1211
weighted avg       0.41      0.43      0.41      1211



## Tamil

In [20]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Step 1: Function to Load Pickle Files from a Directory
def load_data_from_directory(directory):
    """Collect feature vectors and labels from every pickle file under ``directory``.

    Each pickle is expected to hold a dict whose values are dicts with the keys
    ``'egemaps_features'`` and ``'label'``. Files or entries with another shape
    are reported on stdout and skipped. Directories and files are visited in
    sorted order so the returned sample order, and therefore any seeded split
    made from it, is the same on every run.

    Args:
        directory: Root directory searched recursively for ``.pkl`` files.

    Returns:
        ``(X, y)`` as NumPy arrays of features and labels, in matching order.
    """
    X = []
    y = []

    # Iterate through all files in the directory and its subdirectories
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort fixes the order in which os.walk descends
        for file in sorted(files):
            if not file.endswith('.pkl'):
                continue
            file_path = os.path.join(root, file)
            # NOTE: pickle.load can run arbitrary code; use trusted directories only.
            with open(file_path, 'rb') as f:
                data = pickle.load(f)

            # Check if the data is a dictionary
            if not isinstance(data, dict):
                print(f"Unexpected data type in file: {file_path}")
                continue

            for value in data.values():
                # Guard against non-dict entries, for which `in` would raise TypeError.
                if isinstance(value, dict) and 'egemaps_features' in value and 'label' in value:
                    X.append(value['egemaps_features'])
                    y.append(value['label'])
                else:
                    print(f"Unexpected structure in file: {file_path}")

    return np.array(X), np.array(y)

# Directory holding the Tamil feature pickles
directory = 'Tamil-eGeMAPS-Features/'

X, y = load_data_from_directory(directory)

# Step 2: Encode labels
# String class names become integer codes; num_classes sizes the model's output layer.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# Step 3: Split data into training, validation, and test sets
# 70% train; the held-out 30% is halved into validation and test (15% each).
# NOTE(review): neither split passes `stratify`, so class proportions may differ between subsets.
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Step 4: Create DataLoaders
# NOTE(review): features are converted to tensors without any scaling step in this cell; consider standardising them.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Define your DNN model
class AudioClassifier(nn.Module):
    """Five-layer fully connected classifier over fixed-length feature vectors.

    Layer widths are ``input_dim -> 256 -> 128 -> 64 -> 32 -> num_classes``
    with ReLU activations and dropout (p=0.1) after the first three layers.

    Args:
        num_classes: Size of the output layer (one logit per class).
        input_dim: Length of each input vector. When omitted, it is read from
            the notebook-global ``X_train`` (``X_train.shape[1]``), which keeps
            existing ``AudioClassifier(num_classes)`` calls working.
    """

    def __init__(self, num_classes, input_dim=None):
        super(AudioClassifier, self).__init__()
        if input_dim is None:
            # Backward-compatible fallback to the training matrix defined in the notebook.
            input_dim = X_train.shape[1]
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, num_classes)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits for a batch ``x`` of feature vectors."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.relu(self.fc4(x))
        # No activation on the last layer: CrossEntropyLoss expects logits.
        x = self.fc5(x)
        return x

# Step 5: Define the training function
def train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4):
    """Train ``model`` with Adam and cross-entropy, logging validation accuracy.

    Args:
        model: Network mapping a batch of inputs to class logits.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        val_loader: Loader yielding ``(inputs, targets)`` validation batches.
        num_epochs: Number of passes over the training data.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The same ``model`` object, trained in place.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch_idx in range(num_epochs):
        # Optimisation pass over the training batches.
        model.train()
        weighted_loss_sum = 0.0
        for inputs, targets in train_loader:
            adam.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            adam.step()
            # Weight by batch size so the epoch figure is a per-sample average.
            weighted_loss_sum += batch_loss.item() * inputs.size(0)
        epoch_loss = weighted_loss_sum / len(train_loader.dataset)

        # Validation pass, without gradient tracking.
        model.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                predicted = torch.max(model(inputs), 1).indices
                seen += targets.size(0)
                hits += (predicted == targets).sum().item()
        val_accuracy = hits / seen

        print(f"Epoch {epoch_idx + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    return model

# Step 6: Initialize and Train the Model
model = AudioClassifier(num_classes)
model = train_model(model, train_loader, val_loader, num_epochs=50, learning_rate=1e-4)

# Step 7: Save the Trained Model
# Only the parameters (state_dict) are written, so reloading requires constructing AudioClassifier first.
# NOTE(review): the fitted label_encoder is not saved in this cell; the class-index mapping would be needed at inference time.
torch.save(model.state_dict(), 'eGeMAPS_tamil_classifier_50.pth')

Unexpected data type in file: Tamil-eGeMAPS-Features/processed_batches.pkl
Epoch 1/50, Loss: 2.7229, Validation Accuracy: 0.2552
Epoch 2/50, Loss: 1.6942, Validation Accuracy: 0.2785
Epoch 3/50, Loss: 1.5452, Validation Accuracy: 0.4109
Epoch 4/50, Loss: 1.4832, Validation Accuracy: 0.4197
Epoch 5/50, Loss: 1.4624, Validation Accuracy: 0.4406
Epoch 6/50, Loss: 1.4080, Validation Accuracy: 0.3957
Epoch 7/50, Loss: 1.3859, Validation Accuracy: 0.4535
Epoch 8/50, Loss: 1.3466, Validation Accuracy: 0.4270
Epoch 9/50, Loss: 1.3242, Validation Accuracy: 0.4815
Epoch 10/50, Loss: 1.2857, Validation Accuracy: 0.4952
Epoch 11/50, Loss: 1.2260, Validation Accuracy: 0.4984
Epoch 12/50, Loss: 1.1930, Validation Accuracy: 0.4976
Epoch 13/50, Loss: 1.1410, Validation Accuracy: 0.5193
Epoch 14/50, Loss: 1.1040, Validation Accuracy: 0.5209
Epoch 15/50, Loss: 1.0747, Validation Accuracy: 0.5233
Epoch 16/50, Loss: 1.0503, Validation Accuracy: 0.5417
Epoch 17/50, Loss: 1.0204, Validation Accuracy: 0.5417

In [21]:
from sklearn.metrics import classification_report

# Evaluate the model
# NOTE(review): relies on `model` and `test_loader` left in the kernel by the training cell.
model.eval()
all_labels = []
all_preds = []

# Gradients are not needed for scoring.
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        # The index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        
        all_labels.extend(batch_y.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Calculate the accuracy
correct = sum(p == l for p, l in zip(all_preds, all_labels))
total = len(all_labels)
test_accuracy = correct / total
print(f"Test Accuracy: {test_accuracy:.4f}")

# Generate and print the classification report
# NOTE(review): rows are keyed by integer class codes; presumably label_encoder.classes_ from the training cell maps them back to dialect names — confirm.
report = classification_report(all_labels, all_preds)
print("Classification Report:")
print(report)

Test Accuracy: 0.6921
Classification Report:
              precision    recall  f1-score   support

           0       0.47      0.72      0.57       263
           1       0.97      0.94      0.95       337
           2       0.76      0.83      0.79       339
           3       0.51      0.24      0.33       308

    accuracy                           0.69      1247
   macro avg       0.68      0.68      0.66      1247
weighted avg       0.69      0.69      0.67      1247

