In [20]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from sklearn.model_selection import train_test_split
import librosa
from tqdm import tqdm

In [21]:
# Load data
def load_data(data_path):
    """Read a two-column ``filename,label`` CSV file into parallel lists.

    The first line is treated as a header and skipped. Every other non-blank
    line must hold exactly two comma-separated fields; surrounding whitespace
    of each field is ignored. A label equal to ``yes`` (case-insensitive)
    becomes 1, any other label becomes 0. Lines with a different number of
    fields are reported on stdout and skipped.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A ``(data, labels)`` tuple: the list of filenames and the list of
        0/1 integer labels, both in file order. Both lists are empty when
        the file has no data rows (or is completely empty).
    """
    data = []
    labels = []
    with open(data_path, 'r') as file:
        # The None default keeps an empty file from raising StopIteration.
        next(file, None)  # Skip header
        for line in file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not records.
                continue
            # Strip each field so "name.wav, yes" is still recognised as "yes".
            values = [value.strip() for value in line.split(',')]
            if len(values) == 2:
                filename, label = values
                data.append(filename)
                labels.append(1 if label.lower() == 'yes' else 0)  # Convert labels to binary (0 or 1)
            else:
                print(f"Issue with line: {line}")

    return data, labels

In [22]:
# Preprocess audio data
def preprocess_audio(file_path, target_duration=10, sr=None, n_mfcc=13):
    """Load an audio file and return MFCC features of a fixed-duration clip.

    The signal is zero-padded at the end, or truncated, so that it lasts
    exactly ``target_duration`` seconds before the MFCCs are computed.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        target_duration: Clip length in seconds to pad or truncate to.
        sr: Sample rate passed to ``librosa.load``. The default ``None``
            keeps each file's native rate. Because the clip length in samples
            is ``target_duration * sample_rate``, files with different native
            rates can produce feature arrays with a different number of
            frames; pass one fixed rate here when the outputs must be
            stackable into a single array.
        n_mfcc: Number of MFCC coefficients requested from
            ``librosa.feature.mfcc``.

    Returns:
        The array returned by ``librosa.feature.mfcc`` for the padded or
        truncated signal.
    """
    # Load audio file; librosa returns the samples and the rate actually used.
    audio, sample_rate = librosa.load(file_path, sr=sr)

    # Pad or truncate audio to the target duration
    target_length = int(target_duration * sample_rate)
    n_missing = target_length - len(audio)
    if n_missing > 0:
        audio = np.pad(audio, (0, n_missing))
    else:
        audio = audio[:target_length]

    # Extract Mel-frequency cepstral coefficients (MFCCs)
    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)


In [23]:
def build_model(input_shape):
    """Assemble the uncompiled CNN used for binary classification.

    The stack is: a 32-filter 3x3 ReLU convolution, 2x2 max pooling, batch
    normalisation, flatten, a 128-unit ReLU dense layer, dropout with rate
    0.5 and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, forwarded to the first
            convolution layer as its ``input_shape`` argument.

    Returns:
        The ``Sequential`` model; compiling and fitting are left to the caller.
    """
    layer_stack = [
        # Feature extraction
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(),
        # Classification head
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layer_stack)

In [24]:
# Train the model
def train_model(X_train, y_train, X_val, y_val, epochs=10, batch_size=32):
    """Build, compile and fit the model on the given training split.

    The model comes from ``build_model`` with the per-sample shape of
    ``X_train`` (everything after the first axis) and is compiled with the
    Adam optimizer, binary cross-entropy loss and the accuracy metric.

    Args:
        X_train: Training inputs; the first axis indexes samples.
        y_train: Training targets.
        X_val: Validation inputs, passed to ``fit`` as ``validation_data``.
        y_val: Validation targets, passed to ``fit`` as ``validation_data``.
        epochs: Number of training epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The fitted model.
    """
    model = build_model(input_shape=X_train.shape[1:])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
    )

    return model


In [25]:
# Main function
def main():
    """Train the classifier, save it and print its accuracy on the test set.

    Steps: read the training CSV, convert every listed clip to MFCC features,
    hold out 20% of the samples for validation, train the model, save it to
    ``hindi_hate_speech_model.h5``, then preprocess the clips listed in the
    test CSV the same way and print the test accuracy.

    Audio files are looked up in an ``audios`` directory located next to the
    respective CSV file.
    """
    train_data_path = 'Hindi_train.csv'  # Replace with your actual training dataset file
    test_data_path = 'Hindi_test.csv'    # Replace with your actual testing dataset file

    # Load training data and preprocess the training audio
    train_data, train_labels = load_data(train_data_path)
    X_train = _load_features(train_data, train_data_path, "Processing Training Audio")
    y_train = np.array(train_labels)

    # Split the training dataset into training and validation sets.
    # The channel axis is added before the split, so X_val already carries it;
    # expanding X_val again would give it one more dimension than X_train.
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Train the model
    model = train_model(X_train, y_train, X_val, y_val)

    # Save the model
    model.save('hindi_hate_speech_model.h5')

    # Load test data and preprocess the test audio
    test_data, test_labels = load_data(test_data_path)
    X_test = _load_features(test_data, test_data_path, "Processing Test Audio")
    y_test = np.array(test_labels)

    # Evaluate the model on the test set (the loss value is not reported)
    _, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


def _load_features(filenames, csv_path, desc):
    """Preprocess the listed audio files into one array with a trailing channel axis.

    Args:
        filenames: Audio file names, relative to the ``audios`` directory that
            sits next to ``csv_path``.
        csv_path: Path of the CSV file the names were read from.
        desc: Label shown on the progress bar.

    Returns:
        The stacked outputs of ``preprocess_audio`` with one extra axis of
        size 1 appended.
    """
    audio_dir = os.path.join(os.path.dirname(csv_path), 'audios')
    features = [
        preprocess_audio(os.path.join(audio_dir, filename))
        for filename in tqdm(filenames, desc=desc)
    ]
    # Add the channel dimension expected by the convolutional input layer
    return np.array(features)[..., np.newaxis]


In [26]:
# Entry point: run the full train/evaluate pipeline when this cell (or the
# exported script) is executed directly rather than imported.
if __name__ == "__main__":
    main()

Processing Training Audio: 100%|██████████| 822/822 [00:52<00:00, 15.52it/s]


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(
Processing Test Audio: 100%|██████████| 369/369 [00:27<00:00, 13.23it/s]


Test Accuracy: 71.54%
