In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!unzip -uq "/content/drive/MyDrive/mini_proj/audio.zip" -d "/content/audio"
!unzip -uq "/content/drive/MyDrive/mini_proj/video.zip" -d "/content/video"

In [None]:
!ls "/content/audio"
!ls "/content/video"

Actor_01  Actor_06  Actor_11  Actor_16	Actor_21
Actor_02  Actor_07  Actor_12  Actor_17	Actor_22
Actor_03  Actor_08  Actor_13  Actor_18	Actor_23
Actor_04  Actor_09  Actor_14  Actor_19	Actor_24
Actor_05  Actor_10  Actor_15  Actor_20	audio_speech_actors_01-24
'Real Life Violence Dataset'  'real life violence situations'


**AUDIO MODEL**

1. Install and Import Required Libraries

In [None]:
!pip install librosa
!pip install tensorflow

import os
import numpy as np
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout




2. Define Feature Extraction Function

In [None]:
def extract_features_cnn(file_path, max_pad_len=130):
    try:
        audio, sr = librosa.load(file_path, duration=3, offset=0.5)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < max_pad_len:
            pad_width = max_pad_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_pad_len]
        return mfcc
    except Exception as e:
        print(f"Error: {file_path} - {e}")
        return None


3. Load Data and Extract Features

In [None]:
data = []
labels = []

RAVDESS_PATH = '/content/audio'  # Make sure your dataset is here

for root, _, files in os.walk(RAVDESS_PATH):
    for file in files:
        if file.endswith('.wav'):
            path = os.path.join(root, file)
            label = int(file.split('-')[2])  # Emotion label
            features = extract_features_cnn(path)
            if features is not None:
                data.append(features)
                labels.append(label)

X = np.array(data)
y = np.array(labels)
X = X[..., np.newaxis]  # Add channel dimension for CNN


4. Encode Labels and Split Data

In [None]:
le = LabelEncoder()
y_encoded = to_categorical(le.fit_transform(y))  # One-hot encode

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

5. Build CNN Model

In [None]:
model_cnn = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(40, 130, 1)),
    MaxPooling2D((2,2)),
    Dropout(0.3),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(y_encoded.shape[1], activation='softmax')
])

model_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


6. Train the Model

In [None]:
history = model_cnn.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_test, y_test)
)

Epoch 1/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.1594 - loss: 13.3024 - val_accuracy: 0.2205 - val_loss: 2.0475
Epoch 2/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2278 - loss: 1.9815 - val_accuracy: 0.2656 - val_loss: 1.8615
Epoch 3/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2803 - loss: 1.8179 - val_accuracy: 0.3003 - val_loss: 1.7854
Epoch 4/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3538 - loss: 1.7098 - val_accuracy: 0.3854 - val_loss: 1.6490
Epoch 5/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.4141 - loss: 1.5430 - val_accuracy: 0.4826 - val_loss: 1.4592
Epoch 6/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5101 - loss: 1.3456 - val_accuracy: 0.5243 - val_loss: 1.3501
Epoch 7/30
[1m72/72[0m [32m━━━━━━━━

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Get predicted classes for train and test
y_train_pred = model_cnn.predict(X_train)
y_test_pred = model_cnn.predict(X_test)

# Convert one-hot predictions and true values to class labels
y_train_pred_labels = np.argmax(y_train_pred, axis=1)
y_test_pred_labels = np.argmax(y_test_pred, axis=1)
y_train_true_labels = np.argmax(y_train, axis=1)
y_test_true_labels = np.argmax(y_test, axis=1)

# Classification report for test set
print("🔍 Classification Report (Test Set):")
print(classification_report(y_test_true_labels, y_test_pred_labels))

# Accuracy
train_accuracy = accuracy_score(y_train_true_labels, y_train_pred_labels)
test_accuracy = accuracy_score(y_test_true_labels, y_test_pred_labels)

print(f"✅ Train Accuracy: {train_accuracy * 100:.2f}%")
print(f"✅ Test Accuracy: {test_accuracy * 100:.2f}%")


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
🔍 Classification Report (Test Set):
              precision    recall  f1-score   support

           0       0.91      0.91      0.91        43
           1       0.93      0.90      0.92        63
           2       0.86      0.83      0.84        71
           3       0.80      0.89      0.84        71
           4       1.00      0.93      0.96        81
           5       0.92      0.78      0.85        93
           6       0.87      0.95      0.91        84
           7       0.87      0.97      0.92        70

    accuracy                           0.89       576
   macro avg       0.89      0.90      0.89       576
weighted avg       0.90      0.89      0.89       576

✅ Train Accuracy: 100.00%
✅ Test Accuracy: 89.24%


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

model_cnn = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001), input_shape=(40, 130, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.4),

    Dense(y_encoded.shape[1], activation='softmax')
])

optimizer = Adam(learning_rate=0.0003)  # Slower learning rate helps reduce overfitting

model_cnn.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

history = model_cnn.fit(
    X_train, y_train,
    epochs=60,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stop]
)


Epoch 1/60


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 20ms/step - accuracy: 0.2041 - loss: 3.0754 - val_accuracy: 0.2413 - val_loss: 2.2277
Epoch 2/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2565 - loss: 2.1576 - val_accuracy: 0.2882 - val_loss: 2.0677
Epoch 3/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3070 - loss: 2.0145 - val_accuracy: 0.2899 - val_loss: 1.9798
Epoch 4/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3681 - loss: 1.9012 - val_accuracy: 0.4444 - val_loss: 1.8189
Epoch 5/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3884 - loss: 1.8277 - val_accuracy: 0.4931 - val_loss: 1.7161
Epoch 6/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.4208 - loss: 1.7394 - val_accuracy: 0.5382 - val_loss: 1.6179
Epoch 7/60
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Predict on training and testing data
y_train_pred = model_cnn.predict(X_train)
y_test_pred = model_cnn.predict(X_test)

# Convert one-hot encoded vectors to class labels
y_train_true = np.argmax(y_train, axis=1)
y_test_true = np.argmax(y_test, axis=1)
y_train_pred_labels = np.argmax(y_train_pred, axis=1)
y_test_pred_labels = np.argmax(y_test_pred, axis=1)

# Accuracy
train_accuracy = accuracy_score(y_train_true, y_train_pred_labels)
test_accuracy = accuracy_score(y_test_true, y_test_pred_labels)

print(f"✅ Train Accuracy: {train_accuracy * 100:.2f}%")
print(f"✅ Test Accuracy: {test_accuracy * 100:.2f}%")

# Classification report for test set
print("\n🔍 Classification Report (Test Set):")
print(classification_report(y_test_true, y_test_pred_labels))


[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
✅ Train Accuracy: 99.96%
✅ Test Accuracy: 93.58%

🔍 Classification Report (Test Set):
              precision    recall  f1-score   support

           0       0.95      0.98      0.97        43
           1       0.97      0.97      0.97        63
           2       0.84      0.89      0.86        71
           3       0.92      0.92      0.92        71
           4       1.00      0.98      0.99        81
           5       0.91      0.89      0.90        93
           6       0.98      0.95      0.96        84
           7       0.93      0.94      0.94        70

    accuracy                           0.94       576
   macro avg       0.94      0.94      0.94       576
weighted avg       0.94      0.94      0.94       576



Save the model

In [None]:
model_cnn.save("audio_emotion_model.keras")
print("✅ Model saved in native Keras (.keras) format")


✅ Model saved in native Keras (.keras) format


load the model

In [None]:
from keras.models import load_model
model_cnn = load_model("audio_emotion_model.keras")


**VIDEO MODEL**

Feature Extraction (Frame-wise)

In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
from tqdm import tqdm

# Load MobileNetV2 + GAP for feature extraction
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
gap_output = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=gap_output)

max_frames = 60

def extract_sequence_features(video_path, max_frames=max_frames):
    cap = cv2.VideoCapture(video_path)
    frames = []
    count = 0

    while count < max_frames and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        img = cv2.resize(frame, (224, 224))
        img = preprocess_input(img)
        frames.append(img)
        count += 1

    cap.release()

    if len(frames) == 0:
        # Return zeros if no frames read
        return np.zeros((max_frames, 1280), dtype=np.float32)

    frames = np.array(frames)
    features = model.predict(frames, verbose=0)  # (num_frames, 1280)

    # Pad if less than max_frames
    if features.shape[0] < max_frames:
        pad_len = max_frames - features.shape[0]
        features = np.pad(features, ((0, pad_len), (0, 0)), mode='constant')

    return features  # shape: (max_frames, 1280)


# Paths
violent_path = "/content/video/Real Life Violence Dataset/Violence"
non_violent_path = "/content/video/Real Life Violence Dataset/NonViolence"

X = []
y = []

print("Extracting features from Violent videos...")
for video_file in tqdm(os.listdir(violent_path)):
    path = os.path.join(violent_path, video_file)
    X.append(extract_sequence_features(path))
    y.append(1)

print("Extracting features from Non-Violent videos...")
for video_file in tqdm(os.listdir(non_violent_path)):
    path = os.path.join(non_violent_path, video_file)
    X.append(extract_sequence_features(path))
    y.append(0)

X = np.array(X, dtype=np.float32)  # shape: (num_samples, 60, 1280)
y = np.array(y)

# Save features for later use
np.save("X_lstm.npy", X)
np.save("y_lstm.npy", y)


Extracting features from Violent videos...


100%|██████████| 1000/1000 [06:52<00:00,  2.42it/s]


Extracting features from Non-Violent videos...


100%|██████████| 1000/1000 [05:45<00:00,  2.90it/s]


 LSTM Model Training and Evaluation

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load features
X = np.load("X_lstm.npy")
y = np.load("y_lstm.npy")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build LSTM model
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    BatchNormalization(),
    Dropout(0.3),

    LSTM(64),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation='relu'),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=16,
    callbacks=[early_stop],
    verbose=2
)

# Evaluate on train set
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
print(f"🎯 Train Accuracy: {train_acc:.4f}")

# Evaluate on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"🎯 Test Accuracy : {test_acc:.4f}")

# Predictions & classification report on test set
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int).reshape(-1)

print("\n📊 Classification Report (Test Set):\n")
print(classification_report(y_test, y_pred))


  super().__init__(**kwargs)


Epoch 1/50
90/90 - 8s - 92ms/step - accuracy: 0.8125 - loss: 0.4241 - val_accuracy: 0.9250 - val_loss: 0.3193
Epoch 2/50
90/90 - 6s - 68ms/step - accuracy: 0.9160 - loss: 0.2187 - val_accuracy: 0.9500 - val_loss: 0.1663
Epoch 3/50
90/90 - 1s - 13ms/step - accuracy: 0.9333 - loss: 0.1821 - val_accuracy: 0.9375 - val_loss: 0.1998
Epoch 4/50
90/90 - 2s - 17ms/step - accuracy: 0.9493 - loss: 0.1446 - val_accuracy: 0.9187 - val_loss: 0.2505
Epoch 5/50
90/90 - 2s - 20ms/step - accuracy: 0.9549 - loss: 0.1244 - val_accuracy: 0.8625 - val_loss: 0.3273
Epoch 6/50
90/90 - 2s - 22ms/step - accuracy: 0.9528 - loss: 0.1341 - val_accuracy: 0.8438 - val_loss: 0.4221
Epoch 7/50
90/90 - 1s - 14ms/step - accuracy: 0.9667 - loss: 0.0849 - val_accuracy: 0.8813 - val_loss: 0.3474
🎯 Train Accuracy: 0.9413
🎯 Test Accuracy : 0.9175
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step

📊 Classification Report (Test Set):

              precision    recall  f1-score   support

           0 

Bidirectional LSTM with MobileNetV2 Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np

# Load data
X = np.load("X_lstm.npy")
y = np.load("y_lstm.npy")

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Model
model = Sequential([
    Input(shape=(60, 62720)),
    Bidirectional(LSTM(128, return_sequences=False)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

# Compile
optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
    callbacks=[early_stop],
    verbose=2
)

# Evaluate
train_acc = model.evaluate(X_train, y_train, verbose=0)[1]
test_acc = model.evaluate(X_test, y_test, verbose=0)[1]
print(f"\n🎯 Train Accuracy: {train_acc:.4f}")
print(f"🎯 Test Accuracy : {test_acc:.4f}")

# Classification Report
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("\n📊 Classification Report (Test Set):\n")
print(classification_report(y_test, y_pred))


Epoch 1/50
100/100 - 10s - 100ms/step - accuracy: 0.8325 - loss: 0.3721 - val_accuracy: 0.9200 - val_loss: 0.2082
Epoch 2/50
100/100 - 6s - 63ms/step - accuracy: 0.9344 - loss: 0.1731 - val_accuracy: 0.9450 - val_loss: 0.1497
Epoch 3/50
100/100 - 6s - 60ms/step - accuracy: 0.9550 - loss: 0.1268 - val_accuracy: 0.9400 - val_loss: 0.2022
Epoch 4/50
100/100 - 11s - 109ms/step - accuracy: 0.9650 - loss: 0.0934 - val_accuracy: 0.9275 - val_loss: 0.1454
Epoch 5/50
100/100 - 10s - 96ms/step - accuracy: 0.9669 - loss: 0.0882 - val_accuracy: 0.9500 - val_loss: 0.1477
Epoch 6/50
100/100 - 11s - 105ms/step - accuracy: 0.9856 - loss: 0.0423 - val_accuracy: 0.9400 - val_loss: 0.1508
Epoch 7/50
100/100 - 6s - 63ms/step - accuracy: 0.9900 - loss: 0.0298 - val_accuracy: 0.9425 - val_loss: 0.1621
Epoch 8/50
100/100 - 11s - 106ms/step - accuracy: 0.9800 - loss: 0.0488 - val_accuracy: 0.9550 - val_loss: 0.1252
Epoch 9/50
100/100 - 9s - 95ms/step - accuracy: 0.9937 - loss: 0.0198 - val_accuracy: 0.9500 - 

Save model

In [None]:
# Save entire model
model.save("violence_detection_bilstm.h5")



load model

In [None]:
from tensorflow.keras.models import load_model

# Load model from file
model = load_model("violence_detection_bilstm.h5")

