In [1]:
import os
import random
import numpy as np
import tensorflow as tf

from tensorflow.keras import layers, models, losses, optimizers, metrics

from model_train import load_c3d_model, train_msupcl_model, linear_evaluation
from data_uniform_sup import VideoDataGenerator
from paired_generator import PairedDataGenerator

In [2]:
seed = 2042
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)
input_shape = (16, 112, 112, 3)  # As defined in data generator
num_classes = 2  # Harmful or Safe

In [3]:
# Define dataset paths
violence_negative_dir = './data/violence_dataset/NonViolence'
violence_positive_dir = './data/violence_dataset/Violence'
tiktok_negative_dir = './data/tiktok/train/Safe'
tiktok_positive_dir = './data/tiktok/train/Harmful Content'


In [4]:
def sample_videos(directory, num_samples=100):
    all_videos = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.mp4')]
    sampled_videos = random.sample(all_videos, min(num_samples, len(all_videos)))
    return sampled_videos


In [5]:
# Violence dataset
violence_negative_videos = sample_videos(violence_negative_dir, 100)
violence_positive_videos = sample_videos(violence_positive_dir, 100)

# TikTok dataset
tiktok_negative_videos = sample_videos(tiktok_negative_dir, 100)
tiktok_positive_videos = sample_videos(tiktok_positive_dir, 100)


In [6]:
def split_data(negative_videos, positive_videos, train_ratio=0.55, val_ratio=0.15):
    # Combine and shuffle
    videos = negative_videos + positive_videos
    labels = [0]*len(negative_videos) + [1]*len(positive_videos)
    combined = list(zip(videos, labels))
    random.shuffle(combined)
    videos[:], labels[:] = zip(*combined)
    
    # Calculate split indices
    total = len(videos)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)
    
    # Split data
    train_videos = videos[:train_end]
    train_labels = labels[:train_end]
    val_videos = videos[train_end:val_end]
    val_labels = labels[train_end:val_end]
    test_videos = videos[val_end:]
    test_labels = labels[val_end:]
    
    return (train_videos, train_labels), (val_videos, val_labels), (test_videos, test_labels)


In [7]:
# Violence dataset
(violence_train_videos, violence_train_labels), \
(violence_val_videos, violence_val_labels), \
(violence_test_videos, violence_test_labels) = split_data(violence_negative_videos, violence_positive_videos)

# TikTok dataset
(tiktok_train_videos, tiktok_train_labels), \
(tiktok_val_videos, tiktok_val_labels), \
(tiktok_test_videos, tiktok_test_labels) = split_data(tiktok_negative_videos, tiktok_positive_videos)


In [8]:

# Convert labels to numpy arrays and one-hot encode them if necessary
def prepare_labels(labels):
    return np.array(labels)

# Violence dataset generators
violence_train_labels_np = prepare_labels(violence_train_labels)
violence_val_labels_np = prepare_labels(violence_val_labels)
violence_test_labels_np = prepare_labels(violence_test_labels)

violence_train_generator = VideoDataGenerator(violence_train_videos, violence_train_labels_np, batch_size=4, shuffle=True, augment=True)
violence_val_generator = VideoDataGenerator(violence_val_videos, violence_val_labels_np, batch_size=4, shuffle=False)
violence_test_generator = VideoDataGenerator(violence_test_videos, violence_test_labels_np, batch_size=4, shuffle=False)

In [9]:

# TikTok dataset generators
tiktok_train_labels_np = prepare_labels(tiktok_train_labels)
tiktok_val_labels_np = prepare_labels(tiktok_val_labels)
tiktok_test_labels_np = prepare_labels(tiktok_test_labels)

tiktok_train_generator = VideoDataGenerator(tiktok_train_videos, tiktok_train_labels_np, batch_size=4, shuffle=True, augment=True)
tiktok_val_generator = VideoDataGenerator(tiktok_val_videos, tiktok_val_labels_np, batch_size=4, shuffle=False)
tiktok_test_generator = VideoDataGenerator(tiktok_test_videos, tiktok_test_labels_np, batch_size=4, shuffle=False)


In [10]:

# Load the model
model = load_c3d_model(input_shape=input_shape, feature_dim=512)


In [11]:
# Modify the model to output class probabilities


# Freeze the base model if desired
for layer in model.layers:
    layer.trainable = False

# Add classification layer
features = model.output
outputs = layers.Dense(num_classes, activation='softmax')(features)
classification_model = models.Model(inputs=model.input, outputs=outputs)

# Compile the model
classification_model.compile(
    loss=losses.SparseCategoricalCrossentropy(),
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=[metrics.SparseCategoricalAccuracy()]
)


In [12]:
# Train on Violence dataset
history_violence = classification_model.fit(
    violence_train_generator,
    validation_data=violence_val_generator,
    epochs=10
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Evaluate on Violence test set
results_violence = classification_model.evaluate(violence_test_generator)
print(f"Violence Dataset - Test Loss: {results_violence[0]}, Test Accuracy: {results_violence[1]}")


Violence Dataset - Test Loss: 0.6931942701339722, Test Accuracy: 0.5166666507720947


In [14]:
history_tiktok = classification_model.fit(
    tiktok_train_generator,
    validation_data=tiktok_val_generator,
    epochs=10
)

# Evaluate on TikTok test set
results_tiktok = classification_model.evaluate(tiktok_test_generator)
print(f"TikTok Dataset - Test Loss: {results_tiktok[0]}, Test Accuracy: {results_tiktok[1]}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
TikTok Dataset - Test Loss: 0.6941482424736023, Test Accuracy: 0.44999998807907104


## MSupCL implementation

In [14]:
# Combine training data from both datasets
combined_train_videos = violence_train_videos + tiktok_train_videos
combined_train_labels = violence_train_labels_np.tolist() + tiktok_train_labels_np.tolist()

# Create a combined data generator
combined_train_generator = VideoDataGenerator(combined_train_videos, combined_train_labels, batch_size=4, shuffle=True, augment=True)


In [11]:


# Create separate generators for violence and tiktok datasets
violence_train_generator_no_aug = VideoDataGenerator(violence_train_videos, violence_train_labels_np, batch_size=4, shuffle=True, augment=False)
tiktok_train_generator_no_aug = VideoDataGenerator(tiktok_train_videos, tiktok_train_labels_np, batch_size=4, shuffle=True, augment=False)

# Create paired data generator
paired_train_generator = PairedDataGenerator(violence_train_generator_no_aug, tiktok_train_generator_no_aug)




In [12]:
# Load the model
msupcl_model = load_c3d_model(input_shape=input_shape)

# Train the model
train_msupcl_model(msupcl_model, paired_train_generator, epochs=10)


Epoch 1/10
Training Loss: 0.7348
Epoch 2/10
Training Loss: 0.7323
Epoch 3/10
Training Loss: 0.7217
Epoch 4/10
Training Loss: 0.7297
Epoch 5/10
Training Loss: 0.7315
Epoch 6/10
Training Loss: 0.7242
Epoch 7/10
Training Loss: 0.7307
Epoch 8/10
Training Loss: 0.7272
Epoch 9/10
Training Loss: 0.7261
Epoch 10/10
Training Loss: 0.7242


In [13]:
linear_evaluation(msupcl_model, combined_train_generator,violence_val_generator, tiktok_val_generator)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Evaluating on Violence Test Set:
Violence Test Loss: 0.7154548764228821, Test Accuracy: 0.5
Evaluating on TikTok Test Set:
TikTok Test Loss: 0.7024644017219543, Test Accuracy: 0.6428571343421936


## R2+1d_18 model

In [10]:
from model_train_r2plus1d_18 import load_r2plus1d_model, linear_evaluation, train_msupcl_model, linear_evaluation

input_shape = (16, 112, 112, 3)
feature_dim = 512

msupcl_model = load_r2plus1d_model(input_shape=input_shape, feature_dim=feature_dim, include_top=False)

In [11]:
batch_size = 4

violence_train_generator_no_aug = VideoDataGenerator(violence_train_videos, violence_train_labels_np, batch_size=batch_size, shuffle=True, augment=False)
tiktok_train_generator_no_aug = VideoDataGenerator(tiktok_train_videos, tiktok_train_labels_np, batch_size=batch_size, shuffle=True, augment=False)
paired_train_generator = PairedDataGenerator(violence_train_generator_no_aug, tiktok_train_generator_no_aug)

In [12]:
train_msupcl_model(msupcl_model, paired_train_generator, epochs=10)

Epoch 1/10
Training Loss: 0.7068
Epoch 2/10
Training Loss: 0.7050
Epoch 3/10
Training Loss: 0.7007
Epoch 4/10
Training Loss: 0.6996
Epoch 5/10
Training Loss: 0.6959
Epoch 6/10
Training Loss: 0.6910
Epoch 7/10
Training Loss: 0.7074
Epoch 8/10
Training Loss: 0.6976
Epoch 9/10
Training Loss: 0.7011
Epoch 10/10
Training Loss: 0.6987


In [15]:
linear_evaluation(msupcl_model, combined_train_generator, violence_val_generator,tiktok_val_generator, num_classes=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Evaluating on Violence Test Set:
Violence Test Loss: 0.7471337914466858, Test Accuracy: 0.3571428656578064
Evaluating on TikTok Test Set:
TikTok Test Loss: 0.6955334544181824, Test Accuracy: 0.5357142686843872
