In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
import tensorflow as tf 
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

In [None]:
# Read the feature table and the label table from disk
labels_csv = '/Users/scovitz/datadir/class_info.csv'
features_df = pd.read_csv('/Users/scovitz/datadir/video_features.csv')
class_df = pd.read_csv(labels_csv)

# Feature matrix: skip the first two columns, keep everything after them
feature_columns = features_df.iloc[:, 2:]
X = feature_columns.to_numpy()

# Target vector: the final column of the label table
label_column = class_df.iloc[:, -1]
y = label_column.to_numpy()

# Show how many labels were loaded
y.shape

In [None]:
# Stratified 80/20 split so both partitions keep the class proportions of `y`
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Feature scaling: the scaler is fitted on the training split only and the
# same fitted statistics are then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# NOTE: a previous `X_val = scaler.transform(X_val)` line was dropped here.
# No `X_val` is created by the split above, so that line raised NameError on a
# fresh kernel. If a validation split is added later, scale it with
# `scaler.transform(...)` (never `fit_transform`).


In [None]:
# Number of feature columns in the scaled training matrix
X_train.shape[1]

In [1]:
# Tried focal loss; it did not work, so the implementation below is kept commented out for reference only.

# def focal_loss(gamma=2., alpha=0.25):
#     def focal_loss_fixed(y_true, y_pred):
#         pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
#         pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
#         return -alpha * tf.pow(1. - pt_1, gamma) * tf.math.log(pt_1) - \
#                (1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1. - pt_0)
#     return focal_loss_fixed

In [None]:
# Define the model, use L2 regularization
def build_model(input_shape, hidden_units=(64, 32, 16), dropout_rates=(0.3, 0.3, 0.2),
                l2_strength=0.01, learning_rate=0.001):
    """Build and compile a fully connected binary classifier.

    Each hidden stage is Dense(relu, L2-regularized kernel) -> BatchNormalization
    -> Dropout. The output is a single sigmoid unit, and the model is compiled
    with Adam and binary cross-entropy, tracking accuracy.

    Args:
        input_shape: Shape tuple of one sample, e.g. ``(n_features,)``.
        hidden_units: Width of each hidden Dense layer, in order.
        dropout_rates: Dropout rate applied after each hidden layer; must have
            the same length as ``hidden_units``.
        l2_strength: L2 penalty factor applied to every hidden Dense kernel.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``hidden_units`` and ``dropout_rates`` differ in length.
    """
    if len(hidden_units) != len(dropout_rates):
        raise ValueError(
            "hidden_units and dropout_rates must have the same length, got "
            f"{len(hidden_units)} and {len(dropout_rates)}"
        )

    model = Sequential()
    # Declare the input explicitly rather than passing `input_shape` to the
    # first Dense layer (newer Keras versions warn about the latter).
    model.add(tf.keras.Input(shape=input_shape))

    # Hidden stack: Dense -> BatchNorm -> Dropout per stage
    for units, rate in zip(hidden_units, dropout_rates):
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2_strength)))
        model.add(BatchNormalization())
        model.add(Dropout(rate))  # Dropout to reduce overfitting

    # Output Layer
    model.add(Dense(1, activation='sigmoid'))  # Binary output --> sigmoid for output layer

    # Compile the model, use Adam and BCE Loss
    # (a focal-loss variant was tried earlier in this notebook and abandoned)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Seed Python, NumPy and TensorFlow so weight initialization and batch
# shuffling are repeatable across runs (some GPU kernels may still be
# nondeterministic). 42 matches the random_state used for the data split.
tf.keras.utils.set_random_seed(42)

# Build the model
input_shape = (X_train.shape[1],)
model = build_model(input_shape)

# Compute class weights to balance the dataset
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
# Key each weight by its actual label value rather than by position, so the
# mapping stays correct even if the labels are not exactly 0..k-1. Cast to
# plain Python types so the dict prints cleanly.
class_weight_dict = {
    int(label): float(weight) for label, weight in zip(classes, class_weights)
}

print(f"Class Weights: {class_weight_dict}")

# Train the model with class weights dict.
# NOTE: the test split doubles as validation data here, so the `val_*`
# entries in `history` are test-set metrics.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=70,
    batch_size=16,
    verbose=1,
    class_weight=class_weight_dict
)


In [None]:
# Score the trained model on the held-out split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Threshold the predicted probabilities at 0.5 to obtain hard class labels
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Accuracy values logged for the last training epoch
epoch_log = history.history
final_train_accuracy = epoch_log['accuracy'][-1]
final_val_accuracy = epoch_log['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_accuracy}")
print(f"Final Validation Accuracy: {final_val_accuracy}")

# Per-class precision / recall / F1 table
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))

# F1 score using the "weighted" averaging mode
test_f1 = f1_score(y_test, y_pred, average="weighted")
print('Weighted F1 Score: ', test_f1)

# Loss curves over the training epochs
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    plt.plot(epoch_log[history_key], label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss - Features CSV')
plt.legend()
plt.show()

In [None]:
# Predict probabilities for the positive class
y_pred_prob = model.predict(X_test).ravel()  # `.ravel()` ensures 1D array

# Compute ROC Curve and AUC
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Compute Precision-Recall Curve and AUPRC
precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)
average_precision = average_precision_score(y_test, y_pred_prob)

# Build the thresholded (0.5) classification report once and read both
# weighted averages from it, instead of recomputing the report per metric.
weighted_avg = classification_report(
    y_test, (y_pred_prob > 0.5).astype(int), output_dict=True
)['weighted avg']
weighted_recall = weighted_avg['recall']
weighted_precision = weighted_avg['precision']

print('weighted average precision: ', weighted_precision)
print('weighted average recall: ', weighted_recall)

# Two side-by-side panels: ROC on the left, Precision-Recall on the right
fig, (roc_ax, pr_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Plot ROC Curve
roc_ax.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})', color='blue')
roc_ax.plot([0, 1], [0, 1], 'k--', label='Chance')  # diagonal = random classifier
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend(loc='lower right')

# Plot Precision-Recall Curve
pr_ax.plot(recall, precision, label=f'PR curve (AUPRC = {average_precision:.2f})', color='green')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall (PR) Curve')
pr_ax.legend(loc='lower left')

fig.tight_layout()
plt.show()
