In [None]:
# parse input

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Data location, kept in one place so the notebook can be pointed at another copy of the data.
DATA_DIR = '/Users/scovitz/datadir'
features_csv = f'{DATA_DIR}/video_features.csv'
labels_csv = f'{DATA_DIR}/class_info.csv'

features_df = pd.read_csv(features_csv)
class_df = pd.read_csv(labels_csv)

# Features and labels live in separate files and are paired purely by row position,
# so fail early with a clear message if the two files do not have the same number of rows.
if len(features_df) != len(class_df):
    raise ValueError(
        f"Row count mismatch: {features_csv} has {len(features_df)} rows "
        f"but {labels_csv} has {len(class_df)} rows"
    )

# Convert to numpy.
# Feature matrix: every column from the third onward (the first two columns are skipped;
# presumably identifier/metadata columns -- TODO confirm against the CSV).
X = features_df.iloc[:, 2:].to_numpy()
# Label vector: the last column of the class-info file.
y = class_df.iloc[:, -1].to_numpy()
y.shape

In [None]:
# Options shared by both splits: hold out 20% each time, with a fixed seed for repeatability.
split_options = dict(test_size=0.2, random_state=42)

# First split: carve the test set off the full data, keeping class proportions (stratified).
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, **split_options)

# Second split: carve a validation set off what is left of the training data, again stratified.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, stratify=y_train, **split_options
)

# Feature scaling: learn the statistics from the training split only,
# then apply that same transform to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test, X_val = (
    scaler.transform(split) for split in (X_train, X_test, X_val)
)


In [None]:
# Display the size of the second axis of X_train (the number of feature columns per sample).
X_train.shape[1]

In [2]:
# Define the model, use L2 regularization
def build_model(input_shape, l2_strength=0.01, learning_rate=0.001):
    """Build and compile a small fully connected binary classifier.

    Architecture: three hidden blocks of Dense(relu, L2-regularized) ->
    BatchNormalization -> Dropout with 64, 32 and 16 units, followed by a
    single sigmoid output unit.

    Args:
        input_shape: Shape tuple of one sample, e.g. ``(n_features,)``.
        l2_strength: L2 penalty factor applied to the kernel of every hidden
            Dense layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        tracking accuracy.
    """
    model = Sequential()

    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first Dense layer; Keras 3 warns about the latter.
    model.add(Input(shape=input_shape))

    # (units, dropout rate) for each hidden block.
    hidden_blocks = ((64, 0.3), (32, 0.3), (16, 0.2))
    for units, dropout_rate in hidden_blocks:
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2_strength)))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))  # Dropout to reduce overfitting

    # Output layer: one sigmoid unit for a binary target.
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the model
input_shape = (X_train.shape[1],)
model = build_model(input_shape)

# Compute class weights to balance the dataset
classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train
)
# Key every weight by the label it was computed for. Keying by position
# (enumerate) is only correct when the labels happen to be exactly 0..k-1.
class_weight_dict = {
    int(label): float(weight) for label, weight in zip(classes, class_weights)
}

print(f"Class Weights: {class_weight_dict}")

# Train the model with class weights.
# Monitor on the validation split, not the test split: the test set must stay
# untouched until the final evaluation so its score is not used to watch training.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=16,
    verbose=1,
    class_weight=class_weight_dict  # Pass the class weights here
)


NameError: name 'X_train' is not defined

In [None]:


# Score the trained model on the test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Turn the sigmoid outputs into hard 0/1 predictions by thresholding at 0.5
test_probabilities = model.predict(X_test)
y_pred = (test_probabilities > 0.5).astype("int32")

# Accuracy recorded at the last epoch, for the training data and for the
# validation_data that was passed to fit()
training_log = history.history
final_train_accuracy = training_log['accuracy'][-1]
final_val_accuracy = training_log['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_accuracy}")
print(f"Final Validation Accuracy: {final_val_accuracy}")

# Loss curves per epoch: training vs. validation
fig, ax = plt.subplots()
ax.plot(training_log['loss'], label='Training Loss')
ax.plot(training_log['val_loss'], label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
plt.show()

In [1]:
# Print the version of the TensorFlow package available in this kernel.
import tensorflow as tf
print(tf.__version__)

2.18.0


In [None]:
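### PREVIOUS ATTEMPT (disabled, kept for reference): scikeras KerasClassifier wrapper + 5-fold stratified cross-validation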

# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.regularizers import l2
# from scikeras.wrappers import KerasClassifier
# from sklearn.model_selection import StratifiedKFold, cross_val_score
# from sklearn.metrics import classification_report
# import numpy as np

# # Define the model with L2 regularization
# def build_model(input_shape):
#     model = Sequential()
#     # Input Layer + First Hidden Layer
#     model.add(Dense(64, input_shape=input_shape, activation='relu', kernel_regularizer=l2(0.01)))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.3))  # Dropout to reduce overfitting
    
#     # Second Hidden Layer
#     model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.3))
    
#     # Third Hidden Layer
#     model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.2))
    
#     # Output Layer
#     model.add(Dense(1, activation='sigmoid'))  # Binary output

#     # Compile the model
#     model.compile(optimizer=Adam(learning_rate=0.001),
#                   loss='binary_crossentropy',
#                   metrics=['accuracy'])
#     return model

# # Wrap the model using scikeras KerasClassifier
# keras_model = KerasClassifier(
#     model=build_model, 
#     model__input_shape=(X_train.shape[1],),  # Pass input shape explicitly
#     epochs=300,
#     batch_size=16,
#     verbose=0
# )

# # Perform cross-validation
# from sklearn.model_selection import StratifiedKFold

# kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# scores = cross_val_score(keras_model, X, y, cv=kfold)

# print(f"Cross-Validation Accuracy: {scores.mean() * 100:.2f}% (+/- {scores.std() * 100:.2f}%)")

# # Generate predictions and evaluate precision/recall
# keras_model.fit(X_train, y_train)  # Train the model
# y_pred = keras_model.predict(X_test)

# # Evaluate with precision/recall
# print(classification_report(y_test, y_pred))
