## Imports

In [31]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.tree import DecisionTreeClassifier

## Load the dataframe

In [32]:
def _video_name_from_path(file_path):
    """Build '<stem>.mp4' from the second-to-last '/'-separated component of `file_path`.

    <stem> is that component truncated at its first '.'.
    NOTE(review): presumably the second-to-last component identifies the source
    video of the row -- confirm against the layout of the paths in the CSV.
    """
    parent_component = file_path.split('/')[-2]
    return parent_component.split('.')[0] + '.mp4'


# Read the feature table and attach a 'video_name' column; the fold CSVs are later
# matched against this column to select rows.
annotation_df = pd.read_csv('./cleaned_ratios.csv').assign(
    video_name=lambda frame: frame['file_path'].apply(_video_name_from_path)
)
annotation_df.head()

Unnamed: 0,file_path,distance_0_1_to_distance_0_2,distance_0_3_to_distance_5_10,distance_0_4_to_distance_4_13,distance_0_7_to_distance_18_23,distance_0_9_to_distance_14_18,distance_0_10_to_distance_8_20,distance_0_11_to_distance_11_20,distance_0_16_to_distance_2_4,distance_0_17_to_distance_1_14,...,angle_2_5_9,angle_2_14_22,angle_3_10_15,angle_4_15_16,angle_5_8_12,angle_5_14_17,angle_6_8_20,angle_7_16_17,salient,video_name
0,train/cow_bite/salient1/01b715099d0b0605600624...,1.020027,2.098896,4.916117,2.684519,1.554847,0.726802,0.581233,7.198481,1.484318,...,0.265472,0.364389,0.004095,0.469564,0.097709,0.327963,0.163143,0.183878,0,01b715099d0b060560062494f445d64273968b38c4947a...
1,train/cow_bite/salient1/01b715099d0b0605600624...,1.025078,2.212228,4.97817,2.569172,1.484482,0.695759,0.578634,6.979357,1.488992,...,0.303421,0.372807,0.003113,0.494791,0.10539,0.313466,0.170611,0.170144,0,01b715099d0b060560062494f445d64273968b38c4947a...
2,train/cow_bite/salient1/01b715099d0b0605600624...,1.027181,1.832562,4.986065,2.517126,1.51036,1.300608,0.640325,9.157373,3.347487,...,0.36262,0.334458,0.006785,0.471296,0.007272,0.346034,0.263366,0.273151,0,01b715099d0b060560062494f445d64273968b38c4947a...
3,train/cow_bite/salient1/01b715099d0b0605600624...,1.034087,2.237776,4.927011,2.580544,1.370526,0.672495,0.574309,9.23927,1.547455,...,0.328241,0.339795,0.004166,0.370708,0.135475,0.332853,0.167066,0.162496,0,01b715099d0b060560062494f445d64273968b38c4947a...
4,train/cow_bite/salient1/01b715099d0b0605600624...,1.040316,2.18392,4.615139,2.654145,1.403396,0.683371,0.604192,8.286063,1.564145,...,0.329435,0.331054,0.004886,0.49982,0.092372,0.319599,0.153758,0.171903,0,01b715099d0b060560062494f445d64273968b38c4947a...


In [33]:
# Cross-validated evaluation of a decision tree on the rows of `annotation_df`.
#
# For every fold, the train/test fold CSVs list video names (column 'name'); the rows of
# `annotation_df` whose 'video_name' is listed become that fold's train/test samples.
# Per-fold accuracy and ROC-AUC are collected in `accuracies` and `roc_auc_scores`.

# --- Configuration -----------------------------------------------------------------
N_FOLDS = 10  # fold files are numbered 1..N_FOLDS
SEED = 42     # fixed seed: without it the tree may break ties between equally good
              # splits differently on each run, so the metrics would not be reproducible
NON_FEATURE_COLUMNS = ['video_name', 'file_path', 'salient']  # identifiers + target
TREE_PARAMS = dict(max_depth=10, min_samples_leaf=1, min_samples_split=10)

# Per-fold metrics; re-created each time this cell runs so a re-run never appends
# to results left over from a previous execution.
roc_auc_scores = []
accuracies = []

for fold in range(1, N_FOLDS + 1):
    # Load the video-name listings that define this fold's split
    test_fold = pd.read_csv(f'./folds/test_fold_{fold}.csv')
    train_fold = pd.read_csv(f'./folds/train_fold_{fold}.csv')

    train_video_names = train_fold['name'].values
    test_video_names = test_fold['name'].values

    # Select the rows of annotation_df that belong to the listed videos
    X_train = annotation_df[annotation_df['video_name'].isin(train_video_names)]
    X_test = annotation_df[annotation_df['video_name'].isin(test_video_names)]

    # Fail early with a readable message instead of an opaque estimator error when the
    # fold's names do not match anything in annotation_df (e.g. a naming mismatch).
    if X_train.empty or X_test.empty:
        raise ValueError(
            f'Fold {fold}: no rows of annotation_df match the fold video names '
            f'(train rows = {len(X_train)}, test rows = {len(X_test)})'
        )

    # Split off the target, then keep only the feature columns
    y_train = X_train['salient']
    X_train = X_train.drop(columns=NON_FEATURE_COLUMNS)

    y_test = X_test['salient']
    X_test = X_test.drop(columns=NON_FEATURE_COLUMNS)

    # Train the decision tree with the configured hyper-parameters and seed
    clf = DecisionTreeClassifier(random_state=SEED, **TREE_PARAMS)
    clf.fit(X_train, y_train)

    # Hard predictions for accuracy; probabilities of the second class for ROC-AUC.
    # NOTE(review): column 1 of predict_proba is the second entry of clf.classes_ --
    # presumably salient == 1; confirm the labels are binary 0/1.
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

    roc_auc = roc_auc_score(y_test, y_pred_proba)
    roc_auc_scores.append(roc_auc)

    print(f'Fold {fold}: Accuracy = {accuracy:.4f}, ROC-AUC = {roc_auc:.4f}')

Fold 1: Accuracy = 0.9032, ROC-AUC = 0.9132
Fold 2: Accuracy = 0.9309, ROC-AUC = 0.9341
Fold 3: Accuracy = 0.8653, ROC-AUC = 0.8586
Fold 4: Accuracy = 0.8268, ROC-AUC = 0.8028
Fold 5: Accuracy = 0.9301, ROC-AUC = 0.9155
Fold 6: Accuracy = 0.7973, ROC-AUC = 0.7949
Fold 7: Accuracy = 0.8438, ROC-AUC = 0.8585
Fold 8: Accuracy = 0.8963, ROC-AUC = 0.8909
Fold 9: Accuracy = 0.8346, ROC-AUC = 0.8625
Fold 10: Accuracy = 0.9128, ROC-AUC = 0.8897
Average Accuracy: 0.8741192016110884
Average ROC-AUC: 0.8720747913210068


In [35]:
# Summarise the per-fold metrics as mean ± standard deviation.
# np.std is called with its default ddof=0, i.e. the population standard deviation
# over the collected fold scores.
mean_accuracy, std_accuracy = np.mean(accuracies), np.std(accuracies)
mean_roc_auc, std_roc_auc = np.mean(roc_auc_scores), np.std(roc_auc_scores)

# Report both metrics in the same "mean ± std" layout
for label, mean_value, std_value in (
    ("Average Accuracy", mean_accuracy, std_accuracy),
    ("Average ROC-AUC", mean_roc_auc, std_roc_auc),
):
    print(f"{label}: {mean_value:.6f} ± {std_value:.6f}")

Average Accuracy: 0.874119 ± 0.044605
Average ROC-AUC: 0.872075 ± 0.043946
