In [None]:
import os
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, auc, confusion_matrix, accuracy_score

## 1. Paths

In [None]:
# NOTE(review): `model_path` and `WIN_OVER` are not defined in any cell shown
# above this one; they must already exist in the kernel. Define them in a
# config cell so the notebook survives Restart Kernel -> Run All.

# Model type = last '_'-separated token of the model file's stem, e.g.
# 'runs/exp_1/clf_svm.pkl' -> 'svm'. basename/splitext are used so that
# underscores in parent directories, or an extension that is not exactly four
# characters long (e.g. '.joblib'), cannot leak into the result.
model_stem = os.path.splitext(os.path.basename(model_path))[0]
model_type = model_stem.split('_')[-1]

# NOTE(review): if WIN_OVER is a string such as '16_8', WIN_OVER[0] is only its
# first character ('1'); confirm WIN_OVER is a sequence whose first item is the
# window size.
window_size = int(WIN_OVER[0])

# Output files: per-window predictions and per-video grouped predictions.
csv_save_path = '...'.format(WIN_OVER,model_type)
grouped_data_save_path = '...'.format(window_size,model_type)

# Inputs for the "S" group: label file and folder of saved feature arrays.
S_DATA_ROOT = '...'
S_GROUP_NAME = '...'.format(WIN_OVER)
S_txt_file = os.path.join(S_DATA_ROOT,S_GROUP_NAME+'-label.txt')
S_feature_save_folder = os.path.join(S_DATA_ROOT,S_GROUP_NAME+'-eilev-feature')

# Inputs for the "SP" group, laid out the same way as the "S" group.
SP_DATA_ROOT = '...'
SP_GROUP_NAME = '...'.format(WIN_OVER)
SP_txt_file = os.path.join(SP_DATA_ROOT,SP_GROUP_NAME+'-label.txt')
SP_feature_save_folder = os.path.join(SP_DATA_ROOT,SP_GROUP_NAME+'-eilev-feature')

# Display the resolved CSV path as the cell output.
csv_save_path

In [None]:
# Label files read later by create_df_from_txt, one per data group.
S_true_NAME = '...'
S_true_txt_file = os.path.join(S_DATA_ROOT, f'{S_true_NAME}-label.txt')

SP_true_NAME = '...'
SP_true_txt_file = os.path.join(SP_DATA_ROOT, f'{SP_true_NAME}-label.txt')

## 2. Feature paths, video paths, and true labels

In [None]:
import numpy as np
import pandas as pd
def load_data(txt_path, root_path):
    """Load the feature arrays and labels listed in a label file.

    Every non-blank line of ``txt_path`` must have the form
    ``<video_path> <label>``. The label is the last whitespace-separated
    token, so video paths may themselves contain spaces. Blank lines are
    skipped.

    For each entry the feature file is ``root_path/<video base name>`` with
    ``'.mp4'`` replaced by ``'.npy'``; it is loaded with ``np.load`` and
    flattened to 1-D.

    Args:
        txt_path: Path of the label text file.
        root_path: Folder that contains the ``.npy`` feature files.

    Returns:
        Tuple ``(features, labels, video_paths, feature_paths)`` where
        ``features`` is ``np.array`` of the flattened feature vectors,
        ``labels`` is ``np.array`` of labels rounded to ``int``, and the two
        path items are plain lists in file order.

    Raises:
        ValueError: If a non-blank line does not contain both a path and a
            label, or if the label is not numeric.
    """
    features = []
    labels = []
    video_paths = []
    feature_paths = []

    with open(txt_path, 'r') as file:
        lines = file.readlines()

    for line_no, raw_line in enumerate(lines, start=1):
        entry = raw_line.strip()
        if not entry:
            # Tolerate blank lines (commonly a trailing newline at end of file).
            continue

        # Split from the right so whitespace inside the path is preserved.
        parts = entry.rsplit(None, 1)
        if len(parts) != 2:
            raise ValueError(
                f"{txt_path}:{line_no}: expected '<video_path> <label>', got {entry!r}"
            )
        video_path, video_label = parts

        # NOTE(review): replace() rewrites every '.mp4' occurrence in the file
        # name, not only the extension; kept as-is so lookups stay compatible
        # with however the feature files were named.
        video_name = os.path.basename(video_path).replace('.mp4', '.npy')
        feature_path = os.path.join(root_path, video_name)

        feature = np.load(feature_path)

        features.append(feature.flatten())
        # Labels may be written as floats (e.g. '1.0'); round to the nearest int.
        labels.append(int(round(float(video_label))))
        video_paths.append(video_path)
        feature_paths.append(feature_path)

    return np.array(features), np.array(labels), video_paths,feature_paths

def merge_data(features1,labels1,video_paths1,feature_paths1,features2,labels2,video_paths2,feature_paths2):
    """Concatenate two loaded datasets, first dataset's rows first.

    Feature arrays are stacked row-wise with ``np.vstack``, label arrays are
    joined with ``np.hstack``, and the two pairs of path sequences are joined
    with ``+``.

    Returns:
        Tuple ``(features, labels, video_paths, feature_paths)``.
    """
    stacked_features = np.vstack([features1, features2])
    joined_labels = np.hstack([labels1, labels2])
    return (
        stacked_features,
        joined_labels,
        video_paths1 + video_paths2,
        feature_paths1 + feature_paths2,
    )

In [None]:
# Load each group's features/labels from its label file and feature folder.
features1, labels1, video_paths1, feature_paths1 = load_data(
    S_txt_file, S_feature_save_folder
)
features2, labels2, video_paths2, feature_paths2 = load_data(
    SP_txt_file, SP_feature_save_folder
)

# Combine both groups into one dataset (S rows first, then SP rows).
merged_features, merged_labels, merged_video_paths, merged_feature_paths = merge_data(
    features1, labels1, video_paths1, feature_paths1,
    features2, labels2, video_paths2, feature_paths2,
)


## 3. Prediction

In [None]:
def predict_with_model(features, model_path):
    """Score ``features`` with the classifier stored at ``model_path``.

    The model is deserialized with ``joblib.load`` and its ``predict_proba``
    is called on ``features``.

    Returns:
        Column 1 of the ``predict_proba`` output, one value per input row
        (presumably the positive-class probability; confirm against the
        model's ``classes_``).
    """
    model = joblib.load(model_path)
    class_probabilities = model.predict_proba(features)
    return class_probabilities[:, 1]

def evaluate_predictions(true_labels, predictions, threshold=0.5):
    """Print and return ROC AUC, confusion matrix and accuracy for scores.

    Args:
        true_labels: Ground-truth class labels, one per sample.
        predictions: Continuous scores, one per sample. They are used as-is
            for the ROC curve and binarized for the other two metrics.
        threshold: Scores ``>= threshold`` are counted as class 1, all others
            as class 0. Defaults to 0.5.

    Returns:
        Tuple ``(cm, accuracy, roc_auc)``: the confusion matrix and accuracy
        of the binarized predictions, and the area under the ROC curve of the
        raw scores.
    """
    # ROC AUC is computed on the raw scores, before any thresholding.
    fpr, tpr, _ = roc_curve(true_labels, predictions)
    roc_auc = auc(fpr, tpr)
    print("ROC_AUC",roc_auc)

    # Binarize into a new name so the raw scores passed in are not shadowed.
    binary_predictions = (np.asarray(predictions) >= threshold).astype(int)

    cm = confusion_matrix(true_labels, binary_predictions)
    print("Confusion Matrix:\n", cm)

    accuracy = accuracy_score(true_labels, binary_predictions)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    return cm, accuracy,roc_auc


In [None]:
# Score every window clip, then report window-level metrics.
win_predictions = predict_with_model(features=merged_features, model_path=model_path)
cm, accuracy, roc_auc = evaluate_predictions(
    true_labels=merged_labels,
    predictions=win_predictions,
)

## 4. Window mean

In [None]:
# One row per window clip: its label, source paths, and model score.
data = {
    'True': merged_labels,
    'Video_Path': merged_video_paths,
    'Feature_Path': merged_feature_paths,
    'win_predictions': win_predictions,
}
df = pd.DataFrame(data=data)

In [None]:
# The video id is the name of the folder that directly contains each clip.
clip_folders = df['Video_Path'].apply(os.path.dirname)
df['video_id'] = clip_folders.apply(os.path.basename)

In [None]:
# Preview the first rows of the per-window table.
df.head()

In [None]:
# Write the per-window table to csv_save_path without the index column,
# then show the first rows as the cell output.
df.to_csv(csv_save_path, index=False)  
df.head()

In [None]:
def sliding_window_mean(arr, window_size):
    """Average the values of ``arr`` over a trailing window, padding both ends.

    Output position ``i`` is the mean of ``arr[i - window_size + 1 : i + 1]``
    clipped to the valid index range, for
    ``i = 0 .. len(arr) + window_size - 2``. The windows at both ends are
    therefore shorter than ``window_size``, and the result is longer than the
    input by ``window_size - 1``.

    Presumably ``arr[k]`` is the score of the k-th overlapping window of a
    video, so each output value averages all windows covering one time step.
    Confirm against how the clips were cut.

    Args:
        arr: Sequence of numbers supporting ``len`` and slicing.
        window_size: Positive integer number of values per full window.

    Returns:
        List of ``len(arr) + window_size - 1`` means, or ``[]`` when ``arr``
        is empty.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    n = len(arr)
    if n == 0:
        # Nothing to average; without this guard the empty slices below would
        # divide by zero for any window_size > 1.
        return []

    result = []
    for i in range(n + window_size - 1):
        start = max(0, i + 1 - window_size)
        stop = min(n, i + 1)
        window = arr[start:stop]
        result.append(sum(window) / len(window))
    return result

In [None]:
# Collect each video's window scores into one list per video_id.
predictions_per_video = df.groupby('video_id')['win_predictions'].apply(list)
grouped_data = predictions_per_video.reset_index()

# Smooth every video's score list with the sliding-window mean.
grouped_data['win_mean'] = grouped_data['win_predictions'].apply(
    lambda scores: sliding_window_mean(scores, window_size)
)

In [None]:
# Preview the first per-video rows (score list and smoothed scores).
grouped_data.head()

In [None]:
# Preview the last per-video rows.
grouped_data.tail()

In [None]:

def create_df_from_txt(file_path):
    """Read a ``<path> <label>`` text file into a DataFrame.

    Every non-blank line must contain a path followed by a label. The label
    is the last whitespace-separated token, so paths may contain spaces.
    Blank lines are skipped.

    Args:
        file_path: Path of the label text file.

    Returns:
        DataFrame with columns ``path``, ``label`` (kept as the string read
        from the file) and ``last_folder`` (name of the folder that directly
        contains ``path``).

    Raises:
        ValueError: If a non-blank line does not contain both a path and a
            label.
    """
    data = []
    with open(file_path, 'r') as file:
        for line_no, raw_line in enumerate(file, start=1):
            entry = raw_line.strip()
            if not entry:
                # Tolerate blank lines such as a trailing newline.
                continue
            # Split from the right so whitespace inside the path is preserved.
            parts = entry.rsplit(None, 1)
            if len(parts) != 2:
                raise ValueError(
                    f"{file_path}:{line_no}: expected '<path> <label>', got {entry!r}"
                )
            data.append(parts)

    df = pd.DataFrame(data, columns=['path', 'label'])

    df['last_folder'] = df['path'].apply(lambda x: os.path.basename(os.path.dirname(x)))

    return df


In [None]:

# Read both groups' label files, then stack them into one table with a
# fresh 0..n-1 index.
df_S, df_SP = (
    create_df_from_txt(label_file)
    for label_file in (S_true_txt_file, SP_true_txt_file)
)
df_combined = pd.concat([df_S, df_SP], ignore_index=True)


In [None]:
# Preview the combined label table (path, label, last_folder).
df_combined.head()

In [None]:

# Collect the labels of every folder into one list per folder.
label_lists = df_combined.groupby('last_folder')['label'].apply(list).reset_index()

# Attach each video's label list to its grouped predictions. Any 'label'
# column left by a previous run of this cell is dropped first, so re-running
# the cell does not produce 'label_x' / 'label_y' columns.
# NOTE(review): with how='left', videos without a matching folder get NaN in
# 'label' - confirm that every video_id is expected to have labels.
grouped_data = pd.merge(
    grouped_data.drop(columns=['label'], errors='ignore'),
    label_lists,
    left_on='video_id',
    right_on='last_folder',
    how='left',
)

# 'last_folder' duplicates 'video_id' after the merge.
grouped_data = grouped_data.drop(columns=['last_folder'])


In [None]:
# Write the per-video table to grouped_data_save_path without the index
# column, then show the first rows as the cell output.
grouped_data.to_csv(grouped_data_save_path, index=False)  
grouped_data.head()