In [15]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

### Read the interquartile/ alternate frame dataset

In [None]:
jpeg_files = []

# Set the path of the directory you want to search
path_1 = "./models-og/trained_models_cam/fold_1/_preds_last_epoch_rend.csv"
path_2 = "./models-og/trained_models_cam/fold_1/_test_file.csv"

In [16]:
df1 = pd.read_csv(path_1).drop(columns='index')
df1.head()

Unnamed: 0,0,1
0,0.79016,0.541937
1,0.790646,0.295504
2,0.481589,0.413457
3,0.416499,0.634367
4,0.557233,0.590527


In [17]:
df2 = pd.read_csv(path_2)
df2.head()

Unnamed: 0,file_path
0,abnormal-R90-m10-cut_mid_frame_578.jpg
1,abnormal-R19-m11-cut_mid_frame_164.jpg
2,abnormal-R96-m5-cut_mid_frame_466.jpg
3,abnormal-R67-m7-cut_mid_frame_158.jpg
4,abnormal-R43-m7-cut_mid_frame_264.jpg


In [18]:
print(df1.shape)
df2.shape

(19080, 2)


(19080, 1)

In [19]:
df = pd.concat([df1, df2], axis=1)
df = df.rename(columns={'0': 'abnormal', '1': 'normal'})

print(df)

       abnormal    normal                               file_path
0      0.790160  0.541937  abnormal-R90-m10-cut_mid_frame_578.jpg
1      0.790646  0.295504  abnormal-R19-m11-cut_mid_frame_164.jpg
2      0.481589  0.413457   abnormal-R96-m5-cut_mid_frame_466.jpg
3      0.416499  0.634367   abnormal-R67-m7-cut_mid_frame_158.jpg
4      0.557233  0.590527   abnormal-R43-m7-cut_mid_frame_264.jpg
...         ...       ...                                     ...
19075  0.719985  0.380071    normal-R91-m10-cut_mid_frame_617.jpg
19076  0.604230  0.674178     normal-R62-m7-cut_mid_frame_165.jpg
19077  0.881047  0.306737    normal-R57-m11-cut_mid_frame_139.jpg
19078  0.627480  0.447287     normal-R62-m7-cut_mid_frame_171.jpg
19079  0.629005  0.642010     normal-R59-m5-cut_mid_frame_389.jpg

[19080 rows x 3 columns]


In [20]:
# Loop through the rows and update the values in column 'C'
for index, row in df.iterrows():
    if row['abnormal'] > row['normal']:
        df.loc[index, 'y_pred'] = 1
    else:
        df.loc[index, 'y_pred'] = 0
    
    if 'abnormal' in row['file_path']:
        df.loc[index, 'y_true'] = 1
    else:
        df.loc[index, 'y_true'] = 0
    
    df.loc[index, 'clip'] = '-'.join(row['file_path'].split('-')[:3])

# Print the updated dataframe
print(df[['file_path', 'clip']])

                                    file_path              clip
0      abnormal-R90-m10-cut_mid_frame_578.jpg  abnormal-R90-m10
1      abnormal-R19-m11-cut_mid_frame_164.jpg  abnormal-R19-m11
2       abnormal-R96-m5-cut_mid_frame_466.jpg   abnormal-R96-m5
3       abnormal-R67-m7-cut_mid_frame_158.jpg   abnormal-R67-m7
4       abnormal-R43-m7-cut_mid_frame_264.jpg   abnormal-R43-m7
...                                       ...               ...
19075    normal-R91-m10-cut_mid_frame_617.jpg    normal-R91-m10
19076     normal-R62-m7-cut_mid_frame_165.jpg     normal-R62-m7
19077    normal-R57-m11-cut_mid_frame_139.jpg    normal-R57-m11
19078     normal-R62-m7-cut_mid_frame_171.jpg     normal-R62-m7
19079     normal-R59-m5-cut_mid_frame_389.jpg     normal-R59-m5

[19080 rows x 2 columns]


In [21]:
df = df.drop(columns='file_path', axis=1)
df = df.sort_values('clip')

df.head()

Unnamed: 0,abnormal,normal,y_pred,y_true,clip
6098,0.481814,0.499828,0.0,1.0,abnormal-R101-m4
10946,0.634232,0.50719,1.0,1.0,abnormal-R101-m4
6199,0.325768,0.630033,0.0,1.0,abnormal-R101-m4
13472,0.659384,0.153361,1.0,1.0,abnormal-R101-m4
6224,0.684962,0.366289,1.0,1.0,abnormal-R101-m4


## Final 62 clip classification

In [22]:
# Get distinct column values and their counts
new_df = df['clip'].value_counts().reset_index()

# Rename the columns of the new DataFrame
new_df.columns = ['clip', 'total_frames']

# # Print the new DataFrame
print(new_df)


                clip  total_frames
0   abnormal-R90-m10           482
1    abnormal-R90-m8           478
2     normal-R91-m12           468
3    abnormal-R79-m9           462
4   abnormal-R101-m6           454
..               ...           ...
57     normal-R33-m3           196
58    normal-R12-m11           193
59   abnormal-R43-m5           192
60   abnormal-R33-m5           192
61    normal-R57-m11           114

[62 rows x 2 columns]


In [23]:
new_df_ = df.groupby('clip').agg({'y_pred': 'sum'}).reset_index()

# Rename the columns of the new DataFrame
new_df_.columns = ['clip', 'abn_pred_frames']

# Print the new DataFrame
print(new_df_)

                 clip  abn_pred_frames
0    abnormal-R101-m4            264.0
1    abnormal-R101-m6            233.0
2   abnormal-R102-m11             45.0
3    abnormal-R102-m9             68.0
4    abnormal-R110-m7             16.0
..                ...              ...
57      normal-R67-m5             39.0
58      normal-R71-m9            206.0
59      normal-R74-m7             34.0
60     normal-R91-m10            375.0
61     normal-R91-m12            313.0

[62 rows x 2 columns]


In [24]:
# Merge the dataframes based on the common 'ID' column
merged_df = pd.merge(new_df, new_df_, on='clip')

print(merged_df)

                clip  total_frames  abn_pred_frames
0   abnormal-R90-m10           482            415.0
1    abnormal-R90-m8           478            423.0
2     normal-R91-m12           468            313.0
3    abnormal-R79-m9           462            420.0
4   abnormal-R101-m6           454            233.0
..               ...           ...              ...
57     normal-R33-m3           196             19.0
58    normal-R12-m11           193            110.0
59   abnormal-R43-m5           192             41.0
60   abnormal-R33-m5           192             12.0
61    normal-R57-m11           114             90.0

[62 rows x 3 columns]


In [28]:
merged_df['abn_ratio'] = merged_df['abn_pred_frames']/ merged_df['total_frames']
print(merged_df)

                clip  total_frames  abn_pred_frames  abn_ratio
0   abnormal-R90-m10           482            415.0   0.860996
1    abnormal-R90-m8           478            423.0   0.884937
2     normal-R91-m12           468            313.0   0.668803
3    abnormal-R79-m9           462            420.0   0.909091
4   abnormal-R101-m6           454            233.0   0.513216
..               ...           ...              ...        ...
57     normal-R33-m3           196             19.0   0.096939
58    normal-R12-m11           193            110.0   0.569948
59   abnormal-R43-m5           192             41.0   0.213542
60   abnormal-R33-m5           192             12.0   0.062500
61    normal-R57-m11           114             90.0   0.789474

[62 rows x 4 columns]


In [29]:
# Apply a lambda function to the 'clip' column to create a new column 'is_abn'
merged_df['is_abn'] = merged_df['clip'].apply(lambda x: 1 if 'abnormal' in x else 0)

print(merged_df)

                clip  total_frames  abn_pred_frames  abn_ratio  is_abn
0   abnormal-R90-m10           482            415.0   0.860996       1
1    abnormal-R90-m8           478            423.0   0.884937       1
2     normal-R91-m12           468            313.0   0.668803       0
3    abnormal-R79-m9           462            420.0   0.909091       1
4   abnormal-R101-m6           454            233.0   0.513216       1
..               ...           ...              ...        ...     ...
57     normal-R33-m3           196             19.0   0.096939       0
58    normal-R12-m11           193            110.0   0.569948       0
59   abnormal-R43-m5           192             41.0   0.213542       1
60   abnormal-R33-m5           192             12.0   0.062500       1
61    normal-R57-m11           114             90.0   0.789474       0

[62 rows x 5 columns]


In [30]:
print(merged_df.sort_values(by='clip'))

                 clip  total_frames  abn_pred_frames  abn_ratio  is_abn
5    abnormal-R101-m4           440            264.0   0.600000       1
4    abnormal-R101-m6           454            233.0   0.513216       1
47  abnormal-R102-m11           230             45.0   0.195652       1
44   abnormal-R102-m9           252             68.0   0.269841       1
34   abnormal-R110-m7           291             16.0   0.054983       1
..                ...           ...              ...        ...     ...
11      normal-R67-m5           368             39.0   0.105978       0
45      normal-R71-m9           244            206.0   0.844262       0
49      normal-R74-m7           227             34.0   0.149780       0
6      normal-R91-m10           434            375.0   0.864055       0
2      normal-R91-m12           468            313.0   0.668803       0

[62 rows x 5 columns]


In [31]:
# merged_df.sort_values(by='clip').to_csv('output_alt.csv', index=False)
merged_df.sort_values(by='clip').to_csv('output_mid.csv', index=False)

# Run directly below directly to test classification result

In [32]:
df = pd.read_csv('output_mid.csv')
df.head()

Unnamed: 0,clip,total_frames,abn_pred_frames,abn_ratio,is_abn
0,abnormal-R101-m4,440,264.0,0.6,1
1,abnormal-R101-m6,454,233.0,0.513216,1
2,abnormal-R102-m11,230,45.0,0.195652,1
3,abnormal-R102-m9,252,68.0,0.269841,1
4,abnormal-R110-m7,291,16.0,0.054983,1


In [33]:
# setting threshold
thresholds = [0.4, 0.45, 0.5, 0.55, 0.59]

for threshold in thresholds:

    # Ground truth labels
    true_labels = df['is_abn']

    # Predicted labels
    predicted_labels = df['abn_ratio'].apply(lambda x: 0 if x <= threshold else 1)

    
    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # Calculate precision
    precision = precision_score(true_labels, predicted_labels)

    # Calculate recall (sensitivity)
    recall = recall_score(true_labels, predicted_labels)

    # Calculate F1-score
    f1 = f1_score(true_labels, predicted_labels)

    # Print the results
    print("Threshold:", threshold)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall (Sensitivity):", recall)
    print("F1-score:", f1)

Threshold: 0.4
Accuracy: 0.6774193548387096
Precision: 0.7906976744186046
Recall (Sensitivity): 0.7555555555555555
F1-score: 0.7727272727272727
Threshold: 0.45
Accuracy: 0.6774193548387096
Precision: 0.7906976744186046
Recall (Sensitivity): 0.7555555555555555
F1-score: 0.7727272727272727
Threshold: 0.5
Accuracy: 0.6451612903225806
Precision: 0.7804878048780488
Recall (Sensitivity): 0.7111111111111111
F1-score: 0.7441860465116279
Threshold: 0.55
Accuracy: 0.5967741935483871
Precision: 0.7631578947368421
Recall (Sensitivity): 0.6444444444444445
F1-score: 0.6987951807228916
Threshold: 0.59
Accuracy: 0.6290322580645161
Precision: 0.8055555555555556
Recall (Sensitivity): 0.6444444444444445
F1-score: 0.7160493827160493


In [34]:
df = pd.read_csv('output_alt.csv')
df.head()

Unnamed: 0,clip,total_frames,abn_pred_frames,abn_ratio,is_abn
0,abnormal-R101-m4,440,258,0.586364,1
1,abnormal-R101-m6,455,230,0.586364,1
2,abnormal-R102-m11,230,45,0.586364,1
3,abnormal-R102-m9,252,71,0.586364,1
4,abnormal-R110-m7,291,15,0.586364,1


In [36]:
# setting threshold
thresholds = [0.4, 0.45, 0.5, 0.55, 0.59]

for threshold in thresholds:

    # Ground truth labels
    true_labels = df['is_abn']

    # Predicted labels
    predicted_labels = df['abn_ratio'].apply(lambda x: 0 if x <= threshold else 1)

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # Calculate precision
    precision = precision_score(true_labels, predicted_labels)

    # Calculate recall (sensitivity)
    recall = recall_score(true_labels, predicted_labels)

    # Calculate F1-score
    f1 = f1_score(true_labels, predicted_labels)
    
    # Create the confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)


    # Print the results
    print("Threshold:", threshold)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall (Sensitivity):", recall)
    print("F1-score:", f1)
    
    # Print the confusion matrix
    print("Confusion Matrix:")
    print(cm)
    
    # Calculate precision at each class label level
    precision = precision_score(true_labels, predicted_labels, average=None)

    # Calculate recall at each class label level
    recall = recall_score(true_labels, predicted_labels, average=None)

    # Calculate F1-score at each class label level
    f1 = f1_score(true_labels, predicted_labels, average=None)

    # Print the metrics at each class label level
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)



Threshold: 0.4
Accuracy: 0.8548387096774194
Precision: 0.8333333333333334
Recall (Sensitivity): 1.0
F1-score: 0.9090909090909091
Confusion Matrix:
[[ 8  9]
 [ 0 45]]
Precision: [1.         0.83333333]
Recall: [0.47058824 1.        ]
F1-score: [0.64       0.90909091]
Threshold: 0.45
Accuracy: 0.8548387096774194
Precision: 0.8333333333333334
Recall (Sensitivity): 1.0
F1-score: 0.9090909090909091
Confusion Matrix:
[[ 8  9]
 [ 0 45]]
Precision: [1.         0.83333333]
Recall: [0.47058824 1.        ]
F1-score: [0.64       0.90909091]
Threshold: 0.5
Accuracy: 0.8548387096774194
Precision: 0.8333333333333334
Recall (Sensitivity): 1.0
F1-score: 0.9090909090909091
Confusion Matrix:
[[ 8  9]
 [ 0 45]]
Precision: [1.         0.83333333]
Recall: [0.47058824 1.        ]
F1-score: [0.64       0.90909091]
Threshold: 0.55
Accuracy: 0.8548387096774194
Precision: 0.8333333333333334
Recall (Sensitivity): 1.0
F1-score: 0.9090909090909091
Confusion Matrix:
[[ 8  9]
 [ 0 45]]
Precision: [1.         0.833333