Import necessary libraries

In [1]:
import csv
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt


Load the CSV file

In [2]:
# Read the saved comparison results from disk into a DataFrame
csv_file = 'image_data_with_results.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the leading rows (5 is the default for head())
df.head(n=5)

Unnamed: 0,label,img-1,img-2,img_1_path,img_2_path,result
0,Akshay Kumar,Akshay Kumar_19.jpg,Akshay Kumar_22.jpg,./Images/Akshay Kumar/Akshay Kumar_19.jpg,./Images/Akshay Kumar/Akshay Kumar_22.jpg,True
1,Akshay Kumar,Akshay Kumar_44.jpg,Akshay Kumar_35.jpg,./Images/Akshay Kumar/Akshay Kumar_44.jpg,./Images/Akshay Kumar/Akshay Kumar_35.jpg,True
2,Akshay Kumar,Akshay Kumar_26.jpg,Akshay Kumar_47.jpg,./Images/Akshay Kumar/Akshay Kumar_26.jpg,./Images/Akshay Kumar/Akshay Kumar_47.jpg,True
3,Akshay Kumar,Akshay Kumar_14.jpg,Akshay Kumar_29.jpg,./Images/Akshay Kumar/Akshay Kumar_14.jpg,./Images/Akshay Kumar/Akshay Kumar_29.jpg,True
4,Akshay Kumar,Akshay Kumar_11.jpg,Akshay Kumar_1.jpg,./Images/Akshay Kumar/Akshay Kumar_11.jpg,./Images/Akshay Kumar/Akshay Kumar_1.jpg,False


Convert `result` and `actual_result` to booleans, then flag each row: `mapped_result` is 1 if the predicted result matches the actual result, 0 if they differ

In [3]:
def _to_bool(series):
    """Return ``series`` with true/false values normalised to Python booleans.

    ``pd.read_csv`` already parses a column holding only True/False (in any
    letter case) as bool; mapping such a column through string keys would turn
    every value into NaN. A column that is already boolean is therefore
    returned unchanged, which also makes this cell safe to re-run.

    Any other column is compared case-insensitively as text: 'true' -> True,
    'false' -> False, and every other value (including missing ones) -> NaN.
    """
    if pd.api.types.is_bool_dtype(series):
        return series
    as_text = series.astype(str).str.strip().str.lower()
    return as_text.map({'true': True, 'false': False})


# Both columns must be present before anything is modified.
# NOTE(review): the preview printed after loading the CSV does not list an
# 'actual_result' column -- confirm the file on disk really contains it.
missing_columns = [col for col in ('result', 'actual_result') if col not in df.columns]
if missing_columns:
    raise KeyError(f'Column(s) missing from {csv_file}: {missing_columns}')

# Normalise the predicted and the ground-truth column to booleans
df['result'] = _to_bool(df['result'])
df['actual_result'] = _to_bool(df['actual_result'])

# Per-row correctness flag: 1 when prediction and ground truth agree, else 0.
# A NaN on either side never compares equal, so such rows are flagged 0.
df['mapped_result'] = (df['result'] == df['actual_result']).astype(int)

# Display updated DataFrame with mapped results
df[['result', 'actual_result', 'mapped_result']].head()

Filter out rows where comparison failed (if necessary)

In [4]:
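# This notebook assumes rows marked 'Comparison Failed' were already excluded upstream.
# If the raw 'result' column can still contain 'Comparison Failed', filter those rows out
# before the boolean mapping step above (the mapping turns any unmapped value into NaN):
# df = df[df['result'] != 'Comparison Failed']

# For now, continue with all rows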


Performance Metrics Calculation

In [5]:
# Score the model's predictions ('result') against the ground truth ('actual_result').
# 'mapped_result' must NOT be used as y_pred: it is a per-row correctness flag
# (1 when prediction == truth), so for every actual-False row it is the inverse
# of the prediction and would corrupt the confusion matrix, precision, recall and F1.

# Keep only rows where both the prediction and the ground truth are known;
# the metric functions cannot handle missing values.
scored = df.dropna(subset=['actual_result', 'result'])
skipped = len(df) - len(scored)
if skipped:
    print(f'Skipped {skipped} row(s) with a missing result or actual_result')
if scored.empty:
    raise ValueError('No rows with both result and actual_result available; cannot compute metrics.')

# Cast to a real bool dtype: a column that ever held NaN is stored as object
y_true = scored['actual_result'].astype(bool)
y_pred = scored['result'].astype(bool)

# Compute confusion matrix; fixing the label order keeps it 2x2
# (rows = actual False/True, columns = predicted False/True) even when a class is absent
conf_matrix = confusion_matrix(y_true, y_pred, labels=[False, True])

# Calculate the performance metrics (True is the positive class)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Display the performance metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Confusion Matrix and Performance Metrics Visualization

In [6]:
# --- Confusion matrix heatmap ---
plt.figure(figsize=(6, 4))
predicted_ticks = ['Predicted False', 'Predicted True']
actual_ticks = ['Actual False', 'Actual True']
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=predicted_ticks,
    yticklabels=actual_ticks,
)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# --- Bar chart of the four summary scores ---
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
values = [accuracy, precision, recall, f1]

fig, ax1 = plt.subplots(figsize=(8, 5))
sns.barplot(x=metrics, y=values, palette='Blues_d', ax=ax1)
ax1.set_title('Performance Metrics')
ax1.set_ylabel('Score')
ax1.set_ylim(0, 1)

# Right-hand axis mirrors the 0-1 score scale as a 0-100 percentage scale
ax2 = ax1.twinx()
ax2.set_ylim(0, 100)
ax2.set_ylabel('Percentage')

# Write each score as a percentage just below the top of its bar
label_style = dict(ha='center', va='center', color='white', weight='bold')
for position, score in enumerate(values):
    ax1.text(position, score - 0.05, f'{score:.2%}', **label_style)

plt.show()