In [10]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load the correct data and extracted data from CSV files
correct_data = pd.read_csv('real_files/form.csv')
extracted_data = pd.read_csv('files/output.csv')

# convert all fields into string
correct_data = correct_data.astype(str)
extracted_data = extracted_data.astype(str)


In [11]:
from fuzzywuzzy import fuzz
def text_similarity(string1, string2):
    return fuzz.token_set_ratio(string1, string2)

In [12]:


'''img_path,bill_type,bill_date,bill_amount'''


# Merge the two DataFrames on the 'image_path' column to compare data for the same images
merged_data = pd.merge(correct_data, extracted_data, on='img_path', suffixes=('_correct', '_extracted'))

# Evaluate bill_type accuracy
bill_type_accuracy = accuracy_score(merged_data['bill_type_correct'], merged_data['bill_type_extracted'])

# Evaluate bill_date accuracy (you may need to implement text similarity matching)
# For text similarity matching, you can use libraries like fuzzywuzzy or difflib

# Evaluate bill_amount accuracy
bill_amount_accuracy = accuracy_score(merged_data['bill_amount_correct'], merged_data['bill_amount_extracted'])

bill_date_accuracy = accuracy_score(merged_data['bill_date_correct'], merged_data['bill_date_extracted'])

# Calculate precision, recall, and F1-score for each column
bill_type_precision = precision_score(merged_data['bill_type_correct'], merged_data['bill_type_extracted'], average='weighted' ,zero_division=0)
bill_type_recall = recall_score(merged_data['bill_type_correct'], merged_data['bill_type_extracted'], average='weighted',zero_division=0)
bill_type_f1 = f1_score(merged_data['bill_type_correct'], merged_data['bill_type_extracted'], average='weighted',zero_division=0)

bill_amount_precision = precision_score(merged_data['bill_amount_correct'], merged_data['bill_amount_extracted'], average='weighted',zero_division=0)
bill_amount_recall = recall_score(merged_data['bill_amount_correct'], merged_data['bill_amount_extracted'], average='weighted',zero_division=0)
bill_amount_f1 = f1_score(merged_data['bill_amount_correct'], merged_data['bill_amount_extracted'], average='weighted',zero_division=0)

bill_date_precision = precision_score(merged_data['bill_date_extracted'], merged_data['bill_date_extracted'], average='weighted',zero_division=0)
bill_date_recall = recall_score(merged_data['bill_date_extracted'], merged_data['bill_date_extracted'], average='weighted',zero_division=0)
bill_date_f1 = f1_score(merged_data['bill_date_extracted'], merged_data['bill_date_extracted'], average='weighted',zero_division=0)


# Print the results
print("bill_type Accuracy:", bill_type_accuracy)
print("bill_type Precision:", bill_type_precision)
print("bill_type Recall:", bill_type_recall)
print("bill_type F1-Score:", bill_type_f1)

print("bill_amount Accuracy:", bill_amount_accuracy)
print("bill_amount Precision:", bill_amount_precision)
print("bill_amount Recall:", bill_amount_recall)
print("bill_amount F1-Score:", bill_amount_f1)

print("bill_date Accuracy:", bill_date_accuracy)
print("bill_date Precision:", bill_date_precision)
print("bill_date Recall:", bill_date_recall)
print("bill_date F1-Score:", bill_date_f1)


bill_type Accuracy: 1.0
bill_type Precision: 1.0
bill_type Recall: 1.0
bill_type F1-Score: 1.0
bill_amount Accuracy: 0.8214285714285714
bill_amount Precision: 0.8214285714285714
bill_amount Recall: 0.8214285714285714
bill_amount F1-Score: 0.8214285714285714
bill_date Accuracy: 0.9285714285714286
bill_date Precision: 1.0
bill_date Recall: 1.0
bill_date F1-Score: 1.0


In [13]:
# Create a list to store the text similarity scores
text_similarity_scores = []

# Iterate through the rows in your DataFrames
for index, row in merged_data.iterrows():
    correct_date = row['bill_date_correct']
    extracted_date = row['bill_date_extracted']
    
    # Calculate text similarity score (adjust the threshold as needed)
    similarity_score = text_similarity(correct_date, extracted_date)
    
    # Append the similarity score to the list
    text_similarity_scores.append(similarity_score)

# Calculate an accuracy metric based on the similarity scores
# For example, you can consider dates with a similarity score above a threshold as correct
threshold = 90  # Adjust the threshold as needed
correct_count = sum(1 for score in text_similarity_scores if score >= threshold)
total_count = len(text_similarity_scores)
accuracy = correct_count / total_count

# Print the accuracy
print("bill_date Accuracy:", accuracy)

bill_date Accuracy: 1.0
