In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge
from sklearn.metrics import f1_score
from collections import Counter

# Notebook shell escape (IPython `!` syntax, not plain Python): runs the NLTK
# downloader module to fetch the 'punkt' package into the directory given by -d.
# NOTE(review): the target directory is an absolute, user-specific path —
# confirm or adjust it before running on another machine.
!python3 -m nltk.downloader -d /Users/moiz/nltk_data punkt
# Add that same directory to NLTK's data search path.
nltk.data.path.append('/Users/moiz/nltk_data')

def read_file(file_path):
    """Return the text stored at ``file_path``, decoded as UTF-8.

    Leading and trailing whitespace (including the final newline) is removed
    from the returned string.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents.strip()

def calculate_bleu(reference, candidate):
    """Return the sentence-level BLEU score of ``candidate`` against ``reference``.

    Both arguments are raw strings; each is split with ``nltk.word_tokenize``.
    The single tokenized reference is wrapped in a list before being passed to
    ``sentence_bleu``, and ``SmoothingFunction().method1`` is supplied as the
    smoothing function.
    """
    ref_token_lists = [nltk.word_tokenize(reference)]
    hypothesis = nltk.word_tokenize(candidate)
    smoothing = SmoothingFunction().method1
    score = sentence_bleu(
        ref_token_lists,
        hypothesis,
        smoothing_function=smoothing,
    )
    return score

def calculate_meteor(reference, candidate):
    """Return the METEOR score of ``candidate`` against ``reference``.

    Both arguments are raw strings tokenized with ``nltk.word_tokenize``; the
    tokenized reference is passed as a one-element list of references.

    NOTE: ``meteor_score`` is looked up as a module-level name when this
    function runs, so rebinding that name elsewhere in the notebook breaks
    this call.
    """
    ref_words, cand_words = map(nltk.word_tokenize, (reference, candidate))
    return meteor_score([ref_words], cand_words)

def calculate_rouge(reference, candidate):
    """Return the ROUGE scores of ``candidate`` measured against ``reference``.

    A fresh ``Rouge`` scorer is created on every call. ``get_scores`` receives
    the candidate first and the reference second; the first element of its
    result is returned unchanged.
    """
    all_scores = Rouge().get_scores(candidate, reference)
    return all_scores[0]

def calculate_f1(reference, candidate):
    """Return the token-overlap F1 score between ``reference`` and ``candidate``.

    Both strings are tokenized with ``nltk.word_tokenize``. The overlap is the
    multiset intersection of the two token lists, so a repeated token counts
    only as many times as it appears in both. Precision divides the overlap by
    the candidate length, recall by the reference length. Returns 0 when
    precision and recall are both zero (including when either text is empty).
    """
    ref_tokens = nltk.word_tokenize(reference)
    cand_tokens = nltk.word_tokenize(candidate)

    # Counter & Counter keeps the minimum count of each shared token.
    overlap = sum((Counter(ref_tokens) & Counter(cand_tokens)).values())

    precision = overlap / len(cand_tokens) if cand_tokens else 0
    recall = overlap / len(ref_tokens) if ref_tokens else 0

    total = precision + recall
    if total > 0:
        return 2 * (precision * recall) / total
    return 0

# Main Data

In [None]:
# Paths to your files
path_to_reference = 'reference.txt'  # Path to the expert's answer file
path_to_candidate = 'candidate.txt'  # Path to the RAG's answer file

# Read the contents
reference_text = read_file(path_to_reference)
candidate_text = read_file(path_to_candidate)

# Calculate metrics
# NOTE: the METEOR and F1 results are stored as `meteor_value` / `f1_value`,
# not `meteor_score` / `f1_score`. Those two names are functions imported at
# the top of the notebook; rebinding `meteor_score` to a float makes the next
# calculate_meteor() call fail with "TypeError: 'float' object is not callable".
bleu_score = calculate_bleu(reference_text, candidate_text)
meteor_value = calculate_meteor(reference_text, candidate_text)
rouge_scores = calculate_rouge(reference_text, candidate_text)
f1_value = calculate_f1(reference_text, candidate_text)

# Print results
print("BLEU Score:", bleu_score)
print("METEOR Score:", meteor_value)
print("ROUGE Scores:", rouge_scores)
print("F1 Score:", f1_value)

# Seller

# Paths to your files
path_to_reference = 'reference.txt'  # Path to the expert's answer file
path_to_candidate = 'candidate.txt'  # Path to the RAG's answer file

# Read the contents
reference_text = read_file(path_to_reference)
candidate_text = read_file(path_to_candidate)

# Calculate metrics
# Result names deliberately avoid `meteor_score` / `f1_score` so the imported
# functions of the same name stay callable when this section is (re-)run.
bleu_score = calculate_bleu(reference_text, candidate_text)
meteor_value = calculate_meteor(reference_text, candidate_text)
rouge_scores = calculate_rouge(reference_text, candidate_text)
f1_value = calculate_f1(reference_text, candidate_text)

# Print results
print("BLEU Score:", bleu_score)
print("METEOR Score:", meteor_value)
print("ROUGE Scores:", rouge_scores)
print("F1 Score:", f1_value)
