## Inference & Performance evaluation

In [6]:
from rouge_score import rouge_scorer
from typing import Dict, List

class AnswerEvaluator:
    """Score a generated answer against a reference answer using ROUGE.

    Wraps ``rouge_scorer.RougeScorer`` and reports precision, recall and
    F-measure for every configured ROUGE variant, plus the unweighted mean
    of the F-measures.
    """

    def __init__(self, rouge_types: "List[str] | None" = None, use_stemmer: bool = True):
        """Build the underlying ROUGE scorer.

        Args:
            rouge_types: ROUGE variants to compute. When omitted, the
                original set ``['rouge1', 'rouge2', 'rougeL']`` is used.
            use_stemmer: Forwarded unchanged to ``RougeScorer``.
        """
        if rouge_types is None:
            rouge_types = ['rouge1', 'rouge2', 'rougeL']
        self.scorer = rouge_scorer.RougeScorer(list(rouge_types), use_stemmer=use_stemmer)

    def calculate_rouge_scores(self, generated_text: str, reference_text: str) -> Dict:
        """Compute ROUGE precision/recall/F-measure for each configured variant.

        Args:
            generated_text: The text being evaluated.
            reference_text: The text it is compared against.

        Returns:
            A dict keyed by ROUGE variant name; each value is a dict with the
            keys ``'precision'``, ``'recall'`` and ``'fmeasure'``.
        """
        # The scorer is called with the reference first, then the candidate.
        scores = self.scorer.score(reference_text, generated_text)

        # Iterate over whatever the scorer returned instead of naming each
        # variant by hand, so a non-default variant set works too.
        return {
            rouge_type: {
                'precision': score.precision,
                'recall': score.recall,
                'fmeasure': score.fmeasure,
            }
            for rouge_type, score in scores.items()
        }

    def evaluate_answer(self, generated_text: str, reference_text: str) -> Dict:
        """Return the per-variant ROUGE scores and their mean F-measure.

        Args:
            generated_text: The text being evaluated.
            reference_text: The text it is compared against.

        Returns:
            A dict with ``'rouge_scores'`` (see ``calculate_rouge_scores``)
            and ``'average_f1'``, the unweighted mean of the F-measures
            (``0.0`` when no variant was scored).
        """
        rouge_scores = self.calculate_rouge_scores(generated_text, reference_text)
        f_measures = [score['fmeasure'] for score in rouge_scores.values()]

        # Divide by the real number of variants rather than a literal 3.
        average_f1 = sum(f_measures) / len(f_measures) if f_measures else 0.0

        return {
            'rouge_scores': rouge_scores,
            'average_f1': average_f1
        }

# Execution
def main():
    """Score one sample generated answer against its reference and print the ROUGE report."""
    # Sample generated answer to be scored.
    candidate_answer = """Claiming business expenses for a business with no income can be a tricky task, but it is not impossible. 
    There are a few things to keep in mind when claiming business expenses when you have no income. First, you will need to be able 
    to prove that the expenses were incurred in the course of running your business. This can be done by providing receipts, 
    invoices, or other documentation. You will also need to keep track of your expenses throughout the year so that you can 
    accurately report them on your tax return. Finally, it is important to consult with a tax professional to ensure that you 
    are taking advantage of all of the deductions and credits that are available to you."""

    # Reference answer the candidate is compared against.
    reference_answer = """Yes you can claim your business deductions if you are not making any income yet. But first you should decide 
    what structure you want to have for your business. Either a Company structure or a Sole Trader or Partnership. If you choose 
    a Company Structure (which is more expensive to set up) you would claim your deductions but no income. So you would be making 
    a loss, and continue making losses until your income from the business exceed your expenses."""

    # Evaluation
    report = AnswerEvaluator().evaluate_answer(candidate_answer, reference_answer)

    # Report: one section per ROUGE variant, then the overall mean.
    print("\n=== ROUGE Scores ===")
    for variant, metrics in report['rouge_scores'].items():
        section = [
            f"\n{variant}:",
            f"Precision: {metrics['precision']:.4f}",
            f"Recall: {metrics['recall']:.4f}",
            f"F1-Score: {metrics['fmeasure']:.4f}",
        ]
        print("\n".join(section))

    mean_f1 = report['average_f1']
    print(f"\nAverage F1 Score: {mean_f1:.4f}")

# Run the ROUGE demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


=== ROUGE Scores ===

rouge1:
Precision: 0.3197
Recall: 0.4875
F1-Score: 0.3861

rouge2:
Precision: 0.0496
Recall: 0.0759
F1-Score: 0.0600

rougeL:
Precision: 0.1475
Recall: 0.2250
F1-Score: 0.1782

Average F1 Score: 0.2081


In [7]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import Dict, List

class SemanticEvaluator:
    """Compare texts through cosine similarity of sentence embeddings."""

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the sentence-embedding model.

        Args:
            model_name: Name passed to ``SentenceTransformer``. Defaults to
                the model the original code hard-coded.
        """
        self.model = SentenceTransformer(model_name)

    @staticmethod
    def _split_sentences(text: str) -> List[str]:
        """Split *text* on ``'. '`` and drop blank fragments.

        Surrounding whitespace (including newlines from multi-line string
        literals) is stripped from each fragment. Without the filter, an
        empty text or one ending in ``'. '`` would yield an empty
        "sentence" that would then be embedded and scored.
        """
        fragments = (piece.strip() for piece in text.split('. '))
        return [fragment for fragment in fragments if fragment]

    def calculate_semantic_similarity(self, generated_text: str, reference_text: str) -> Dict:
        """Sentence-level semantic similarity between two texts.

        Each generated sentence is matched to its most similar reference
        sentence; the summary statistics are taken over those best matches.

        Args:
            generated_text: The text being evaluated.
            reference_text: The text it is compared against.

        Returns:
            A dict with ``'overall_similarity'`` (mean of the best matches),
            ``'sentence_similarities'`` (one value per generated sentence),
            ``'max_similarity'`` and ``'min_similarity'``. If either text
            contains no sentences, every similarity is reported as ``0.0``.
        """
        gen_sentences = self._split_sentences(generated_text)
        ref_sentences = self._split_sentences(reference_text)

        # Nothing to compare: return a defined result instead of embedding
        # empty strings or reducing over an empty array.
        if not gen_sentences or not ref_sentences:
            return {
                'overall_similarity': 0.0,
                'sentence_similarities': [0.0] * len(gen_sentences),
                'max_similarity': 0.0,
                'min_similarity': 0.0
            }

        # Embedding
        gen_embeddings = self.model.encode(gen_sentences)
        ref_embeddings = self.model.encode(ref_sentences)

        # Rows are generated sentences, columns are reference sentences.
        similarity_matrix = cosine_similarity(gen_embeddings, ref_embeddings)

        # Best-matching reference sentence for each generated sentence.
        max_similarities = np.max(similarity_matrix, axis=1)

        return {
            'overall_similarity': float(np.mean(max_similarities)),
            'sentence_similarities': [float(sim) for sim in max_similarities],
            'max_similarity': float(np.max(max_similarities)),
            'min_similarity': float(np.min(max_similarities))
        }

    def analyze_key_concepts(self, text: str, key_concepts: List[str]) -> Dict:
        """Measure how close the whole text is to each key concept.

        Args:
            text: The text to analyse; it is embedded as a single unit.
            key_concepts: Concept phrases to compare the text against.

        Returns:
            A dict with ``'concept_coverage'`` (concept -> cosine similarity)
            and ``'average_coverage'`` (mean over all concepts, ``0.0`` when
            no concepts are given).
        """
        # No concepts: skip encoding and avoid averaging an empty array.
        if not key_concepts:
            return {'concept_coverage': {}, 'average_coverage': 0.0}

        text_embedding = self.model.encode([text])[0]
        concept_embeddings = self.model.encode(key_concepts)

        similarities = cosine_similarity([text_embedding], concept_embeddings)[0]

        concept_coverage = {
            concept: float(similarity)
            for concept, similarity in zip(key_concepts, similarities)
        }

        return {
            'concept_coverage': concept_coverage,
            'average_coverage': float(np.mean(similarities))
        }

def main():
    """Run the semantic-similarity and key-concept analysis on sample texts and print the results."""
    # Sample generated answer.
    candidate_answer = """Claiming business expenses for a business with no income can be a tricky task, but it is not impossible. 
    There are a few things to keep in mind when claiming business expenses when you have no income. First, you will need to be able 
    to prove that the expenses were incurred in the course of running your business."""

    # Reference answer.
    reference_answer = """Yes you can claim your business deductions if you are not making any income yet. But first you should decide 
    what structure you want to have for your business. Either a Company structure or a Sole Trader or Partnership."""

    # Concept phrases the generated answer is checked against.
    concepts = [
        "business expenses claiming",
        "no income situation",
        "business structure",
        "tax deductions",
        "company formation"
    ]

    scorer = SemanticEvaluator()

    # Sentence-level similarity first, then concept coverage of the generated text.
    similarity = scorer.calculate_semantic_similarity(candidate_answer, reference_answer)
    coverage = scorer.analyze_key_concepts(candidate_answer, concepts)

    # Similarity report
    similarity_report = [
        "\n=== Semantic Similarity Results ===",
        f"Overall Similarity: {similarity['overall_similarity']:.4f}",
        f"Maximum Similarity: {similarity['max_similarity']:.4f}",
        f"Minimum Similarity: {similarity['min_similarity']:.4f}",
    ]
    print("\n".join(similarity_report))

    # Concept coverage report
    print("\n=== Key Concept Coverage ===")
    per_concept = coverage['concept_coverage']
    for concept in per_concept:
        print(f"{concept}: {per_concept[concept]:.4f}")
    mean_coverage = coverage['average_coverage']
    print(f"\nAverage Concept Coverage: {mean_coverage:.4f}")

# Run the semantic-similarity demo only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== Semantic Similarity Results ===
Overall Similarity: 0.6428
Maximum Similarity: 0.7262
Minimum Similarity: 0.4854

=== Key Concept Coverage ===
business expenses claiming: 0.6531
no income situation: 0.4554
business structure: 0.1644
tax deductions: 0.2924
company formation: 0.0897

Average Concept Coverage: 0.3310
