In [2]:
from rapidfuzz import fuzz, process
from tqdm import tqdm

# First, copy the evaluation function
def get_ngrams(text, n):
    """Return every contiguous run of ``n`` whitespace-separated words in ``text``.

    The text is split on whitespace and a window of ``n`` words is slid
    across it one word at a time. Each window is returned as a single
    space-joined string, in order of appearance. When ``text`` has fewer
    than ``n`` words (for ``n >= 1``) the result is an empty list.
    """
    tokens = text.split()
    window_count = len(tokens) - n + 1

    ngrams = []
    for start in range(window_count):
        ngrams.append(' '.join(tokens[start:start + n]))
    return ngrams

def evaluate_summary(
    summarization_outputs,
    summarization_val_dataset,
    tokenizer=None,
    verbose=True,
    *,
    score_cutoff=80,
):
    """Return the mean percentage of reference keywords found in the summaries.

    For each (output, keywords) pair, the lower-cased generated summary is
    expanded into all word n-grams of length 1 up to the longest keyword
    (in words). Each lower-cased keyword is then fuzzy-matched against those
    n-grams with ``fuzz.ratio``; it counts as matched when the best score
    reaches ``score_cutoff``. The example's score is the percentage of its
    keywords that matched.

    Args:
        summarization_outputs: Iterable of mappings, each with a
            ``"generated_summary"`` string.
        summarization_val_dataset: Mapping whose ``"gpt_keywords"`` entry is
            an iterable of keyword lists, aligned with
            ``summarization_outputs``.
        tokenizer: Optional object with a ``pad_token`` attribute. Only used
            when ``verbose`` is true, to strip the pad token from the printed
            summary. A missing, ``None`` or empty pad token is ignored.
        verbose: When true, print each match and a per-example report.
        score_cutoff: Minimum ``fuzz.ratio`` score (0-100) for a keyword to
            count as matched.

    Returns:
        The average match percentage (0-100) over the examples that were
        scored. Examples with no keywords cannot be scored and are skipped.
        Returns ``0.0`` when nothing could be scored (for instance, empty
        input) instead of raising ``ZeroDivisionError``.
    """
    total_keyword_match_percentage = 0.0
    scored_examples = 0

    # Resolve the pad token once. Some tokenizers have no pad token (None),
    # and str.replace(None, '') would raise TypeError.
    pad_token = getattr(tokenizer, "pad_token", None) if tokenizer is not None else None

    for output, keywords in tqdm(zip(summarization_outputs, summarization_val_dataset["gpt_keywords"])):
        if not keywords:
            # max() over an empty sequence and the percentage division below
            # would both fail; there is nothing to measure for this example.
            if verbose:
                print("Skipping example with no keywords.")
            continue

        keyword_matches = 0
        generated_text = output["generated_summary"].lower()
        lowered_keywords = [keyword.lower() for keyword in keywords]

        # Only n-grams up to the longest keyword can plausibly match one.
        max_keyword_length = max(len(keyword.split()) for keyword in lowered_keywords)
        all_ngrams = []
        for n in range(1, max_keyword_length + 1):
            all_ngrams.extend(get_ngrams(generated_text, n))

        for keyword in lowered_keywords:
            # extractOne returns None when no candidate reaches score_cutoff
            # (including when the summary is empty and there are no n-grams).
            best_match = process.extractOne(
                keyword, all_ngrams, scorer=fuzz.ratio, score_cutoff=score_cutoff
            )

            if best_match is not None:
                keyword_matches += 1
                if verbose:
                    print(f"Matched '{keyword}' with '{best_match[0]}' (score: {best_match[1]})")

        match_percentage = (keyword_matches / len(keywords)) * 100

        if verbose:
            clean_summary = output['generated_summary']
            if pad_token:
                clean_summary = clean_summary.replace(pad_token, '')
            print(f"\nCompletion: {clean_summary}")
            print(f"Keywords: {keywords}")
            print(f"Match percentage: {match_percentage}%")
            print("-" * 80)

        total_keyword_match_percentage += match_percentage
        scored_examples += 1

    # Average over the pairs actually scored, so a shorter keyword column
    # (zip truncation) or skipped examples do not deflate the mean.
    if scored_examples == 0:
        return 0.0
    return total_keyword_match_percentage / scored_examples

# Test cases
def run_tests():
    """Run ``evaluate_summary`` on hand-written scenarios and print the scores.

    Each scenario is wrapped as a one-example dataset and evaluated in
    verbose mode. Nothing is asserted; the printed output is meant to be
    inspected by eye.
    """
    class MockTokenizer:
        # Stand-in tokenizer: an empty pad token leaves summaries unchanged.
        pad_token = ""

    scenarios = [
        {
            "name": "Exact matches",
            "summary": "artificial intelligence is transforming the healthcare industry rapidly",
            "keywords": ["artificial intelligence", "healthcare industry", "transforming"],
        },
        {
            "name": "Partial/fuzzy matches",
            # The misspellings below are deliberate, to exercise fuzzy matching.
            "summary": "artifical intellegence is changing the health industry fast",
            "keywords": ["artificial intelligence", "healthcare industry", "transforming"],
        },
        {
            "name": "Long phrase matches",
            "summary": "the quick brown fox jumps over the lazy sleeping dog in the park",
            "keywords": ["quick brown fox", "lazy sleeping dog", "in the park"],
        },
        {
            "name": "Mixed length phrases",
            "summary": "deep learning models are becoming more sophisticated every day",
            "keywords": ["deep learning", "sophisticated", "becoming more sophisticated", "models"],
        },
        {
            "name": "Five-word phrases",
            "summary": "the big red ball bounced down the steep hill quickly and rolled into the river",
            "keywords": ["big red ball bounced down", "rolled into the river", "steep hill quickly"],
        },
    ]

    print("Running multi-word keyword matching tests...\n")

    for scenario in scenarios:
        title = scenario["name"]
        print(f"\nTest: {title}")
        print("=" * 50)

        # Shape the scenario the way evaluate_summary expects its inputs:
        # a list of output dicts and a dataset with a "gpt_keywords" column.
        outputs = [{"generated_summary": scenario["summary"]}]
        dataset = {"gpt_keywords": [scenario["keywords"]]}

        score = evaluate_summary(outputs, dataset, MockTokenizer(), verbose=True)

        print(f"\nFinal match percentage: {score:.2f}%\n")

# Run the demo scenarios when this code is executed directly rather than imported.
if __name__ == "__main__":
    run_tests()

Running multi-word keyword matching tests...


Test: Exact matches


1it [00:00, 70.78it/s]


Matched 'artificial intelligence' with 'artificial intelligence' (score: 100.0)
Matched 'healthcare industry' with 'healthcare industry' (score: 100.0)
Matched 'transforming' with 'transforming' (score: 100.0)

Completion: artificial intelligence is transforming the healthcare industry rapidly
Keywords: ['artificial intelligence', 'healthcare industry', 'transforming']
Match percentage: 100.0%
--------------------------------------------------------------------------------

Final match percentage: 100.00%


Test: Partial/fuzzy matches


1it [00:00, 9300.01it/s]


Matched 'artificial intelligence' with 'artifical intellegence' (score: 93.33333333333333)
Matched 'healthcare industry' with 'health industry' (score: 88.23529411764706)

Completion: artifical intellegence is changing the health industry fast
Keywords: ['artificial intelligence', 'healthcare industry', 'transforming']
Match percentage: 66.66666666666666%
--------------------------------------------------------------------------------

Final match percentage: 66.67%


Test: Long phrase matches


1it [00:00, 10131.17it/s]


Matched 'quick brown fox' with 'quick brown fox' (score: 100.0)
Matched 'lazy sleeping dog' with 'lazy sleeping dog' (score: 100.0)
Matched 'in the park' with 'in the park' (score: 100.0)

Completion: the quick brown fox jumps over the lazy sleeping dog in the park
Keywords: ['quick brown fox', 'lazy sleeping dog', 'in the park']
Match percentage: 100.0%
--------------------------------------------------------------------------------

Final match percentage: 100.00%


Test: Mixed length phrases


1it [00:00, 4236.67it/s]


Matched 'deep learning' with 'deep learning' (score: 100.0)
Matched 'sophisticated' with 'sophisticated' (score: 100.0)
Matched 'becoming more sophisticated' with 'becoming more sophisticated' (score: 100.0)
Matched 'models' with 'models' (score: 100.0)

Completion: deep learning models are becoming more sophisticated every day
Keywords: ['deep learning', 'sophisticated', 'becoming more sophisticated', 'models']
Match percentage: 100.0%
--------------------------------------------------------------------------------

Final match percentage: 100.00%


Test: Five-word phrases


1it [00:00, 7825.19it/s]

Matched 'big red ball bounced down' with 'big red ball bounced down' (score: 100.0)
Matched 'rolled into the river' with 'rolled into the river' (score: 100.0)
Matched 'steep hill quickly' with 'steep hill quickly' (score: 100.0)

Completion: the big red ball bounced down the steep hill quickly and rolled into the river
Keywords: ['big red ball bounced down', 'rolled into the river', 'steep hill quickly']
Match percentage: 100.0%
--------------------------------------------------------------------------------

Final match percentage: 100.00%




