In [1]:
# Load an article from <parent of working directory>/data/article.txt
import os
import sys
from pprint import pprint

ABSOLUTE_PATH = os.getcwd()

# The parent of the working directory is put on sys.path so the project's
# `main` package can be imported below. os.path.dirname is used instead of
# splitting on '/' so the result is also correct with Windows separators.
ROOT_PATH = os.path.dirname(ABSOLUTE_PATH)
# Guard the append so re-running this cell does not pile up duplicate entries.
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

# Project-local import: must come after the sys.path adjustment above.
from main.document import Document

input_file_path = os.path.join(ROOT_PATH, 'data', 'article.txt')
document = Document.load_from_local(input_file_path)
pprint(document.content)

('Teenagers in foster care in Scotland are being moved too often, according to '
 'a campaign group. Research carried out\n'
 'by the Fostering Network suggests almost half of fostered young people are '
 'already living with their third foster\n'
 'family since going into care. The group has warned that 750 more foster '
 'carers are \\"urgently\\" needed to meet the\n'
 'demands of the care system. It urged people to \\"open their hearts and '
 'homes\\" to vulnerable youngsters. Currently,\n'
 'more than 5,500 children are in foster care in Scotland, living with 4,400 '
 'families and carers. The Fostering Network\n'
 'surveyed 250 children, teenagers and foster carers across Scotland and '
 'discovered that many young people had failed\n'
 'to find stability. Almost half were already living with their third family, '
 'a quarter were with their fourth family\n'
 'and about 20 were living with their 10th family since going into care. There '
 'was a particular need for homes to be\n

In [4]:
# Generate 20 candidate summarization reports
num_of_candidate = 20


has_title = True                   # has a title
min_num_of_char_in_title = 32      # 32 <= title length <= 80 
max_num_of_char_in_title = 80
compression_rate = 0.3             # compression rate = 0.3
min_num_of_paragraph = 2           # 2 <= number of paragraphs <= 4
max_num_of_paragraph = 4

from openai import AsyncOpenAI
llm_api_client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="dummy_key")
model = "deepseek-r1:8b"

from main.summarizer import BestHitLLMSummarizer
summarizer = BestHitLLMSummarizer(
    client=llm_api_client, 
    model=model,
    has_title=has_title,
    min_num_of_char_in_title=min_num_of_char_in_title,
    max_num_of_char_in_title=max_num_of_char_in_title,
    min_num_of_paragraph=min_num_of_paragraph,
    max_num_of_paragraph=max_num_of_paragraph,
    num_tries=num_of_candidate,
    llm_as_judge=False)


report = await summarizer.summarize(document=document)

Summarize at iteration 0
Summarize at iteration 1
Summarize at iteration 2
Summarize at iteration 3
Summarize at iteration 4
Summarize at iteration 5
Summarize at iteration 6
Summarize at iteration 7
Summarize at iteration 8
Summarize at iteration 9
Summarize at iteration 10
Summarize at iteration 11
Summarize at iteration 12
Summarize at iteration 13
Summarize at iteration 14
Summarize at iteration 15
Summarize at iteration 16
Summarize at iteration 17
Summarize at iteration 18
Summarize at iteration 19
Number of valid reports: 20
Valid report 1; Bert-Score-F1: 0.6778725385665894; Roger-Score-F1: 0.5373134328358209
Valid report 2; Bert-Score-F1: 0.6651332974433899; Roger-Score-F1: 0.4371584699453552
Valid report 3; Bert-Score-F1: 0.6436493396759033; Roger-Score-F1: 0.44
Valid report 4; Bert-Score-F1: 0.6989108920097351; Roger-Score-F1: 0.4166666666666667
Valid report 5; Bert-Score-F1: 0.5905027389526367; Roger-Score-F1: 0.37398373983739835
Valid report 6; Bert-Score-F1: 0.638892531394

In [5]:
# Show the Bert-Score metric of the selected report, measured against the source article
from main.llm_as_judge import Reference
from main.metrics import BertScoreMetricExtractor

# The article text itself is the reference the report is scored against.
bert_reference = Reference(content=document.content)
bert_score_extractor = BertScoreMetricExtractor(reference=bert_reference)
bert_score_metric = bert_score_extractor.extract(report=report)
print(f'metric name: {bert_score_metric.name}, metric value: {bert_score_metric.value}')

metric name: Bert-Score-Metric, metric value: 0.6778725385665894


In [6]:
# Show the Rouge-Score metric of the selected report, measured against the source article
from main.metrics import RougeScoreMetricExtractor

# `Reference` is imported in the preceding cell; the article text is the reference.
rouge_reference = Reference(content=document.content)
rouge_score_extractor = RougeScoreMetricExtractor(reference=rouge_reference)
rouge_score_metric = rouge_score_extractor.extract(report=report)
print(f'metric name: {rouge_score_metric.name}, metric value: {rouge_score_metric.value}')

metric name: Rouge-Score-Metric, metric value: 0.5373134328358209
