In [19]:
import pandas as P
import requests
import os
from dotenv import load_dotenv
from tqdm import tqdm
from controller.summary_generator import SummaryGeneratorController
from gutenberg_rag import get_llm
from controller.gutenberg_text_splitter import create_document



API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN")

## LLM


In [31]:
llm = get_llm()

In [21]:

headers = {"Authorization": f"Bearer {API_TOKEN}"}

API_URL = "https://datasets-server.huggingface.co/rows?dataset=kmfoda%2Fbooksum&config=default&split=train&&length=10"

def query():
    response = requests.get(API_URL, headers=headers)
    return response.json()
data = query()

In [22]:
data

{'features': [{'feature_idx': 0,
   'name': 'bid',
   'type': {'dtype': 'int64', '_type': 'Value'}},
  {'feature_idx': 1,
   'name': 'is_aggregate',
   'type': {'dtype': 'bool', '_type': 'Value'}},
  {'feature_idx': 2,
   'name': 'source',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 3,
   'name': 'chapter_path',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 4,
   'name': 'summary_path',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 5,
   'name': 'book_id',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 6,
   'name': 'summary_id',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 7,
   'name': 'content',
   'type': {'dtype': 'float64', '_type': 'Value'}},
  {'feature_idx': 8,
   'name': 'summary',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 9,
   'name': 'chapter',
   'type': {'dtype': 'string', '_type': 'Value'}},
  {'feature_idx': 10,
   'name': 'cha

In [23]:
data['rows']

[{'row_idx': 0,
  'row': {'bid': 27681,
   'is_aggregate': True,
   'source': 'cliffnotes',
   'chapter_path': 'all_chapterized_books/27681-chapters/chapters_1_to_2.txt',
   'summary_path': 'finished_summaries/cliffnotes/The Last of the Mohicans/section_1_part_0.txt',
   'book_id': 'The Last of the Mohicans.chapters 1-2',
   'summary_id': 'chapters 1-2',
   'content': None,
   'summary': '{"name": "Chapters 1-2", "url": "https://web.archive.org/web/20201101053205/https://www.cliffsnotes.com/literature/l/the-last-of-the-mohicans/summary-and-analysis/chapters-12", "summary": "Before any characters appear, the time and geography are made clear. Though it is the last war that England and France waged for a country that neither would retain, the wilderness between the forces still has to be overcome first. Thus it is in 1757, in the New York area between the head waters of the Hudson River and Lake George to the north. Because only two years earlier General Braddock was disgracefully routed

# Dataset formation

In [24]:
df = P.DataFrame.from_dict(P.json_normalize(data['rows']), orient='columns')

In [25]:
df

Unnamed: 0,row_idx,truncated_cells,row.bid,row.is_aggregate,row.source,row.chapter_path,row.summary_path,row.book_id,row.summary_id,row.content,row.summary,row.chapter,row.chapter_length,row.summary_name,row.summary_url,row.summary_text,row.summary_analysis,row.summary_length,row.analysis_length
0,0,[],27681,True,cliffnotes,all_chapterized_books/27681-chapters/chapters_...,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapters 1-2,chapters 1-2,,"{""name"": ""Chapters 1-2"", ""url"": ""https://web.a...","\n ""Mine ear is open, and my heart prepared:\...",6471.0,Chapters 1-2,https://web.archive.org/web/20201101053205/htt...,"Before any characters appear, the time and geo...",These two chapters introduce the reader to the...,388.0,473.0
1,1,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/03.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 3,chapter 3,,"{""name"": ""Chapter 3"", ""url"": ""https://web.arch...","\n ""Before these fields were shorn and tilled...",3132.0,Chapter 3,https://web.archive.org/web/20201101053205/htt...,In another part of the forest by the river a f...,This chapter introduces the other three main a...,198.0,149.0
2,2,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/04.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 4,chapter 4,,"{""name"": ""Chapter 4"", ""url"": ""https://web.arch...","\n ""Well, go thy way: thou shalt not from thi...",3075.0,Chapter 4,https://web.archive.org/web/20201101053205/htt...,When the mounted party from Fort Howard approa...,Since this chapter is mostly one of surface ac...,319.0,75.0
3,3,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/05.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 5,chapter 5,,"{""name"": ""Chapter 5"", ""url"": ""https://web.arch...","\n ""In such a night\n Di...",3268.0,Chapter 5,https://web.archive.org/web/20201101053205/htt...,"The pursuit of Magua is unsuccessful, but Hawk...",Here the reader encounters the first bloodshed...,329.0,156.0
4,4,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/06.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 6,chapter 6,,"{""name"": ""Chapter 6"", ""url"": ""https://web.arch...","\n ""Those strains that once did sweet in Zion...",3873.0,Chapter 6,https://web.archive.org/web/20201101053205/htt...,Heyward and the girls are uneasy and Gamut is ...,This chapter shows Cooper in his most inventiv...,321.0,128.0
5,5,[],27681,True,cliffnotes,all_chapterized_books/27681-chapters/chapters_...,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapters 7-8,chapters 7-8,,"{""name"": ""Chapters 7-8"", ""url"": ""https://web.a...","\n ""They do not sleep.\n On ...",7748.0,Chapters 7-8,https://web.archive.org/web/20201101053205/htt...,"Feeling that the cry is some kind of warning, ...",While outwardly these two chapters are concern...,612.0,457.0
6,6,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/09.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 9,chapter 9,,"{""name"": ""Chapter 9"", ""url"": ""https://web.arch...","\n ""Be gay securely...",3114.0,Chapter 9,https://web.archive.org/web/20201101053205/htt...,"In the stillness that follows, Heyward finds i...","With the woodsmen off the scene of action, thi...",254.0,244.0
7,7,[],27681,True,cliffnotes,all_chapterized_books/27681-chapters/chapters_...,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapters 10-11,chapters 10-11,,"{""name"": ""Chapters 10-11"", ""url"": ""https://web...","\n ""I fear we shall outsleep the coming morn\...",8481.0,Chapters 10-11,https://web.archive.org/web/20201101053205/htt...,"Though at first menaced by the Hurons, Heyward...",These chapters are important for certain revel...,549.0,284.0
8,8,[],27681,False,cliffnotes,all_chapterized_books/27681-chapters/12.txt,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapter 12,chapter 12,,"{""name"": ""Chapter 12"", ""url"": ""https://web.arc...","\n ""_Clo._--I am gone, sir,\n And anon, ...",5182.0,Chapter 12,https://web.archive.org/web/20201101053205/htt...,Since the Indians' rifles have been placed to ...,"This is another bloody chapter, but its themat...",219.0,347.0
9,9,[],27681,True,cliffnotes,all_chapterized_books/27681-chapters/chapters_...,finished_summaries/cliffnotes/The Last of the ...,The Last of the Mohicans.chapters 13-14,chapters 13-14,,"{""name"": ""Chapters 13-14"", ""url"": ""https://web...","\n ""I'll seek a readier path.""\n\n PARNELL.\...",8710.0,Chapters 13-14,https://web.archive.org/web/20201101053205/htt...,"Now that the afternoon is shortening, Hawkeye ...",The story has now reached the end of the first...,380.0,260.0


### caching

In [26]:
df.to_csv("./dataset/booksum.csv")

## Samples

In [27]:
df.iloc[0, df.columns.get_loc('row.chapter')]



In [28]:
df.iloc[0, df.columns.get_loc('row.summary_text')]

'Before any characters appear, the time and geography are made clear. Though it is the last war that England and France waged for a country that neither would retain, the wilderness between the forces still has to be overcome first. Thus it is in 1757, in the New York area between the head waters of the Hudson River and Lake George to the north. Because only two years earlier General Braddock was disgracefully routed by a handful of French and Indians, the frontier is now exposed to real and imaginary savage disasters as well as to the horrors of warfare. Fear has replaced reason. Near dusk of a day in July, an Indian runner named Magua arrives at Fort Edward on the upper Hudson. He has come from Fort William Henry at the southern tip of Lake George with the news that the French General Montcalm is moving south with a very large army and that Munro, commander of Fort William Henry, is in urgent need of plentiful reinforcements from General Webb. Early the next morning, a limited detach

In [29]:
generated_summaries = []
baseline_summaries  = []

In [39]:
# baseline_summaries  = []
# for index, chapter in tqdm(df.iterrows()):
#     baseline_summary = df.iloc[index, df.columns.get_loc('row.summary_text')]
#     baseline_summaries.append(baseline_summary)

In [32]:
for index, chapter in tqdm(df.iterrows()):
    print(index)
    chapter = df.iloc[index, df.columns.get_loc('row.chapter')]
    baseline_summary = df.iloc[index, df.columns.get_loc('row.summary_text')]
    # generation
    chapter = create_document(chapter)
    generated_summary = SummaryGeneratorController.generate_summary_from_long_gutenberg_book([chapter], llm)
    # 
    generated_summaries.append(generated_summary)
    baseline_summaries.append(baseline_summary)
   

  warn_deprecated(


0


1it [01:33, 93.01s/it]

1


2it [03:04, 91.87s/it]

2


3it [05:03, 104.61s/it]

3


4it [07:25, 119.39s/it]

4


5it [09:33, 122.34s/it]

5


6it [12:06, 132.85s/it]

6


7it [14:28, 135.86s/it]

7


8it [17:10, 144.01s/it]

8


9it [19:27, 141.76s/it]

9


10it [22:33, 135.39s/it]


In [59]:
generated_summaries

['In "The Poetical Works of Major Heyward," the author\'s son, Major William Heyward Jr., is described as a poet and musician who lived in New England during the 18th century. The book contains a collection of poem and musical accompaniment, including a rare production of his native poet\'s skill. The poem "Standish" is accompanied by a musical accompaniment that includes a manual instrumental accompaniment such as a violin or harp, and a rhythmic rise and fall in the right hand. The author also mentions Heyward interrupting him with broken English to ask for permission to journey through the wildness in silence. Despite the novelty of the musical accompaniment, the author\'s son is praised for his poetry and music.',
 'In "The Adventures of Huckleberry Finn," Mark Twain portrays the adventures of Huck and Jim as they embark on a journey through the Mississippi River. The story follows their escape from the cruel and tyrannical father, who wants to marry Huck\'s friend, Jim. Along the 

In [58]:
baseline_summaries

['Before any characters appear, the time and geography are made clear. Though it is the last war that England and France waged for a country that neither would retain, the wilderness between the forces still has to be overcome first. Thus it is in 1757, in the New York area between the head waters of the Hudson River and Lake George to the north. Because only two years earlier General Braddock was disgracefully routed by a handful of French and Indians, the frontier is now exposed to real and imaginary savage disasters as well as to the horrors of warfare. Fear has replaced reason. Near dusk of a day in July, an Indian runner named Magua arrives at Fort Edward on the upper Hudson. He has come from Fort William Henry at the southern tip of Lake George with the news that the French General Montcalm is moving south with a very large army and that Munro, commander of Fort William Henry, is in urgent need of plentiful reinforcements from General Webb. Early the next morning, a limited detac

In [42]:
eval_dataset = P.DataFrame(
                            zip(baseline_summaries, generated_summaries),
                            columns = ['baseline_summaries', 'generated_summaries']
                           )

In [43]:
eval_dataset

Unnamed: 0,baseline_summaries,generated_summaries
0,"Before any characters appear, the time and geo...","In ""The Poetical Works of Major Heyward,"" the ..."
1,In another part of the forest by the river a f...,"In ""The Adventures of Huckleberry Finn,"" Mark ..."
2,When the mounted party from Fort Howard approa...,"In ""The Last of the Mohicans,"" the Mohawk warr..."
3,"The pursuit of Magua is unsuccessful, but Hawk...","The author, John Heyward, travels with two com..."
4,Heyward and the girls are uneasy and Gamut is ...,"The novel ""The Adventures of Huckleberry Finn""..."
5,"Feeling that the cry is some kind of warning, ...","Title: ""The Red Badge of Courage"" by Stephen C..."
6,"In the stillness that follows, Heyward finds i...","""In the novel ""The Adventures of Huckleberry F..."
7,"Though at first menaced by the Hurons, Heyward...","The novel ""The Red Badge of Courage"" by Stephe..."
8,Since the Indians' rifles have been placed to ...,The narrative follows a group of exploreurs as...
9,"Now that the afternoon is shortening, Hawkeye ...","In the French and Indian War, the Battle of Fo..."


In [44]:
eval_dataset.to_csv("./dataset/booksum_eval.csv")

In [109]:
eval_dataset=P.read_csv("./dataset/booksum_eval.csv")

In [102]:
eval_dataset

Unnamed: 0.1,Unnamed: 0,baseline_summaries,generated_summaries
0,0,"Before any characters appear, the time and geo...","In ""The Poetical Works of Major Heyward,"" the ..."
1,1,In another part of the forest by the river a f...,"In ""The Adventures of Huckleberry Finn,"" Mark ..."
2,2,When the mounted party from Fort Howard approa...,"In ""The Last of the Mohicans,"" the Mohawk warr..."
3,3,"The pursuit of Magua is unsuccessful, but Hawk...","The author, John Heyward, travels with two com..."
4,4,Heyward and the girls are uneasy and Gamut is ...,"The novel ""The Adventures of Huckleberry Finn""..."
5,5,"Feeling that the cry is some kind of warning, ...","Title: ""The Red Badge of Courage"" by Stephen C..."
6,6,"In the stillness that follows, Heyward finds i...","""In the novel ""The Adventures of Huckleberry F..."
7,7,"Though at first menaced by the Hurons, Heyward...","The novel ""The Red Badge of Courage"" by Stephe..."
8,8,Since the Indians' rifles have been placed to ...,The narrative follows a group of exploreurs as...
9,9,"Now that the afternoon is shortening, Hawkeye ...","In the French and Indian War, the Battle of Fo..."


# Evaluation

In [45]:
import evaluate

### Rouge

In [48]:
rouge = evaluate.load('rouge')

In [51]:
results = []

In [88]:
generated_summaries

['In "The Poetical Works of Major Heyward," the author\'s son, Major William Heyward Jr., is described as a poet and musician who lived in New England during the 18th century. The book contains a collection of poem and musical accompaniment, including a rare production of his native poet\'s skill. The poem "Standish" is accompanied by a musical accompaniment that includes a manual instrumental accompaniment such as a violin or harp, and a rhythmic rise and fall in the right hand. The author also mentions Heyward interrupting him with broken English to ask for permission to journey through the wildness in silence. Despite the novelty of the musical accompaniment, the author\'s son is praised for his poetry and music.',
 'In "The Adventures of Huckleberry Finn," Mark Twain portrays the adventures of Huck and Jim as they embark on a journey through the Mississippi River. The story follows their escape from the cruel and tyrannical father, who wants to marry Huck\'s friend, Jim. Along the 

In [89]:

results = rouge.compute(predictions=generated_summaries, references=baseline_summaries, use_aggregator=False)


In [90]:
results

{'rouge1': [0.21526418786692758,
  0.22068965517241382,
  0.21800947867298576,
  0.2633744855967078,
  0.17966903073286053,
  0.1349527665317139,
  0.25178147268408546,
  0.12037037037037038,
  0.21935483870967742,
  0.31139646869983945],
 'rouge2': [0.015717092337917484,
  0.034722222222222224,
  0.028571428571428567,
  0.028925619834710745,
  0.019002375296912115,
  0.008119079837618403,
  0.01909307875894988,
  0.015479876160990712,
  0.01948051948051948,
  0.03542673107890499],
 'rougeL': [0.12133072407045008,
  0.14482758620689656,
  0.12322274881516589,
  0.1646090534979424,
  0.12293144208037826,
  0.09176788124156544,
  0.1377672209026128,
  0.08950617283950618,
  0.12258064516129033,
  0.1508828250401284],
 'rougeLsum': [0.12133072407045008,
  0.14482758620689656,
  0.12322274881516589,
  0.1646090534979424,
  0.12293144208037826,
  0.09176788124156544,
  0.1377672209026128,
  0.08950617283950618,
  0.12258064516129033,
  0.1508828250401284]}

In [96]:
results = P.DataFrame(results, columns=['rouge1','rouge2','rougeL','rougeLsum'])

In [110]:
eval_dataset = P.concat([eval_dataset, results], axis=1)

In [111]:
eval_dataset

Unnamed: 0.1,Unnamed: 0,baseline_summaries,generated_summaries,rouge1,rouge2,rougeL,rougeLsum
0,0,"Before any characters appear, the time and geo...","In ""The Poetical Works of Major Heyward,"" the ...",0.215264,0.015717,0.121331,0.121331
1,1,In another part of the forest by the river a f...,"In ""The Adventures of Huckleberry Finn,"" Mark ...",0.22069,0.034722,0.144828,0.144828
2,2,When the mounted party from Fort Howard approa...,"In ""The Last of the Mohicans,"" the Mohawk warr...",0.218009,0.028571,0.123223,0.123223
3,3,"The pursuit of Magua is unsuccessful, but Hawk...","The author, John Heyward, travels with two com...",0.263374,0.028926,0.164609,0.164609
4,4,Heyward and the girls are uneasy and Gamut is ...,"The novel ""The Adventures of Huckleberry Finn""...",0.179669,0.019002,0.122931,0.122931
5,5,"Feeling that the cry is some kind of warning, ...","Title: ""The Red Badge of Courage"" by Stephen C...",0.134953,0.008119,0.091768,0.091768
6,6,"In the stillness that follows, Heyward finds i...","""In the novel ""The Adventures of Huckleberry F...",0.251781,0.019093,0.137767,0.137767
7,7,"Though at first menaced by the Hurons, Heyward...","The novel ""The Red Badge of Courage"" by Stephe...",0.12037,0.01548,0.089506,0.089506
8,8,Since the Indians' rifles have been placed to ...,The narrative follows a group of exploreurs as...,0.219355,0.019481,0.122581,0.122581
9,9,"Now that the afternoon is shortening, Hawkeye ...","In the French and Indian War, the Battle of Fo...",0.311396,0.035427,0.150883,0.150883


### Blue

In [113]:
bleu = evaluate.load("bleu")

In [115]:
results = bleu.compute(predictions=generated_summaries, references=baseline_summaries)

In [116]:
results

{'bleu': 0.0,
 'precisions': [0.43899018232819076,
  0.0635593220338983,
  0.0035561877667140826,
  0.0],
 'brevity_penalty': 0.16761007036777928,
 'length_ratio': 0.3589227284168135,
 'translation_length': 1426,
 'reference_length': 3973}

### BertScore

In [119]:
bertscore = evaluate.load("bertscore")

In [121]:
results = bertscore.compute(predictions=generated_summaries, references=baseline_summaries, lang="en")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [122]:
results

{'precision': [0.8199960589408875,
  0.8329588770866394,
  0.8281077742576599,
  0.8194102048873901,
  0.8153960108757019,
  0.8100225925445557,
  0.8042126297950745,
  0.8131375908851624,
  0.834378182888031,
  0.8146968483924866],
 'recall': [0.7891746163368225,
  0.7997214794158936,
  0.7986310720443726,
  0.7971629500389099,
  0.7885341048240662,
  0.7855565547943115,
  0.801103949546814,
  0.7854651808738708,
  0.802308976650238,
  0.8009130954742432],
 'f1': [0.8042901754379272,
  0.8160018920898438,
  0.8131023645401001,
  0.8081334829330444,
  0.8017401695251465,
  0.7976019978523254,
  0.8026552200317383,
  0.7990618348121643,
  0.8180294036865234,
  0.8077462315559387],
 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.42.4)'}

In [123]:
results = P.DataFrame(results)

In [124]:
results

Unnamed: 0,precision,recall,f1,hashcode
0,0.819996,0.789175,0.80429,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
1,0.832959,0.799721,0.816002,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
2,0.828108,0.798631,0.813102,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
3,0.81941,0.797163,0.808133,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
4,0.815396,0.788534,0.80174,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
5,0.810023,0.785557,0.797602,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
6,0.804213,0.801104,0.802655,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
7,0.813138,0.785465,0.799062,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
8,0.834378,0.802309,0.818029,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
9,0.814697,0.800913,0.807746,roberta-large_L17_no-idf_version=0.3.12(hug_tr...


In [125]:
eval_dataset = P.concat([eval_dataset, results], axis=1)

In [126]:
eval_dataset

Unnamed: 0.1,Unnamed: 0,baseline_summaries,generated_summaries,rouge1,rouge2,rougeL,rougeLsum,precision,recall,f1,hashcode
0,0,"Before any characters appear, the time and geo...","In ""The Poetical Works of Major Heyward,"" the ...",0.215264,0.015717,0.121331,0.121331,0.819996,0.789175,0.80429,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
1,1,In another part of the forest by the river a f...,"In ""The Adventures of Huckleberry Finn,"" Mark ...",0.22069,0.034722,0.144828,0.144828,0.832959,0.799721,0.816002,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
2,2,When the mounted party from Fort Howard approa...,"In ""The Last of the Mohicans,"" the Mohawk warr...",0.218009,0.028571,0.123223,0.123223,0.828108,0.798631,0.813102,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
3,3,"The pursuit of Magua is unsuccessful, but Hawk...","The author, John Heyward, travels with two com...",0.263374,0.028926,0.164609,0.164609,0.81941,0.797163,0.808133,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
4,4,Heyward and the girls are uneasy and Gamut is ...,"The novel ""The Adventures of Huckleberry Finn""...",0.179669,0.019002,0.122931,0.122931,0.815396,0.788534,0.80174,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
5,5,"Feeling that the cry is some kind of warning, ...","Title: ""The Red Badge of Courage"" by Stephen C...",0.134953,0.008119,0.091768,0.091768,0.810023,0.785557,0.797602,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
6,6,"In the stillness that follows, Heyward finds i...","""In the novel ""The Adventures of Huckleberry F...",0.251781,0.019093,0.137767,0.137767,0.804213,0.801104,0.802655,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
7,7,"Though at first menaced by the Hurons, Heyward...","The novel ""The Red Badge of Courage"" by Stephe...",0.12037,0.01548,0.089506,0.089506,0.813138,0.785465,0.799062,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
8,8,Since the Indians' rifles have been placed to ...,The narrative follows a group of exploreurs as...,0.219355,0.019481,0.122581,0.122581,0.834378,0.802309,0.818029,roberta-large_L17_no-idf_version=0.3.12(hug_tr...
9,9,"Now that the afternoon is shortening, Hawkeye ...","In the French and Indian War, the Battle of Fo...",0.311396,0.035427,0.150883,0.150883,0.814697,0.800913,0.807746,roberta-large_L17_no-idf_version=0.3.12(hug_tr...


### Note

The quality, coherence, and informativeness can be generated using another LLM such as GPT-4 to evaluate the summaries (online evaluation).
Here an example below inspired from OpenAI.


In [None]:
# Evaluation prompt template based on G-Eval
EVALUATION_PROMPT_TEMPLATE = """
You will be given one summary written for an article. Your task is to rate the summary on one metric.
Please make sure you read and understand these instructions very carefully. 
Please keep this document open while reviewing, and refer to it as needed.

Evaluation Criteria:

{criteria}

Evaluation Steps:

{steps}

Example:

Source Text:

{document}

Summary:

{summary}

Evaluation Form (scores ONLY):

- {metric_name}
"""

# Metric 1: Relevance

RELEVANCY_SCORE_CRITERIA = """
Relevance(1-5) - selection of important content from the source. \
The summary should include only important information from the source document. \
Annotators were instructed to penalize summaries which contained redundancies and excess information.
"""

RELEVANCY_SCORE_STEPS = """
1. Read the summary and the source document carefully.
2. Compare the summary to the source document and identify the main points of the article.
3. Assess how well the summary covers the main points of the article, and how much irrelevant or redundant information it contains.
4. Assign a relevance score from 1 to 5.
"""

# Metric 2: Coherence

COHERENCE_SCORE_CRITERIA = """
Coherence(1-5) - the collective quality of all sentences. \
We align this dimension with the DUC quality question of structure and coherence \
whereby "the summary should be well-structured and well-organized. \
The summary should not just be a heap of related information, but should build from sentence to a\
coherent body of information about a topic."
"""

COHERENCE_SCORE_STEPS = """
1. Read the article carefully and identify the main topic and key points.
2. Read the summary and compare it to the article. Check if the summary covers the main topic and key points of the article,
and if it presents them in a clear and logical order.
3. Assign a score for coherence on a scale of 1 to 5, where 1 is the lowest and 5 is the highest based on the Evaluation Criteria.
"""

# Metric 3: Consistency

CONSISTENCY_SCORE_CRITERIA = """
Consistency(1-5) - the factual alignment between the summary and the summarized source. \
A factually consistent summary contains only statements that are entailed by the source document. \
Annotators were also asked to penalize summaries that contained hallucinated facts.
"""

CONSISTENCY_SCORE_STEPS = """
1. Read the article carefully and identify the main facts and details it presents.
2. Read the summary and compare it to the article. Check if the summary contains any factual errors that are not supported by the article.
3. Assign a score for consistency based on the Evaluation Criteria.
"""

# Metric 4: Fluency

FLUENCY_SCORE_CRITERIA = """
Fluency(1-3): the quality of the summary in terms of grammar, spelling, punctuation, word choice, and sentence structure.
1: Poor. The summary has many errors that make it hard to understand or sound unnatural.
2: Fair. The summary has some errors that affect the clarity or smoothness of the text, but the main points are still comprehensible.
3: Good. The summary has few or no errors and is easy to read and follow.
"""

FLUENCY_SCORE_STEPS = """
Read the summary and evaluate its fluency based on the given criteria. Assign a fluency score from 1 to 3.
"""


def get_geval_score(
    criteria: str, steps: str, document: str, summary: str, metric_name: str
):
    prompt = EVALUATION_PROMPT_TEMPLATE.format(
        criteria=criteria,
        steps=steps,
        metric_name=metric_name,
        document=document,
        summary=summary,
    )
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=5,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response.choices[0].message.content


evaluation_metrics = {
    "Relevance": (RELEVANCY_SCORE_CRITERIA, RELEVANCY_SCORE_STEPS),
    "Coherence": (COHERENCE_SCORE_CRITERIA, COHERENCE_SCORE_STEPS),
    "Consistency": (CONSISTENCY_SCORE_CRITERIA, CONSISTENCY_SCORE_STEPS),
    "Fluency": (FLUENCY_SCORE_CRITERIA, FLUENCY_SCORE_STEPS),
}

summaries = {"Summary 1": eval_summary_1, "Summary 2": eval_summary_2}

data = {"Evaluation Type": [], "Summary Type": [], "Score": []}

for eval_type, (criteria, steps) in evaluation_metrics.items():
    for summ_type, summary in summaries.items():
        data["Evaluation Type"].append(eval_type)
        data["Summary Type"].append(summ_type)
        result = get_geval_score(criteria, steps, excerpt, summary, eval_type)
        score_num = int(result.strip())
        data["Score"].append(score_num)

pivot_df = pd.DataFrame(data, index=None).pivot(
    index="Evaluation Type", columns="Summary Type", values="Score"
)
styled_pivot_df = pivot_df.style.apply(highlight_max, axis=1)
display(styled_pivot_df)