In [22]:
import pandas as pd
from ast import literal_eval

In [23]:
from summarizer import Summarization, ChainTypes
from utils import join_lst_elements, remove_newline

In [24]:
# Input CSV for this run; kept as a named value so the path is easy to spot.
DATA_PATH = "./data.csv"
df = pd.read_csv(DATA_PATH)

In [25]:
# Inspect the frame as loaded; original_text has not yet been passed through literal_eval.
df

Unnamed: 0,id,project,original_text,original_summary
0,1,Bangladesh,"['[12th April 2021, Bangladesh] Compared to 20...",Manusher Jonno Foundation study suggested that...
1,2,Bangladesh,"['[28th February 2021, Bangladesh] A recent st...",According to the recent survey by the rights b...
2,3,Bangladesh,"['[22nd Feb 2021, Bangladesh] Protection Secto...",Children have faced difficulty in accessing pr...
3,4,Bangladesh,"['[18th -24th April 2021, Cox Bazar] Refugees:...",The Border Guard Bangladesh detained twenty-tw...


In [26]:
# Evaluate the Python literal stored as text in each original_text cell.
# NOTE(review): not idempotent - re-running this cell after the values have
# been parsed will hand non-string objects to literal_eval.
df["original_text"] = df["original_text"].map(literal_eval)

In [27]:
# Run the project helper join_lst_elements over every parsed original_text value.
df["original_text"] = df["original_text"].apply(join_lst_elements)

In [28]:
# Inspect the frame after original_text went through literal_eval and join_lst_elements.
df

Unnamed: 0,id,project,original_text,original_summary
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...


In [29]:
# Single Summarization instance shared by the summary helpers and calc_score below.
llm_summarizer = Summarization()

In [30]:
# Prompt built once and reused for every row by process_llm_summary.
prompt = llm_summarizer.generate_prompt()

In [31]:
def process_llm_summary(row):
    """Summarize one row's ``original_text`` using the REFINE chain type.

    Relies on the notebook-level ``llm_summarizer`` and ``prompt`` objects.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) exposing an
            ``"original_text"`` entry.

    Returns:
        The result of ``remove_newline`` applied to the generated summary.
    """
    source_docs = llm_summarizer.create_docs(row["original_text"])
    raw_summary = llm_summarizer.generate_summary(
        source_docs, prompt, chain_type=ChainTypes.REFINE
    )
    return remove_newline(raw_summary)


In [32]:
def process_llm_summary_inbuilt_chain(row):
    """Summarize one row's ``original_text`` via ``use_summ_checker_chain``.

    Relies on the notebook-level ``llm_summarizer`` object.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) exposing an
            ``"original_text"`` entry.

    Returns:
        The result of ``remove_newline`` applied to the chain's output.
    """
    source_docs = llm_summarizer.create_docs(row["original_text"])
    checked_summary = llm_summarizer.use_summ_checker_chain(source_docs)
    return remove_newline(checked_summary)

In [38]:
def calc_score(row, inbuilt_chain=False):
    """Score one row's generated summary against its ``original_summary``.

    Relies on the notebook-level ``llm_summarizer`` object.

    Args:
        row: Mapping-like row exposing ``"original_summary"`` plus the
            generated-summary column being scored; both values must be
            strings (``.strip()`` is called on them).
        inbuilt_chain: When True, score ``"generated_summary_B"``;
            otherwise score ``"generated_summary_A"``.

    Returns:
        A 3-tuple ``(rouge1, rouge2, rougeL)`` read from the mapping
        returned by ``llm_summarizer.evaluate``.
    """
    # Only the candidate column differs between the two modes, so choose it
    # once instead of repeating the whole evaluate call in each branch.
    candidate_col = "generated_summary_B" if inbuilt_chain else "generated_summary_A"
    scores = llm_summarizer.evaluate(
        [row["original_summary"].strip()],
        [row[candidate_col].strip()],
    )
    return scores["rouge1"], scores["rouge2"], scores["rougeL"]

In [34]:
# Summary A: apply the REFINE-chain helper to every row. The helper already
# takes the row as its only argument, so no lambda wrapper is needed.
df["generated_summary_A"] = df.apply(process_llm_summary, axis=1)

In [35]:
# Summary B: apply the summ-checker-chain helper to every row. The helper
# already takes the row as its only argument, so no lambda wrapper is needed.
df["generated_summary_B"] = df.apply(process_llm_summary_inbuilt_chain, axis=1)

In [39]:
# Inspect both generated-summary columns before scoring them.
df

Unnamed: 0,id,project,original_text,original_summary,generated_summary_A,generated_summary_B
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...,\nA Manusher Jonno Foundation study has reveal...,"""""""\n[12th April 2021, Bangladesh] According t..."
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...,\nA recent survey conducted by Ain o Salish Ke...,"""""""\n[28th February 2021, Bangladesh] A recent..."
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...,\nChildren in Bangladesh are facing unique cha...,"[22nd Feb 2021, Bangladesh] Children in Bangla..."
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...,"\nDuring the week of 18th -24th April 2021, a ...","""""""\n[18th -24th April 2021, Cox Bazar] Refuge..."


In [40]:
# calc_score returns a 3-tuple per row; result_type="expand" spreads it over
# the three target columns. The function is passed directly (no lambda needed).
df[["rouge1_A", "rouge2_A", "rougeL_A"]] = df.apply(calc_score, axis=1, result_type="expand")

In [41]:
# Score the generated_summary_B column: DataFrame.apply forwards the extra
# keyword argument (inbuilt_chain=True) to calc_score for every row.
df[["rouge1_B", "rouge2_B", "rougeL_B"]] = df.apply(
    calc_score,
    axis=1,
    result_type="expand",
    inbuilt_chain=True,
)

In [42]:
# Inspect the frame with the ROUGE columns for both summary variants appended.
df

Unnamed: 0,id,project,original_text,original_summary,generated_summary_A,generated_summary_B,rouge1_A,rouge2_A,rougeL_A,rouge1_B,rouge2_B,rougeL_B
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...,\nA Manusher Jonno Foundation study has reveal...,"""""""\n[12th April 2021, Bangladesh] According t...",0.42623,0.176796,0.295082,0.346774,0.138211,0.241935
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...,\nA recent survey conducted by Ain o Salish Ke...,"""""""\n[28th February 2021, Bangladesh] A recent...",0.253012,0.073171,0.180723,0.248889,0.06278,0.151111
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...,\nChildren in Bangladesh are facing unique cha...,"[22nd Feb 2021, Bangladesh] Children in Bangla...",0.373626,0.202247,0.32967,0.342857,0.174757,0.285714
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...,"\nDuring the week of 18th -24th April 2021, a ...","""""""\n[18th -24th April 2021, Cox Bazar] Refuge...",0.42623,0.183333,0.327869,0.440678,0.189655,0.305085


In [43]:
# Persist the scored frame; index=False keeps the row index out of the file.
RESULTS_PATH = "results.csv"
df.to_csv(RESULTS_PATH, index=False)