In [1]:
import pandas as pd
from ast import literal_eval

In [2]:
from summarizer import Summarization, ChainTypes
from utils import join_lst_elements, remove_newline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the input records from data.csv in the current working directory.
df = pd.read_csv("./data.csv")

In [4]:
# Show the frame exactly as it was read from ./data.csv.
df

Unnamed: 0,id,project,original_text,original_summary
0,1,Bangladesh,"['[12th April 2021, Bangladesh] Compared to 20...",Manusher Jonno Foundation study suggested that...
1,2,Bangladesh,"['[28th February 2021, Bangladesh] A recent st...",According to the recent survey by the rights b...
2,3,Bangladesh,"['[22nd Feb 2021, Bangladesh] Protection Secto...",Children have faced difficulty in accessing pr...
3,4,Bangladesh,"['[18th -24th April 2021, Cox Bazar] Refugees:...",The Border Guard Bangladesh detained twenty-tw...


In [5]:
# Turn each stringified list literal in original_text into an actual Python object.
# Not re-runnable: a second pass would hand already-parsed values to literal_eval.
df["original_text"] = df["original_text"].map(literal_eval)

In [6]:
# Pass every parsed original_text value through the project helper join_lst_elements
# (the saved output that follows shows plain text instead of a list repr).
df["original_text"] = df["original_text"].apply(join_lst_elements)

In [7]:
# Show the frame after original_text went through literal_eval and join_lst_elements.
df

Unnamed: 0,id,project,original_text,original_summary
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...


In [8]:
# Single Summarization instance shared (as a global) by the helper functions and
# the scoring / similarity cells further down.
llm_summarizer = Summarization()

In [9]:
# Prompt object produced by the summarizer; process_llm_summary forwards it to
# llm_summarizer.generate_summary.
prompt = llm_summarizer.generate_prompt()

In [10]:
def process_llm_summary(row):
    """Generate summary "A" for one row using the REFINE chain type.

    Relies on the notebook-level globals ``llm_summarizer`` and ``prompt``.

    Args:
        row: Mapping-like object indexable by ``"original_text"`` (a DataFrame
            row when called through ``df.apply(..., axis=1)``).

    Returns:
        The result of ``remove_newline`` applied to the summary returned by
        ``llm_summarizer.generate_summary``.
    """
    source_docs = llm_summarizer.create_docs(row["original_text"])
    raw_summary = llm_summarizer.generate_summary(
        source_docs, prompt, chain_type=ChainTypes.REFINE
    )
    return remove_newline(raw_summary)


In [11]:
def process_llm_summary_inbuilt_chain(row):
    """Generate summary "B" for one row via ``use_summ_checker_chain``.

    Relies on the notebook-level global ``llm_summarizer``.

    Args:
        row: Mapping-like object indexable by ``"original_text"`` (a DataFrame
            row when called through ``df.apply(..., axis=1)``).

    Returns:
        The result of ``remove_newline`` applied to the chain's output.

    NOTE(review): in the saved outputs some generated_summary_B values begin
    with three double-quote characters; nothing here strips them -- confirm
    whether that is expected from the checker chain.
    """
    source_docs = llm_summarizer.create_docs(row["original_text"])
    checked_summary = llm_summarizer.use_summ_checker_chain(source_docs)
    return remove_newline(checked_summary)

In [12]:
def calc_score(row, inbuilt_chain=False):
    """Score one row's generated summary and return its three ROUGE values.

    Relies on the notebook-level global ``llm_summarizer``.

    Args:
        row: Mapping-like object holding ``"original_text"`` and the generated
            summary column being scored; both values must support ``.strip()``.
        inbuilt_chain: When truthy, score ``"generated_summary_B"``; otherwise
            score ``"generated_summary_A"``.

    Returns:
        Tuple ``(rouge1, rouge2, rougeL)`` read from the mapping returned by
        ``llm_summarizer.evaluate``.

    NOTE(review): the generated summary is scored against ``original_text``
    (the full source text), not ``original_summary`` -- confirm this is the
    intended reference.
    """
    summary_column = "generated_summary_B" if inbuilt_chain else "generated_summary_A"
    rouge = llm_summarizer.evaluate(
        [row["original_text"].strip()],
        [row[summary_column].strip()],
    )
    return tuple(rouge[metric] for metric in ("rouge1", "rouge2", "rougeL"))

In [13]:
# Row-wise REFINE-chain summaries; apply hands each row straight to the helper.
df["generated_summary_A"] = df.apply(process_llm_summary, axis=1)

In [14]:
# Row-wise summaries from the summarizer's checker chain; each row goes straight to the helper.
df["generated_summary_B"] = df.apply(process_llm_summary_inbuilt_chain, axis=1)

In [22]:
# NOTE(review): the saved output of this cell already contains the rouge*_A/B and
# cosine_similarity columns, which are only assigned by cells further down, and the
# execution counter jumps from 14 to 22. The notebook looks like it was run out of
# order -- re-validate with Restart Kernel -> Run All.
df

Unnamed: 0,id,project,original_text,original_summary,generated_summary_A,generated_summary_B,rouge1_A,rouge2_A,rougeL_A,rouge1_B,rouge2_B,rougeL_B,cosine_similarity
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...,A Manusher Jonno Foundation study has reveale...,""""""" [12th April 2021, Bangladesh] According to...",0.422222,0.179775,0.3,0.349206,0.136,0.246032,0.945789
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...,A recent survey conducted by Ain o Salish Ken...,""""""" [28th February 2021, Bangladesh] A recent ...",0.267516,0.077419,0.191083,0.248889,0.06278,0.151111,0.912212
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...,Children in Bangladesh are facing unique chal...,"[22nd Feb 2021, Bangladesh] Children in Bangla...",0.373626,0.202247,0.32967,0.342857,0.174757,0.285714,0.932303
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...,"During the week of 18th - 24th April 2021, a ...","[18th -24th April 2021, Cox Bazar] Refugees: D...",0.42623,0.183333,0.327869,0.314961,0.08,0.173228,0.92974


In [23]:
# ROUGE columns for summary A: calc_score returns a 3-tuple per row, which
# result_type="expand" spreads across the three target columns.
df[["rouge1_A", "rouge2_A", "rougeL_A"]] = df.apply(
    calc_score, axis=1, result_type="expand"
)

In [24]:
# ROUGE columns for summary B: inbuilt_chain=True is forwarded by DataFrame.apply
# to calc_score as a keyword argument for every row.
df[["rouge1_B", "rouge2_B", "rougeL_B"]] = df.apply(
    calc_score, axis=1, result_type="expand", inbuilt_chain=True
)

In [25]:
# Show the frame after the ROUGE columns for both summary variants were assigned.
df

Unnamed: 0,id,project,original_text,original_summary,generated_summary_A,generated_summary_B,rouge1_A,rouge2_A,rougeL_A,rouge1_B,rouge2_B,rougeL_B,cosine_similarity
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...,A Manusher Jonno Foundation study has reveale...,""""""" [12th April 2021, Bangladesh] According to...",0.422222,0.179775,0.3,0.349206,0.136,0.246032,0.945789
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...,A recent survey conducted by Ain o Salish Ken...,""""""" [28th February 2021, Bangladesh] A recent ...",0.267516,0.077419,0.191083,0.248889,0.06278,0.151111,0.912212
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...,Children in Bangladesh are facing unique chal...,"[22nd Feb 2021, Bangladesh] Children in Bangla...",0.373626,0.202247,0.32967,0.342857,0.174757,0.285714,0.932303
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...,"During the week of 18th - 24th April 2021, a ...","[18th -24th April 2021, Cox Bazar] Refugees: D...",0.42623,0.183333,0.327869,0.314961,0.08,0.173228,0.92974


In [26]:
# One similarity value per row; the bound method receives each row directly.
df["cosine_similarity"] = df.apply(llm_summarizer.calc_similarity, axis=1)

In [27]:
# Show the frame after cosine_similarity was (re)assigned.
df

Unnamed: 0,id,project,original_text,original_summary,generated_summary_A,generated_summary_B,rouge1_A,rouge2_A,rougeL_A,rouge1_B,rouge2_B,rougeL_B,cosine_similarity
0,1,Bangladesh,"[12th April 2021, Bangladesh] Compared to 2019...",Manusher Jonno Foundation study suggested that...,A Manusher Jonno Foundation study has reveale...,""""""" [12th April 2021, Bangladesh] According to...",0.422222,0.179775,0.3,0.349206,0.136,0.246032,0.945789
1,2,Bangladesh,"[28th February 2021, Bangladesh] A recent stud...",According to the recent survey by the rights b...,A recent survey conducted by Ain o Salish Ken...,""""""" [28th February 2021, Bangladesh] A recent ...",0.267516,0.077419,0.191083,0.248889,0.06278,0.151111,0.912308
2,3,Bangladesh,"[22nd Feb 2021, Bangladesh] Protection Sector:...",Children have faced difficulty in accessing pr...,Children in Bangladesh are facing unique chal...,"[22nd Feb 2021, Bangladesh] Children in Bangla...",0.373626,0.202247,0.32967,0.342857,0.174757,0.285714,0.932314
3,4,Bangladesh,"[18th -24th April 2021, Cox Bazar] Refugees: D...",The Border Guard Bangladesh detained twenty-tw...,"During the week of 18th - 24th April 2021, a ...","[18th -24th April 2021, Cox Bazar] Refugees: D...",0.42623,0.183333,0.327869,0.314961,0.08,0.173228,0.92974


In [28]:
# Write the final frame to results.csv in the current working directory, without the index.
df.to_csv("results.csv", index=False)