In [107]:
import os
import evaluate
from rouge_score import rouge_scorer
from enum import Enum
from dotenv import load_dotenv

import warnings
warnings.filterwarnings("ignore")

In [108]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

from langchain.chains.summarize import load_summarize_chain

from langchain.chains import LLMSummarizationCheckerChain

In [109]:
load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [110]:
rouge = evaluate.load('rouge')

In [111]:
class ChainTypes(Enum):
    STUFF = 0
    MAP_REDUCE = 1
    REFINE = 2

In [179]:
class Summarization:
    def __init__(
        self,
        temperature=0,
        model_name="text-davinci-003" # gpt-3.5-turbo
    ):
        self.llm = OpenAI(
            temperature=temperature,
            model=model_name
        )
    
    def textsplitter(self):
        return RecursiveCharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50,
            separators=["\n\n", "\n", "\t"]
        )
    
    def load_data(self, filename):
        with open(filename) as f:
            texts = f.read()
        
        return texts
    
    def create_docs(self, texts):
        text_splitter = self.textsplitter()
        texts = text_splitter.split_text(texts)
        docs = [Document(page_content=t) for t in texts]
        return docs
    
    def generate_prompt(self):
        prompt_template = """You are a humanitarian analyst and has a strong domain knowledge. 
        Write a concise summary of the following including key points by answering following questions

        {text}

        CONCISE SUMMARY:
        """

        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["text"]
        )
        return prompt

    def generate_refine_prompt(self):
        refine_template = (
            "Your job is to produce a final summary\n"
            "We have provided an existing summary up to a certain point: {existing_answer}\n"
            "We have the opportunity to refine the existing summary\n"
            "(only if needed) with some more context below.\n"
            "----------------\n"
            "{text}\n"
            "----------------\n"
            "Given the new context, refine the original summary\n"
            "If the context isn't useful, return the original summary"
        )
        refine_prompt = PromptTemplate(
            template=refine_template,
            input_variables=["existing_answer", "text"]
        )
        return refine_prompt

    def generate_summary(
        self,
        docs,
        prompt,
        chain_type=ChainTypes.STUFF,
        verbose=False
    ):
        if chain_type==ChainTypes.MAP_REDUCE:
            chain = load_summarize_chain(
                llm=self.llm,
                chain_type="map_reduce",
                verbose=verbose,
                map_prompt=prompt,
                combine_prompt=prompt
            )
        elif chain_type==ChainTypes.REFINE:
            chain = load_summarize_chain(
                llm=self.llm,
                chain_type="refine",
                verbose=verbose,
                question_prompt=prompt,
                refine_prompt=self.generate_refine_prompt()
            )
        else:
            chain = load_summarize_chain(
                llm=self.llm,
                chain_type=chain_type,
                verbose=verbose
            )
        return chain.run(docs) # summary
    
    def use_summ_checker_chain(self, texts):
        checker_chain = LLMSummarizationCheckerChain(
            llm=self.llm,
            verbose=False,
            max_checks=2
        )
        return checker_chain.run(texts)

    def evaluate(
        self,
        original_summary,
        generated_summary
    ):
        metric_score = rouge.compute(
            predictions=generated_summary,
            references=original_summary
        )
        return metric_score
    
    def rouge_scorer(
        self,
        original_summary,
        model_summary
    ):
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
        scores = scorer.score(original_summary, model_summary)
        return scores


In [180]:
summarizer = Summarization()

In [181]:
texts = summarizer.load_data("evidence.txt")

In [182]:
docs = summarizer.create_docs(texts)

In [183]:
prompt = summarizer.generate_prompt()

In [184]:
summary = summarizer.generate_summary(
    docs,
    prompt,
    chain_type=ChainTypes.REFINE
)

In [185]:
summary = summary.replace("\n", " ")

In [186]:
summary2 = summarizer.use_summ_checker_chain(docs)

In [193]:
summary2

'"""\nAmong internally displaced households reporting having child(ren) 5 years old or younger, 29% reported that they had noticed at least one child in their household losing weight in the 30 days prior to data collection in Ngala LGA, Borno State (19/01/2021). This was compared to 31% of non-displaced households (n=49). In Mafa LGA, 30% of internally displaced households reported noticing at least one child in their household losing weight in the 30 days prior to data collection, compared to 15% of non-displaced households (n=69). In Konduga LGA, 30% of internally displaced households reported noticing at least one child in their household losing weight in the 30 days prior to data collection, compared to 19% of non-displaced households (n=62). In Hawul LGA, 40% of internally displaced households reported noticing at least one child in their household losing weight in the 30 days prior to data collection, compared to 35% of non-displaced households (n=67). In Dikwa LGA, 59% of intern

In [194]:
original_summary = "Of the 6 assessed LGAs in Borno, in four LGAs, a higher percentage of displaced households have noticed at least one child in their household losing weight in the 30 days prior to data collection than non displaced households with exception to Ngala and Biu LGA. The highest percentage of households noticing the child lose weight was Dikwa LGA (Displaced 59%, Non displaced 51%) followed by Biu LGA (Displaced 37%, Non displaced 46%) and Hawul LGA (Displaced 40%, Non displaced 35%)."

In [195]:
score1  = summarizer.rouge_scorer(original_summary, summary)

In [196]:
score2  = summarizer.rouge_scorer(original_summary, summary2)

In [197]:
score1

{'rouge1': Score(precision=0.5964912280701754, recall=0.41975308641975306, fmeasure=0.49275362318840576),
 'rougeL': Score(precision=0.45614035087719296, recall=0.32098765432098764, fmeasure=0.3768115942028985)}

In [198]:
score2

{'rouge1': Score(precision=0.3160621761658031, recall=0.7530864197530864, fmeasure=0.44525547445255476),
 'rougeL': Score(precision=0.22797927461139897, recall=0.5432098765432098, fmeasure=0.32116788321167883)}