# Implementation of  text summarizer inspired by the CoD approach

In [9]:
# python built-in module
import yaml
import os
import json

# python installed module
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
from langchain.schema import HumanMessage, SystemMessage

# python user defined module
import prompt


In [10]:
# loading environment variable OPENAI_API_KEY from .env file
load_dotenv('.env')

True

In [11]:
# reading config file
with open('config.yaml', "r") as yaml_file:
        config_dict = yaml.safe_load(yaml_file)

In [12]:
# reading text from an input file
input_file_name = config_dict["io_config"]["input_file"]
input_file_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), "input", input_file_name)

with open(input_file_path, "r") as input_file:
    text_content = input_file.read()

In [13]:
# send prompt and receive response from the GPT model
openai= ChatOpenAI(model=config_dict["cod"]["model_name"],
                                   temperature=config_dict["cod"]["temperature"],
                                   max_tokens=config_dict["cod"]["max_tokens"],
                                   model_kwargs={"top_p": config_dict["cod"]["top_p"],
                                                 "presence_penalty": config_dict["cod"]["presence_penalty"],
                                                 "frequency_penalty": config_dict["cod"]["frequency_penalty"]})

cod_messages = [SystemMessage(content=prompt.COD_SYSTEM_PROMPT),
                HumanMessage(content="Here is the input text for you to summarize:\n\n{}".format(text_content))
                           ]

with get_openai_callback() as openai_cb:
    cod_response = openai(cod_messages)

output = cod_response.content

In [14]:
# writing received responce to the file
output_file_name = config_dict["io_config"]["output_file"]
output_file_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), "output", output_file_name)

output_dict = json.loads(output.replace("\n", ""))
summary = output_dict[-1]['Denser_Summary']

result = {"summary": summary,
          "metadata": {"total_tokens": openai_cb.total_tokens,
                       "total_cost": round(openai_cb.total_cost, 3)}}

with open(output_file_path, 'w') as json_file:
    json.dump(result, json_file)

In [15]:
print(f'Input text size: {len(text_content.split(" "))} words')
print(f'Output text size: {len(summary.split(" "))} words')

Input text size: 543 words
Output text size: 53 words
