In [14]:
from logging import getLogger

# Module-level logger; the summarizer methods below log raw AWS responses
# through it at DEBUG level and classification failures at ERROR level.
logger = getLogger(__name__)
# ARN of the Amazon Comprehend document-classifier endpoint that
# MyNewsSummarizer.summarize passes to classify_document.
# NOTE(review): the ARN hard-codes an AWS region and account ID; consider
# loading it from configuration instead — confirm with the owner.
CLASSIFIER_ENDPOINT_ARN = "arn:aws:comprehend:us-east-1:856210586235:document-classifier-endpoint/mynews-endpoint"

class News:
    """A news item: a title, a description and an optional link."""

    def __init__(self, title, description, link=None) -> None:
        # The three fields are stored as given; nothing is validated here.
        self.title, self.description, self.link = title, description, link

    def __str__(self) -> str:
        """Return the title and the description on two separate lines."""
        return "{}\n{}".format(self.title, self.description)


class RawNews(News):
    """News subclass that adds no fields or behaviour of its own.

    It only forwards its constructor arguments to ``News``.
    """

    def __init__(self, title, description, link=None) -> None:
        super().__init__(title=title, description=description, link=link)


class MyNewsSummary(News):
    """News item that additionally carries a sentiment and a category."""

    def __init__(self, title, description, sentiment, category, link=None) -> None:
        super().__init__(title=title, description=description, link=link)
        self.category = category
        self.sentiment = sentiment

    def __str__(self) -> str:
        """Return ``title -> category, sentiment`` then the description."""
        header = "{} -> {}, {}".format(self.title, self.category, self.sentiment)
        return "{}\n{}".format(header, self.description)


class MyNewsSummarizer:
    """Translate a news item and label it with a sentiment and a category.

    Translation goes through the ``translate`` client; sentiment detection
    and document classification go through the ``comprehend`` client. Both
    clients are created from the session passed to the constructor.
    """

    def __init__(self, source_language, target_language, boto_session,
                 classifier_endpoint_arn=None) -> None:
        """Create the AWS clients and remember the language pair.

        Args:
            source_language: Language code of the incoming news text.
            target_language: Language code to translate the news into.
            boto_session: Object exposing ``client(service_name)``; used to
                build the ``translate`` and ``comprehend`` clients.
            classifier_endpoint_arn: Optional classifier endpoint ARN. When
                ``None`` (the default), ``summarize`` falls back to the
                module-level ``CLASSIFIER_ENDPOINT_ARN``.
        """
        self.source_language = source_language
        self.target_language = target_language
        self.translate_client = boto_session.client('translate')
        self.comprehend_client = boto_session.client('comprehend')
        # Kept as given (possibly None): the fallback to the module constant
        # is resolved at call time so later changes to it are still honoured.
        self.classifier_endpoint_arn = classifier_endpoint_arn

    def _translate_text(self, text):
        """Return ``text`` translated from the source to the target language.

        Empty or ``None`` text yields ``""`` without calling the service,
        because the Translate API requires a non-empty ``Text``.
        """
        if not text:
            return ""

        response = self.translate_client.translate_text(
            Text=text,
            SourceLanguageCode=self.source_language,
            TargetLanguageCode=self.target_language,
            Settings={
                "Formality": "FORMAL"
            },
        )

        logger.debug(response)
        return response['TranslatedText']

    def _detect_sentiment(self, text):
        """Return the ``Sentiment`` value Comprehend reports for ``text``.

        The text is analysed in the source language, i.e. before translation.
        """
        response = self.comprehend_client.detect_sentiment(
            Text=text,
            LanguageCode=self.source_language
        )

        logger.debug(response)
        return response["Sentiment"]

    def _classify_document(self, text, endpoint_arn):
        """Return the name of the first class reported for ``text``.

        Classification is best effort: if the call raises, or the response
        carries no classes, ``"CLASSIFICATION_FAIL"`` is returned instead of
        propagating an error.
        """
        try:
            response = self.comprehend_client.classify_document(
                Text=text,
                EndpointArn=endpoint_arn,
            )
        except Exception as e:
            # Deliberately broad: a classifier failure must not abort the
            # whole summary. logger.exception keeps the traceback.
            logger.exception("Document classification failed: %s", e)
            return "CLASSIFICATION_FAIL"

        logger.debug(response)
        classes = response.get("Classes") or []
        if not classes:
            # A missing or empty class list used to raise here, outside the
            # try block; treat it like any other classification failure.
            logger.error("Document classification returned no classes")
            return "CLASSIFICATION_FAIL"
        return classes[0]["Name"]

    def summarize(self, news: News):
        """Build a ``MyNewsSummary`` for ``news``.

        The title and the description are translated separately. Sentiment
        and category are computed on the untranslated ``"<title>. <description>"``
        text. The link of ``news``, if any, is carried over to the summary.
        """
        title = news.title
        description = news.description

        translated_title = self._translate_text(title)
        translated_description = self._translate_text(description)

        text = f"{title}. {description}"
        sentiment = self._detect_sentiment(text)

        endpoint_arn = self.classifier_endpoint_arn
        if endpoint_arn is None:
            endpoint_arn = CLASSIFIER_ENDPOINT_ARN
        category = self._classify_document(text, endpoint_arn)

        return MyNewsSummary(
            translated_title,
            translated_description,
            sentiment,
            category,
            # Previously dropped; keep the source URL on the summary.
            link=getattr(news, "link", None),
        )



In [15]:
import boto3

# Session created with no explicit arguments; the summarizer builds its AWS
# clients from it and is configured with "en" as source and "ko" as target.
session = boto3.Session()
summarizer = MyNewsSummarizer(
    source_language="en",
    target_language="ko",
    boto_session=session,
)


In [17]:
# Sample item used to run the summarizer once and show its printed form.
title = "Ukraine war: PM calls for 'step-by-step' move from Russian fuel"
description = "Meanwhile, the German chancellor says there is no alternative to Russian supplies at the moment."
news = RawNews(title=title, description=description)

# print() applies str() to its argument, so the summary's __str__ is shown.
print(summarizer.summarize(news))

우크라이나 전쟁: 총리, 러시아 연료에서 '단계별' 이동 촉구 -> business, NEGATIVE
한편, 독일 수상은 현재로선 러시아 물품 외에는 대안이 없다고 해요.


In [16]:
# Lower this module's logger threshold to DEBUG so the logger.debug(...) calls
# that dump the raw AWS responses are no longer filtered out by the logger.
# NOTE(review): a handler that accepts DEBUG records is presumably still
# needed for these messages to be displayed — confirm the logging setup.
logger.setLevel("DEBUG")