<a href="https://colab.research.google.com/github/kimdonggyu2008/2024_2_Capstone/blob/main/run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combined usage when exposing the pipeline through an API

In [1]:
# # 필요한 라이브러리 설치
# pip install flask transformers torch


In [2]:
# from flask import Flask, request, jsonify
# import torch
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# # Flask 앱 초기화
# app = Flask(__name__)

# # 1. 모델과 토크나이저 로드 (KETI-AIR/long-ke-t5-base 모델)
# tokenizer = AutoTokenizer.from_pretrained("KETI-AIR/long-ke-t5-base")
# model = AutoModelForSeq2SeqLM.from_pretrained("KETI-AIR/long-ke-t5-base")

# # 2. 영어 기사 요약 함수
# def summarize_article(article_text):
#     inputs = tokenizer(article_text, truncation=True, padding="longest", return_tensors="pt")
#     summary_ids = model.generate(**inputs, max_length=60, num_beams=5, early_stopping=True)
#     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
#     return summary

# # 3. 요약된 내용을 한국어로 번역하는 함수
# def translate_to_korean(english_text):
#     inputs = tokenizer(english_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
#     output = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=512)
#     translated_text = tokenizer.decode(output[0], skip_special_tokens=True)
#     return translated_text

# # 4. API 엔드포인트 정의
# @app.route('/summarize_translate', methods=['POST'])
# def summarize_and_translate():
#     # 요청에서 JSON 데이터 추출
#     data = request.json
#     article_text = data.get('article', '')

#     if not article_text:
#         return jsonify({"error": "No article text provided"}), 400

#     # 영어 기사를 요약
#     summary = summarize_article(article_text)

#     # 요약된 내용을 한국어로 번역
#     translated_summary = translate_to_korean(summary)

#     # 결과 반환
#     return jsonify({
#         "summary": summary,
#         "translated_summary": translated_summary
#     })

# # 5. API 서버 실행
# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=5000)


# Pipeline test


In [3]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import re
import math


In [4]:

# Summarization model and tokenizer setup.
# The checkpoint is loaded from a hard-coded path under /content/drive
# (presumably a mounted Google Drive in Colab; the mount step is not shown
# in this notebook — confirm it has been run before this cell).
summarizer_model_name = "/content/drive/MyDrive/summarizer/model/model_part_2"
summarizer_tokenizer = PegasusTokenizer.from_pretrained(summarizer_model_name)
summarizer_model = PegasusForConditionalGeneration.from_pretrained(summarizer_model_name)


In [5]:

# Translation model and tokenizer setup.
# The checkpoint is loaded from a hard-coded path under /content/drive
# (presumably a mounted Google Drive in Colab; the mount step is not shown
# in this notebook — confirm it has been run before this cell).
translator_model_name = "/content/drive/MyDrive/translator/model/model_part_3"
translator_tokenizer = AutoTokenizer.from_pretrained(translator_model_name)
translator_model = AutoModelForSeq2SeqLM.from_pretrained(translator_model_name)




In [6]:

def summarize_article(article_text):
    """Summarize *article_text* with the module-level summarizer model.

    The text is tokenized with ``summarizer_tokenizer`` (truncated to at most
    1024 tokens), a summary is generated by ``summarizer_model`` with beam
    search, and the first generated sequence is decoded back to a string
    with special tokens removed.
    """
    encoded = summarizer_tokenizer(
        article_text,
        truncation=True,
        max_length=1024,
        return_tensors="pt",
    )

    # Generation settings: 4 beams, summary length between 50 and 150 tokens.
    generation_options = dict(
        max_length=150,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated = summarizer_model.generate(encoded["input_ids"], **generation_options)

    best_sequence = generated[0]
    return summarizer_tokenizer.decode(best_sequence, skip_special_tokens=True)


In [7]:

def translate_text(text):
    """Translate *text* with the module-level translator model.

    The text is tokenized with ``translator_tokenizer`` (truncation enabled),
    a translation of at most 512 tokens is generated by ``translator_model``
    using 4-beam search, and the first generated sequence is decoded to a
    string with special tokens removed.
    """
    encoded = translator_tokenizer(text, truncation=True, return_tensors="pt")

    generated = translator_model.generate(
        encoded["input_ids"],
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )

    best_sequence = generated[0]
    return translator_tokenizer.decode(best_sequence, skip_special_tokens=True)


In [8]:

def summarize_and_translate(article_text):
    """Run the full pipeline: summarize the article, then translate the summary.

    Returns only the translated summary; the intermediate summary is not
    returned.
    """
    return translate_text(summarize_article(article_text))


In [9]:
def split_text_with_last_sentence_overlap(text, target_chunk_length=2048):
    """Split *text* into sentence-aligned chunks that overlap by one sentence.

    Sentences are detected as runs of text ending in ``.``, ``!`` or ``?``
    followed by whitespace (spaces or newlines). Sentences are packed
    greedily into chunks of at most ``target_chunk_length`` characters, and
    each new chunk starts with the last sentence of the previous chunk so
    that context carries over between chunks.

    Args:
        text: The text to split.
        target_chunk_length: Maximum chunk size in characters. A single
            sentence longer than this is emitted as its own (oversized)
            chunk rather than being cut mid-sentence.

    Returns:
        A list of chunk strings, with sentences joined by single spaces.
        Empty or whitespace-only input yields an empty list.
    """
    # Split after sentence-ending punctuation followed by any whitespace so
    # that paragraph breaks (newlines) also count as sentence boundaries.
    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

    chunks = []
    current = []     # sentences of the chunk currently being built
    current_len = 0  # equals len(" ".join(current))

    for sentence in sentences:
        if not current:
            # Always accept the first sentence of a chunk, even if it is
            # oversized; otherwise an empty chunk would be emitted.
            current = [sentence]
            current_len = len(sentence)
            continue

        if current_len + 1 + len(sentence) <= target_chunk_length:
            current.append(sentence)
            current_len += 1 + len(sentence)
            continue

        # The chunk is full: emit it and seed the next chunk with its last
        # sentence (the overlap) followed by the sentence that did not fit.
        chunks.append(" ".join(current))
        overlap = current[-1]
        if len(overlap) + 1 + len(sentence) <= target_chunk_length:
            current = [overlap, sentence]
            current_len = len(overlap) + 1 + len(sentence)
        else:
            # Overlap would push the new chunk over the limit; drop it so
            # the chunk size bound is respected whenever possible.
            current = [sentence]
            current_len = len(sentence)

    # Flush the final, partially filled chunk.
    if current:
        chunks.append(" ".join(current))

    return chunks

In [10]:

def summarize_long_text(article_text, target_chunk_length=2048):
    """Summarize a long article chunk by chunk.

    The article is split with ``split_text_with_last_sentence_overlap`` and
    every chunk is summarized independently with the module-level
    summarizer model. Each chunk's length and content are printed before it
    is summarized (debug output).

    Returns:
        A list with one summary string per chunk, in chunk order.
    """

    def _summarize_chunk(piece):
        # Debug trace of the chunk being processed.
        print(len(piece))
        print(piece)
        print("현재 청크")

        encoded = summarizer_tokenizer(
            piece,
            truncation=True,
            max_length=target_chunk_length,
            return_tensors="pt",
        )
        generated = summarizer_model.generate(
            encoded["input_ids"],
            max_length=200,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        return summarizer_tokenizer.decode(generated[0], skip_special_tokens=True)

    pieces = split_text_with_last_sentence_overlap(article_text, target_chunk_length)
    return [_summarize_chunk(piece) for piece in pieces]

In [11]:
# Example article (short English news text used as pipeline input).
# NOTE: this value is overwritten by the longer article assigned to
# `article_text` in the following cell.
article_text = """A drone has been "launched towards" the private residence of Israeli Prime Minister Benjamin Netanyahu in the coastal town of Caesarea, his office has said.

"The prime minister and his wife were not at the location, and there were no injuries in the incident," a statement said.

It comes after the Israeli military said three drones were launched from Lebanon into Israel early on Saturday morning, with one hitting a building in Caesarea.

The Israeli government has not said whether the building was part of the Prime Minister's residence nor the extent of any damage.

At 08:19 local time (06:19 BST), the Israel Defense Forces (IDF) said: "In the last hour, three unmanned aerial vehicles crossed into the country from Lebanon.

"Two of the aircraft were intercepted. Another aircraft hit a building in Caesarea, no injuries."

Netanyahu makes use of two private homes, in Caesarea and Jerusalem, and has also spent time at Beit Aghion, the prime minister's official residence in Jerusalem, which is currently being renovated.

The IDF said some 55 rockets had been launched into Israel from Lebanon so far on Saturday.

Israel is continuing to attack targets in Lebanon which it says are linked to the Iran-backed Hezbollah group.

Lebanon's health ministry said on Saturday that two people were killed in an Israeli strike in Jounieh, a Christian-majority town to the north of Beirut.

The strike hit a moving car that was travelling along the main highway, according to local media.

This attack is unusual as most Israeli operations to date have focused on Shia Muslim-majority areas where Hezbollah has a presence."""


In [12]:
# Longer example article; replaces the value assigned to `article_text` in
# the previous cell. Presumably intended as input for the chunked
# summarizer (the calls that would use it are commented out below).
article_text = """
Japanese voters are today heading to the polls in a snap election, following a tumultuous few years for the ruling party which saw a “cascade” of scandals, widespread voter apathy and record-low approval ratings.

The election was announced by Shigeru Ishiba three days after he was selected as the leader of the ruling Liberal Democratic Party (LDP) - before he had been officially sworn in as prime minister.

The decision was made despite the LDP seeing approval ratings of below 20% earlier in the year, in the wake of a political fundraising corruption scandal.

Yet the LDP still remains the strongest contender against opposition parties which have failed to unite, or convince voters they are a viable option to govern.

The main opposition party had an approval rating of just 6.6% before parliament was dissolved.

“It is so hard to make decisions to choose parties, I think people are losing interest,” Miyuki Fujisaki, a long-time LDP supporter who works in the care-home sector, told the BBC ahead of polls opening.

The LDP, she said, has its problems with alleged corruption, “but the opposition also does not stand out at all”.

“They sure complain a lot, but it’s not at all clear on what they want to do,” the 66-year-old said.

For all the apathy, politics in Japan has been moving at a fast pace in recent months.

Shigeru Ishiba took over as prime minister after being voted in by the ruling party following his predecessor Fumio Kishida - who had been in the role since 2021 – making a surprise decision to step down in August.

The move to call the election came at a time when the LDP is desperate to restore its tarnished image among the public.


Ishiba - a long-time politician who previously served as defence minister - has described it as the “people’s verdict”.

But whether it’s enough to restore trust in the LDP - which has been in power almost continuously since 1955 – is uncertain.

A series of scandals has tarnished the ruling party’s reputation. Chief among them is the party’s relationship with the controversial Unification Church - described by critics as a “cult” - and the level of influence it had on lawmakers.

Then came the revelations of the political funding corruption scandal. Japan’s prosecutors have been investigating dozens of LDP lawmakers accused of pocketing proceeds from political fundraising events. Those allegations - running into the millions of dollars - led to the dissolution of powerful factions, the backbone of its internal party politics.

“What a wretched state the ruling party is in,” said Michiko Hamada, who had travelled to Urawa station, on outskirts of Tokyo, for an opposition campaign rally.

“That is what I feel most. It is tax evasion and it’s unforgivable.”

It strikes her as particularly egregious at a time when people in Japan are struggling with high prices. Wages have not changed for three decades – dubbed “the lost 30 years” – but prices have risen at the fastest rate in nearly half a century in the last two years.

This month, as voters were getting ready to go to the polls, saw more price hikes on thousands of food products as well as other day-to-day provisions like mail, pharmaceuticals, electricity and gas.

“I pay 10,000 yen or 20,000 yen ($65 - $130; £50 - £100) more for the food per month (than I used to),” Ms Hamada said.

“And I’m not buying the things I used to buy. I am trying to save up but it still costs more.



Things like fruit are very expensive.”

She is not the only one concerned with high prices. Pensioner Chie Shimizu says she now must work part-time to make ends meet.

“Our hourly wage has gone up a bit but it does not match the prices,” she told the BBC as she picked up some food from a stand at Urawa station. “I come to places like this to find something cheaper and good because everything in regular shops is expensive.”

Ms Shimizu has not voted for years but might this time - although she is not sure which candidate or which party to vote for.

“I can't find anyone that I want to vote for. I feel like there's no one who I can trust to be our leader. I wonder about those who become an MP for their own greed.”

Against this backdrop, it might look like Ishiba has taken a political gamble. His party had held 247 of 465 seats in the lower house, while its coalition partner, Komeito, had 32. A party needs 233 seats to control the house, known as the Diet.

There are now fears Komeito may fail to reach that number again, while the main opposition - which had 98 seats in the previous parliament - began to pick up momentum with voters ahead of Sunday’s election.

“I think the LDP has dug itself a very deep hole to climb out of. It does not enjoy public trust, and why should it?


There's just been a cascade of scandals,” Jeff Kingston, professor of Asian studies and history at Temple University Japan, told the BBC.

But he does not think any of this will necessarily lead to the party losing the elections.

“I think they (LDP) are worried they're going to lose some marginal seats, and there are questions hanging over Komeito and how effective a coalition partner they will be,” Prof Kingston said.

Should they pull off a win, Miyuki Fujisaki, the care-home sector worker, warns they will have to do more than just pay lip service to change.

“I want them to show us what they are going to do so this [the scandals] doesn’t happen again,” she said. “They need to prove it - not just say it like they often do in the election time.”


"""

In [13]:
# print(len(article_text))

In [14]:
# print(summarize_long_text(article_text))

In [15]:

# # 파이프라인 실행
# translated_summary = summarize_and_translate(article_text)
# print("번역된 요약문:", translated_summary)


In [16]:
!pip install transformers huggingface_hub



In [21]:
from huggingface_hub import notebook_login

notebook_login()  # Enter your API token when prompted.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [23]:
from huggingface_hub import HfApi, HfFolder, Repository

# Target Hugging Face Hub repositories for the two fine-tuned models.
summarizer_model_repo = "EXP442/pegasus_summarizer"
translator_model_repo = "EXP442/seq2seq_translator"

# Upload the summarizer model folder (path set in the model-loading cell).
# NOTE(review): the whole folder is uploaded, including training_args.bin
# (see the cell output); presumably the target repos must already exist —
# confirm, or create them first.
api = HfApi()
api.upload_folder(folder_path=summarizer_model_name, repo_id=summarizer_model_repo)

# Upload the translator model folder.
api.upload_folder(folder_path=translator_model_name, repo_id=translator_model_repo)

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.24k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.24k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/837M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/EXP442/seq2seq_translator/commit/af3fa16be5e679cd2b587f420a2799e1019b3dd4', commit_message='Upload folder using huggingface_hub', commit_description='', oid='af3fa16be5e679cd2b587f420a2799e1019b3dd4', pr_url=None, pr_revision=None, pr_num=None)