# In [None]:
import kss
import csv
from transformers import AutoTokenizer, AutoModel
import openpyxl
from konlpy.tag import Komoran
from sklearn.feature_extraction.text import CountVectorizer

# Text summarization function
# Cache of loaded (tokenizer, model) pairs keyed by model name, so the
# pretrained weights are loaded once instead of once per summarized text.
_KCBERT_CACHE = {}


def _load_kcbert(model_name="beomi/kcbert-base"):
    """Return a cached ``(tokenizer, model)`` pair for *model_name*."""
    if model_name not in _KCBERT_CACHE:
        _KCBERT_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name),
        )
    return _KCBERT_CACHE[model_name]


def summarize_text(text, num_sentences=3):
    """Build an extractive summary of *text* from its highest-scoring sentences.

    The text is split into sentences with ``kss``, each sentence is encoded
    with the ``beomi/kcbert-base`` model, and a sentence's score is the sum
    of its mean-pooled last hidden state. The ``num_sentences`` best-scoring
    sentences are joined with single spaces, highest score first.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to keep.

    Returns:
        The summary string, or the literal string ``"NAN"`` when *text* is
        empty, ``None``, whitespace-only, or yields no sentences.
    """
    if not text or not text.strip():
        return "NAN"
    sentences = kss.split_sentences(text)
    if not sentences:
        return "NAN"

    # Function-scope import: the block needs torch.no_grad, and the 'pt'
    # tensors requested below already require torch to be installed.
    import torch

    tokenizer, model = _load_kcbert()

    inputs = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): the mean runs over every token position, including the
    # padding added by padding=True, so a sentence's score depends on the
    # longest sentence in the batch. Kept as-is to preserve the ranking.
    sentence_embeddings = outputs.last_hidden_state.mean(dim=1)

    sentence_scores = sentence_embeddings.sum(dim=1)
    # Convert to plain ints before indexing the Python list of sentences.
    top_indices = sentence_scores.argsort(descending=True)[:num_sentences].tolist()

    return ' '.join(sentences[i] for i in top_indices)

# Keyword extraction function
# POS tags kept as keywords. NOTE(review): in Komoran's tagset these are
# presumably common noun, proper noun, verb and adjective -- confirm.
_KEYWORD_POS_TAGS = frozenset({'NNG', 'NNP', 'VV', 'VA'})

# Lazily created tagger shared by all calls, so it is built only once.
_komoran_tagger = None


def extract_keywords(text):
    """Return the words of *text* whose Komoran POS tag is a keyword tag.

    Args:
        text: The text to analyse.

    Returns:
        A list of words tagged ``NNG``, ``NNP``, ``VV`` or ``VA``, in the
        order Komoran reports them. For empty, ``None`` or whitespace-only
        text the literal string ``"NAN"`` is returned instead of a list, so
        callers that join the result must special-case that sentinel.
    """
    global _komoran_tagger
    if not text or not text.strip():
        return "NAN"
    if _komoran_tagger is None:
        _komoran_tagger = Komoran()
    return [
        word
        for word, pos in _komoran_tagger.pos(text)
        if pos in _KEYWORD_POS_TAGS
    ]

# Text reading function
def read_csv_column(file_path, column_index):
    """Collect the values of one column from a CSV file.

    Args:
        file_path: Path of the CSV file (UTF-8, with or without a BOM).
        column_index: Zero-based index of the column to read.

    Returns:
        A list with the cell at ``column_index`` of every row that has more
        than ``column_index`` cells. Shorter rows are skipped, so positions
        in the result do not necessarily match row numbers in the file.
    """
    # newline='' is required by the csv module so that line breaks embedded
    # in quoted fields are preserved; 'utf-8-sig' strips a leading BOM, which
    # would otherwise be glued to the first cell of the file.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        return [
            row[column_index]
            for row in csv.reader(file)
            if len(row) > column_index
        ]

# Main
file_path = r'C:\Users\Minho\Desktop\project\missonf.csv'  # path to the CSV file
column_index = 15  # index of column P (0-based)

column_strings = read_csv_column(file_path, column_index)

# Create the Excel workbook that will hold the summaries and keywords
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Summary"

# The first two collected values are skipped (presumably header rows --
# TODO confirm); value number `index` is written to sheet row `index + 1`,
# which leaves sheet rows 1 and 2 empty.
for index, original_text in enumerate(column_strings[2:], start=2):
    summary_text = summarize_text(original_text)
    keywords = extract_keywords(original_text)
    # extract_keywords returns the plain string "NAN" for empty text; joining
    # a str would split it into characters ("N, A, N"), so write it as-is.
    keyword_text = keywords if isinstance(keywords, str) else ", ".join(keywords)

    ws.cell(row=index+1, column=1, value=original_text)
    ws.cell(row=index+1, column=2, value=summary_text)
    ws.cell(row=index+1, column=3, value=keyword_text)

output_file_path = "summary_with_keywords.xlsx"
wb.save(output_file_path)
wb.close()

print("요약된 텍스트와 핵심어가 %s 파일에 저장되었습니다." % output_file_path)
