# 모델사용

In [None]:
import pandas as pd
from konlpy.tag import Okt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load data
# NOTE(review): post_df is not created in this cell; presumably it comes from
# the corpus-preparation cell further down, which must have been run first.

# Uses existing sentiment labels (e.g. a 'SentimentCategory' column holding
# 'Positive', 'Negative', 'Neutral'). If the column is missing, the data has
# to be labelled first.
# .copy() detaches the subset from post_df so that adding the 'tokenized'
# column below writes to an independent frame instead of a derived slice
# (avoids pandas' SettingWithCopyWarning).
df = post_df[['Title', 'SentimentCategory']].dropna().copy()

# Text preprocessing: split each title into morphemes and re-join them with
# spaces so CountVectorizer can tokenize on whitespace.
okt = Okt()
df['tokenized'] = df['Title'].apply(lambda x: ' '.join(okt.morphs(x)))

# Train/test split (80/20, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    df['tokenized'],
    df['SentimentCategory'],
    test_size=0.2,
    random_state=42,
)

# Vectorization: the vocabulary is fitted on the training split only and
# then reused for the test split.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Model training
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Prediction
y_pred = model.predict(X_test_vec)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')
# NOTE(review): df_temp_post_df is not defined anywhere in the visible code;
# presumably it is left over from another notebook cell -- confirm or remove.
df_temp_post_df

# 발표용 말뭉치 준비

In [None]:
import pandas as pd

import os
import numpy as np
from datetime import datetime, timedelta
# glob is used below but was never imported in this notebook; import it here
# so the cell runs on its own.
import glob

# Directory holding the crawled post CSV files.
post_path = "/Users/admin/softeer/project/0814/post/"
# glob returns matches in arbitrary order; sorting keeps the row order of the
# concatenated frame (and anything seeded on top of it) reproducible.
post_files = sorted(glob.glob(os.path.join(post_path, "*.csv")))
post_df = pd.concat([pd.read_csv(f) for f in post_files], ignore_index=True)
# format='mixed' lets pandas infer the datetime format per element.
post_df['DateTime'] = pd.to_datetime(post_df['DateTime'], format='mixed')
# Keep only the columns selected here.
post_df = post_df[['URL', 'Title', 'DateTime', 'ViewCount', 'Content', 'CarName']]

import json
import pandas as pd
from konlpy.tag import Okt

class KnuSL:
    """Word lookup over a sentiment lexicon stored as a JSON file.

    The file is read as a list of entries; each entry is a mapping with the
    keys ``word``, ``word_root`` and ``polarity``.
    """

    def __init__(self, json_path):
        """Load the lexicon at *json_path* and index it by ``word``."""
        # 'utf-8-sig' drops a leading byte-order mark if the file has one.
        with open(json_path, encoding='utf-8-sig', mode='r') as f:
            self.data = json.load(f)

        # Build the index once so every lookup is a dict access instead of a
        # scan over the whole lexicon. setdefault keeps the FIRST entry for a
        # duplicated word, which is what a first-match linear scan returns.
        self._index = {}
        for entry in self.data:
            self._index.setdefault(entry['word'], entry)

    def data_list(self, wordname):
        """Return ``(word_root, polarity)`` for *wordname*.

        When the word is not in the lexicon, the pair of placeholder strings
        ``('None', 'None')`` is returned.
        """
        entry = self._index.get(wordname)
        if entry is None:
            return 'None', 'None'
        return entry['word_root'], entry['polarity']

# Sentiment analysis helpers.

# Lazily created Okt tagger shared by every sentiment_analysis call.
_SHARED_OKT = None


def _shared_okt():
    """Return the shared Okt tagger, creating it on first use."""
    global _SHARED_OKT
    if _SHARED_OKT is None:
        _SHARED_OKT = Okt()
    return _SHARED_OKT


def sentiment_analysis(text, knusl, tokenizer=None):
    """Classify *text* as 'Positive', 'Negative' or 'Neutral'.

    The text is split into morphemes, each morpheme is looked up through
    ``knusl.data_list`` and the integer polarities of the words that are
    found are summed. The sign of the sum selects the label.

    Args:
        text: The text to classify. Any non-string value (for example a NaN
            from a missing cell) is labelled 'Neutral'.
        knusl: Lexicon object exposing ``data_list(word)`` that returns a
            ``(word_root, polarity)`` pair, with polarity ``'None'`` for
            unknown words.
        tokenizer: Optional object exposing ``morphs(text)``. When omitted,
            a single Okt tagger is created on first use and reused, instead
            of building a new tagger for every call.

    Returns:
        'Positive' if the summed polarity is above zero, 'Negative' if it is
        below zero, otherwise 'Neutral'.
    """
    # Check the input first so non-string rows never touch the tagger.
    if not isinstance(text, str):
        return 'Neutral'

    if tokenizer is None:
        tokenizer = _shared_okt()

    polarity_score = 0
    for word in tokenizer.morphs(text):
        _, polarity = knusl.data_list(word)
        # 'None' is the lexicon's marker for "word not found".
        if polarity != 'None':
            polarity_score += int(polarity)

    if polarity_score > 0:
        return 'Positive'
    if polarity_score < 0:
        return 'Negative'
    return 'Neutral'

# Load the KNU sentiment lexicon.
ksl = KnuSL('/Users/admin/softeer/project/0821/SentiWord_info.json')


def _label_post(body):
    """Return the sentiment label of one post body using the loaded lexicon."""
    return sentiment_analysis(body, ksl)


# Label every row from its 'Content' text.
# NOTE(review): the original comment referred to the title column, but the
# code reads 'Content' -- confirm which column is intended.
post_df['SentimentCategory'] = post_df['Content'].apply(_label_post)

# Show the text next to the label it received.
print(post_df.loc[:, ['Content', 'SentimentCategory']])
