In [1]:
# 가중합 예측
import json
import pandas as pd
import string
import pickle
import numpy as np
from konlpy.tag import Okt
from nltk import FreqDist
from nltk.classify import NaiveBayesClassifier

# Load the saved model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
with open('naive_bayes_classifier.pkl', 'rb') as model_file:
    classifier = pickle.load(model_file)

# Load the word features that were used during training.
with open('word_features.pkl', 'rb') as wf_file:
    word_features = pickle.load(wf_file)

# Create the Okt object (its pos() method is used by preprocess_text below).
okt = Okt()

# 특성 추출 함수 정의
def preprocess_text(text):
    """Return the adjectives found in *text*.

    ASCII punctuation (``string.punctuation``) is removed first, then the
    text is tagged with ``okt.pos(..., stem=True)``.  Only tokens tagged
    ``'Adjective'`` are kept, in the order the tagger returns them.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = text.translate(punctuation_table)

    kept = []
    for token, tag in okt.pos(cleaned, stem=True):
        if tag == 'Adjective':
            kept.append(token)
    return kept

def document_features(document, vocabulary=None):
    """Build the ``contains(word)`` feature dict for one document.

    Args:
        document: Iterable of (hashable) tokens making up the document.
        vocabulary: Iterable of words to generate features for.  When
            omitted, the module-level ``word_features`` is used, which is
            the original behaviour.

    Returns:
        A dict mapping ``'contains(<word>)'`` to ``True`` when ``<word>``
        occurs in *document* and ``False`` otherwise, with one entry per
        word in the vocabulary.
    """
    if vocabulary is None:
        # Fall back to the vocabulary loaded at module level.
        vocabulary = word_features

    # A set gives O(1) membership tests while scanning the vocabulary.
    present = set(document)
    return {f'contains({word})': (word in present) for word in vocabulary}

# 새로운 데이터 예측 함수
def predict_sentiment(review):
    """Return the classifier's probability for each sentiment label.

    The review is run through ``preprocess_text`` and ``document_features``
    and the resulting features are passed to ``classifier.prob_classify``.

    Returns:
        A dict keyed by ``'very positive'``, ``'positive'``, ``'neutral'``,
        ``'negative'`` and ``'very negative'`` (in that order) whose values
        are the corresponding probabilities as ``float``.
    """
    labels = (
        'very positive',
        'positive',
        'neutral',
        'negative',
        'very negative',
    )
    distribution = classifier.prob_classify(
        document_features(preprocess_text(review))
    )
    return {label: float(distribution.prob(label)) for label in labels}

# Load the input reviews (sentence-level input works as well).
with open('Naver_Review1000.json', 'r', encoding='utf-8') as file:
    new_data = json.load(file)

# Star value assigned to each sentiment label; the predicted score is the
# probability-weighted sum of these values (an expected rating in 1..5).
SCORE_WEIGHTS = {
    'very positive': 5,
    'positive': 4,
    'neutral': 3,
    'negative': 2,
    'very negative': 1,
}

# Single source of truth for the output path so the final message cannot
# drift away from the file that is actually written.
OUTPUT_PATH = 'FINAL.json'

# Run the predictions.
# Change the shape of `result` here if a different output format is needed.
results = []
for item in new_data:
    review_text = item['body']
    predicted_scores = predict_sentiment(review_text)

    # Accumulate in the same label order as the weight table.
    weighted_score = 0.0
    for label, weight in SCORE_WEIGHTS.items():
        weighted_score += predicted_scores[label] * weight

    result = {
        'review': review_text,
        # 'detailed_scores': predicted_scores,  # per-label probabilities
        'predicted_score': weighted_score,
    }
    results.append(result)

# Save the results as JSON; each entry holds the review text and its score.
with open(OUTPUT_PATH, 'w', encoding='utf-8') as json_file:
    json.dump(results, json_file, indent=4, ensure_ascii=False)

# The previous message named New_Review_Results.json, which is not the file
# written above.
print(f"Sentiment analysis results for new data saved to {OUTPUT_PATH}")


Sentiment analysis results for new data saved to New_Review_Results.json


In [3]:
# 그냥 1점 ~ 5점 분류
import json
import pandas as pd
import string
import pickle
import numpy as np
from konlpy.tag import Okt
from nltk import FreqDist
from nltk.classify import NaiveBayesClassifier

# Load the saved classifier.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
with open('naive_bayes_classifier.pkl', 'rb') as model_file:
    classifier = pickle.load(model_file)

# Load the word features (iterated by document_features below).
with open('word_features.pkl', 'rb') as wf_file:
    word_features = pickle.load(wf_file)

def score_to_sentiment(score):
    """Map a sentiment label to its integer rating.

    Despite the parameter name, *score* is the label string:
    ``'very positive'`` -> 5, ``'positive'`` -> 4, ``'neutral'`` -> 3,
    ``'negative'`` -> 2.  Any other value (including ``'very negative'``)
    yields 1.
    """
    rating_table = (
        ('very positive', 5),
        ('positive', 4),
        ('neutral', 3),
        ('negative', 2),
    )
    for label, rating in rating_table:
        if score == label:
            return rating
    # Fallback covers 'very negative' and every unrecognised value.
    return 1
    
    
# Create the Okt object (its pos() method is used by preprocess_text below).
okt = Okt()

# 특성 추출 함수 정의
def preprocess_text(text):
    """Return the adjectives found in *text*.

    ASCII punctuation (``string.punctuation``) is removed first, then the
    text is tagged with ``okt.pos(..., stem=True)``.  Only tokens tagged
    ``'Adjective'`` are kept, in the order the tagger returns them.
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = text.translate(punctuation_table)

    kept = []
    for token, tag in okt.pos(cleaned, stem=True):
        if tag == 'Adjective':
            kept.append(token)
    return kept

def document_features(document, vocabulary=None):
    """Build the ``contains(word)`` feature dict for one document.

    Args:
        document: Iterable of (hashable) tokens making up the document.
        vocabulary: Iterable of words to generate features for.  When
            omitted, the module-level ``word_features`` is used, which is
            the original behaviour.

    Returns:
        A dict mapping ``'contains(<word>)'`` to ``True`` when ``<word>``
        occurs in *document* and ``False`` otherwise, with one entry per
        word in the vocabulary.
    """
    if vocabulary is None:
        # Fall back to the vocabulary loaded at module level.
        vocabulary = word_features

    # A set gives O(1) membership tests while scanning the vocabulary.
    present = set(document)
    return {f'contains({word})': (word in present) for word in vocabulary}

# 새로운 데이터 예측 함수
def predict_sentiment(review):
    """Return the most probable sentiment label for *review*.

    The review is run through ``preprocess_text`` and ``document_features``
    and the features are passed to ``classifier.prob_classify``.  Only the
    five labels listed below are considered; on a tie the label listed
    first wins.
    """
    labels = (
        'very positive',
        'positive',
        'neutral',
        'negative',
        'very negative',
    )
    distribution = classifier.prob_classify(
        document_features(preprocess_text(review))
    )
    probability_of = {
        label: float(distribution.prob(label)) for label in labels
    }
    # Iterating the dict keeps the label order above, so ties resolve the
    # same way every time.
    return max(probability_of, key=probability_of.get)

# Load the input reviews (sentence-level input works as well).
with open('Naver_Review1000.json', 'r', encoding='utf-8') as src:
    new_data = json.load(src)

# Run the predictions.
# Change the dict built here if a different output format is needed
# (e.g. add a 'detailed_scores' entry for more detailed scores).
results = [
    {
        'review': entry['body'],
        'predicted_score': score_to_sentiment(predict_sentiment(entry['body'])),
    }
    for entry in new_data
]

# Save the results as JSON; each entry holds the review text and its score.
with open('FINAL.json', 'w', encoding='utf-8') as dst:
    json.dump(results, dst, indent=4, ensure_ascii=False)

print("결과가 FINAL에 저장되었음.")


Sentiment analysis results for new data saved to New_Review_Results.json
