### RNN-LSTM 기반의 한국어 감정 분석기
##### 네이버 영화 리뷰(NSMC)

In [3]:
import pandas as pd
import re
from konlpy.tag import Okt
from soynlp.normalizer import *
from pykospacing import spacing
from hanspell import spell_checker
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

In [4]:
tokenizer = Tokenizer()
okt = Okt()

stopwords = ['은','는','이','가','을','다','를','것','안','만','거','적','한','로','나','점','인','수','내','못','고','왜',
             '그','말','때','듯','요','볼','중','좀','아','뭐','네','걸','번','건','줄','년','전','저','기','지','끝','용',
             '분','데','난','라','별','알','편','야','두','또','임','여','일','면','속','애','성','엔','서','랑','제','씨',
             '냐','함','하','뿐','자','영','시','후','어','몇','눈','신','놈','감','남','준','위','살','간','명','뭔','움',
             '봄','삶','영화','에서','이가','이고','이렇다','그렇다','이렇게','그렇게']

max_len = 25

In [5]:
def sentiment_predict(new_sentence):
    
    new_sentence = re.sub(r"[^ㄱ-ㅎㅏ-ㅣ가-힣 ]", "", new_sentence) # 전처리(한글, 공백 제외한 문자 제거)
    new_sentence = repeat_normalize(new_sentence, num_repeats=1) # 반복 문자
    new_sentence = spacing(new_sentence) # 띄어쓰기
    
    try:
        new_sentence = spell_checker.check(new_sentence).checked # 맞춤법 교정
    except:
        pass
    
    new_sentence = okt.morphs(new_sentence, stem=True) # 토큰화
    new_sentence = [word for word in new_sentence if not word in stopwords] # 불용어 제거
    encoded = tokenizer.texts_to_sequences([new_sentence]) # 정수 인코딩
    pad_new = pad_sequences(encoded, maxlen = max_len) # 패딩
    score = float(loaded_model.predict(pad_new)) # 예측
    
    result = 0  
    
    if(score > 0.5):
        result = 1
    else:
        result = 0
    return result

In [None]:
loaded_model = load_model('nsmc_best_model.h5')

eval_data = pd.read_csv('./[전처리]ko_data.csv', engine='python', encoding='utf-8')
eval_data['Predicted'] = 0

for i in range(len(eval_data)):
    Predicted = sentiment_predict(eval_data['Sentence'][i])
    eval_data['Predicted'][i] = Predicted
    
eval_data.drop(['Sentence'], axis='columns', inplace=True)
eval_data.to_csv("result_2019512014_이동환.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eval_data['Predicted'][i] = Predicted
