# Chapter 8. 감성 분석



## 8.1 감성 분석의 이해

## 8.2 감성 사전을 이용한 영화 리뷰 감성 분석

In [2]:
import nltk
nltk.download('movie_reviews')
from nltk.corpus import movie_reviews

# Fetch the list of file ids once; it is reused for the summary printout
# and for building the review/label lists below.
fileids = movie_reviews.fileids()

# Corpus overview: size, a few ids, the label set and per-label counts.
print(f'#Review count: {len(fileids)}')
print(f'#Samples of file ids: {fileids[:10]}')
print(f'#Categories of reviews: {movie_reviews.categories()}')
print(f'#Num of "neg" reviews: {len(movie_reviews.fileids(categories="neg"))}')
print(f'#Num of "pos" reviews: {len(movie_reviews.fileids(categories="pos"))}')

# Inspect the first review: its id, the first 500 characters and its label.
fileid = fileids[0]
print(f'#Id of the first review: {fileid}')
print(f'#Part of the first review: {movie_reviews.raw(fileid)[:500]}')
print(f'#Sentiment of the first review: {movie_reviews.categories(fileid)}')

# Parallel lists: raw review text and the first category of each file.
reviews = list(map(movie_reviews.raw, fileids))
categories = [movie_reviews.categories(doc_id)[0] for doc_id in fileids]

#Review count: 2000
#Samples of file ids: ['neg/cv000_29416.txt', 'neg/cv001_19502.txt', 'neg/cv002_17424.txt', 'neg/cv003_12683.txt', 'neg/cv004_12641.txt', 'neg/cv005_29357.txt', 'neg/cv006_17022.txt', 'neg/cv007_4992.txt', 'neg/cv008_29326.txt', 'neg/cv009_29417.txt']
#Categories of reviews: ['neg', 'pos']
#Num of "neg" reviews: 1000
#Num of "pos" reviews: 1000
#Id of the first review: neg/cv000_29416.txt
#Part of the first review: plot : two teen couples go to a church party , drink and then drive . 
they get into an accident . 
one of the guys dies , but his girlfriend continues to see him in her life , and has nightmares . 
what's the deal ? 
watch the movie and " sorta " find out . . . 
critique : a mind-fuck movie for the teen generation that touches on a very cool idea , but presents it in a very bad package . 
which is what makes this review an even harder one to write , since i generally applaud films which attempt
#Sentiment of the first review: ['neg']


[nltk_data] Downloading package movie_reviews to
[nltk_data]     /Users/rose/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


### TextBlob을 이용한 감성 분석

In [3]:
from textblob import TextBlob

# Analyse only the first review as a quick sanity check; `reviews` is the
# list of raw review texts built earlier in this file.
result = TextBlob(reviews[0])
# Print the `sentiment` attribute (the captured output shows polarity and
# subjectivity fields).
print(result.sentiment)

Sentiment(polarity=0.06479782948532947, subjectivity=0.5188408350908352)


In [6]:
#텍스트 리스트를 받아서 polarity의 결과를 데이터에 맞게 반환
def sentiment_TextBlob(docs):
    """Label each document 'pos' or 'neg' from its TextBlob polarity.

    Args:
        docs: Iterable of text strings to classify.

    Returns:
        A list with one label per document, in input order: 'pos' when
        the polarity is strictly greater than 0, otherwise 'neg' (a
        polarity of exactly 0 is therefore labelled 'neg').
    """
    return [
        'pos' if TextBlob(text).sentiment.polarity > 0 else 'neg'
        for text in docs
    ]

In [7]:
from sklearn.metrics import accuracy_score

# Score the TextBlob-based labels against the corpus labels in `categories`
# (true labels first, predictions second).
print('#TextBlob을 이용한 리뷰 감성분석의 정확도:', accuracy_score(categories, sentiment_TextBlob(reviews)))

#TextBlob을 이용한 리뷰 감성분석의 정확도: 0.6


### AFINN을 이용한 감성 분석

In [8]:
from afinn import Afinn

def sentiment_Afinn(docs):
    """Label each document 'pos' or 'neg' from its AFINN score.

    Args:
        docs: Iterable of text strings to classify.

    Returns:
        A list with one label per document, in input order: 'pos' when
        the AFINN score is strictly greater than 0, otherwise 'neg'.
    """
    # One scorer (with emoticon support enabled) is shared by all documents.
    scorer = Afinn(emoticons=True)
    return ['pos' if scorer.score(text) > 0 else 'neg' for text in docs]

# Score the AFINN-based labels against the corpus labels in `categories`
# (true labels first, predictions second).
print('#Afinn을 이용한 리뷰 감성분석의 정확도:', accuracy_score(categories, sentiment_Afinn(reviews)))

#Afinn을 이용한 리뷰 감성분석의 정확도: 0.664


### VADER를 이용한 감성 분석

In [9]:
import nltk
# Download the 'vader_lexicon' NLTK resource before the analyzer is used.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def sentiment_vader(docs):
    """Label each document 'pos' or 'neg' from its VADER compound score.

    Args:
        docs: Iterable of text strings to classify.

    Returns:
        A list with one label per document, in input order: 'pos' when
        the 'compound' score is strictly greater than 0, otherwise 'neg'.
    """
    # One analyzer instance is shared by all documents.
    sia = SentimentIntensityAnalyzer()
    labels = []
    for text in docs:
        compound = sia.polarity_scores(text)['compound']
        labels.append('pos' if compound > 0 else 'neg')
    return labels
# Score the VADER-based labels against the corpus labels in `categories`
# (true labels first, predictions second).
print('#Vader을 이용한 리뷰 감성분석의 정확도:', accuracy_score(categories, sentiment_vader(reviews)))

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/rose/nltk_data...


#Vader을 이용한 리뷰 감성분석의 정확도: 0.635


### 한글 감성사전

1: https://github.com/park1200656/KnuSentiLex   
2: https://github.com/mrlee23/KoreanSentimentAnalyzer

## 8.3 학습을 통한 머신러닝 기반의 감성 분석