# 비지도 학습 기반 감성 분석 소개
## SentiWordNet을 이용한 Sentiment Analysis
* WordNet Synset과 SentiWordNet SentiSynset 클래스의 이해

In [18]:
from nltk.corpus import wordnet as wn
import pandas as pd

In [19]:
term = 'sun'
# Look up the WordNet synsets for the word held in `term` ('sun' here).
# A synset object stores one sense of the word together with its meaning,
# part of speech, synonyms and so on.
synsets = wn.synsets(term)
# Report the return type, how many synsets came back, and the synsets themselves.
print('synsets() 반환 type : ', type(synsets))
print('synsets() 반환 값 개수 : ', len(synsets))
print('synsets() 반환 값 : ', synsets)

synsets() 반환 type :  <class 'list'>
synsets() 반환 값 개수 :  7
synsets() 반환 값 :  [Synset('sun.n.01'), Synset('sunlight.n.01'), Synset('sun.n.03'), Synset('sun.n.04'), Synset('sunday.n.01'), Synset('sun.v.01'), Synset('sun.v.02')]


In [20]:
# Print the name, lexname, definition and lemma names of every synset found above.
for synset in synsets:
    print('#### synset 이름 : ', synset.name(),'####')  # synset identifier, e.g. 'sun.n.01'
    print('POS : ', synset.lexname())  # lexname such as 'noun.object' (noun/verb plus a category), not a bare POS tag
    print('Definition:', synset.definition())  # definition of this sense of the word
    print('Lemmas:', synset.lemma_names()) # words that share this sense (synonyms)

#### synset 이름 :  sun.n.01 ####
POS :  noun.object
Definition: the star that is the source of light and heat for the planets in the solar system
Lemmas: ['sun', 'Sun']
#### synset 이름 :  sunlight.n.01 ####
POS :  noun.phenomenon
Definition: the rays of the sun
Lemmas: ['sunlight', 'sunshine', 'sun']
#### synset 이름 :  sun.n.03 ####
POS :  noun.person
Definition: a person considered as a source of warmth or energy or glory etc
Lemmas: ['sun']
#### synset 이름 :  sun.n.04 ####
POS :  noun.object
Definition: any star around which a planetary system revolves
Lemmas: ['sun']
#### synset 이름 :  sunday.n.01 ####
POS :  noun.time
Definition: first day of the week; observed as a day of rest and worship by most Christians
Lemmas: ['Sunday', "Lord's_Day", 'Dominicus', 'Sun']
#### synset 이름 :  sun.v.01 ####
POS :  verb.body
Definition: expose one's body to the sun
Lemmas: ['sun', 'sunbathe']
#### synset 이름 :  sun.v.02 ####
POS :  verb.perception
Definition: expose to the rays of the sun or affect by exposure to the sun
Lemmas: ['sun', 'insolate', 'solarize', 'solarise']

In [21]:
# Create one synset object per word, selecting a specific sense by its synset name.
tree = wn.synset('tree.n.01')
lion = wn.synset('lion.n.01')
# Note the sense number: 'tiger.n.02' is the one whose definition (printed below)
# describes the animal.
tiger = wn.synset('tiger.n.02')
cat = wn.synset('cat.n.01')
dog = wn.synset('dog.n.01')
# Show two of the definitions, separated by a blank line.
print(tree.definition())
print()
print(tiger.definition())

a tall perennial woody plant having a main trunk and branches forming a distinct elevated crown; includes both gymnosperms and angiosperms

large feline of forests in most of Asia having a tawny coat with black stripes; endangered


In [22]:
# name() gives the full synset identifier, e.g. 'tree.n.01'.
print(tree.name())

tree.n.01


In [23]:
# Keep only the text before the first '.', i.e. the bare word ('tree').
print(tree.name().split('.')[0])

tree


In [24]:
# Gather the synsets to compare and derive a display label for each one:
# the text before the first '.' of its synset name ('tree.n.01' -> 'tree').
entities = [tree, lion, tiger, cat, dog]
entity_names = [entity.name().partition('.')[0] for entity in entities]
print(entity_names)

['tree', 'lion', 'tiger', 'cat', 'dog']


In [25]:
# Path similarity between two individual synsets: lion (entities[1]) and
# tiger (entities[2]).
# NOTE: path_similarity()'s second positional parameter is the `verbose` flag,
# not a rounding precision, so no extra argument is passed here; wrap the call
# in round(..., 2) if a shortened value is wanted.
print(entities[1].path_similarity(entities[2]))

0.3333333333333333


In [26]:
# Build the pairwise path-similarity matrix.
# For each entity in turn (entities[0] .. entities[4]) measure its similarity
# against every entity in the list, itself included, rounded to two decimals.
# Each such row is a list; the rows are appended to `similarities` in the
# same order as `entities`.
similarities = []
for en1 in entities:
    similarity = [round(score, 2) for score in map(en1.path_similarity, entities)]
    similarities.append(similarity)
print(similarities)

[[1.0, 0.07, 0.07, 0.08, 0.12], [0.07, 1.0, 0.33, 0.25, 0.17], [0.07, 0.33, 1.0, 0.25, 0.17], [0.08, 0.25, 0.25, 1.0, 0.2], [0.12, 0.17, 0.17, 0.2, 1.0]]


In [27]:
# Wrap the similarity matrix in a DataFrame, labelling both rows and columns
# with the entity names so it reads as a lookup table.
similarity_df = pd.DataFrame(similarities, columns=entity_names, index=entity_names)
# Bare expression: presumably the last line of a notebook cell so the frame is rendered.
similarity_df

Unnamed: 0,tree,lion,tiger,cat,dog
tree,1.0,0.07,0.07,0.08,0.12
lion,0.07,1.0,0.33,0.25,0.17
tiger,0.07,0.33,1.0,0.25,0.17
cat,0.08,0.25,0.25,1.0,0.2
dog,0.12,0.17,0.17,0.2,1.0


In [32]:
import nltk 
from nltk.corpus import sentiwordnet as swn

In [34]:
# Fetch the SentiWordNet entry for the noun sense 'father.n.01' and print its
# positive, negative and objectivity scores.
father = swn.senti_synset('father.n.01')
print('father 긍정감성 지수 :', father.pos_score())
print('father 부정감성 지수 :', father.neg_score())
print('father 객관성 지수 :', father.obj_score())
print()
# Same three scores for the adjective sense 'fabulous.a.01', for comparison.
fabulous = swn.senti_synset('fabulous.a.01')
print('fabulous 긍정감성 지수 :', fabulous.pos_score())
print('fabulous 부정감성 지수 :', fabulous.neg_score())
print('fabulous 객관성 지수 :', fabulous.obj_score())

father 긍정감성 지수 : 0.0
father 부정감성 지수 : 0.0
father 객관성 지수 : 1.0

fabulous 긍정감성 지수 : 0.875
fabulous 부정감성 지수 : 0.125
fabulous 객관성 지수 : 0.0
