#

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

DATA_PATH = "/content/drive/MyDrive/멀티캠퍼스 자료/Machine Learning/data/"
SEED = 42

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [4]:
train = pd.read_csv(f"{DATA_PATH}kkm_세종말뭉치_통계조회.csv")

In [5]:
train

Unnamed: 0,형태소,품사,문어_횟수,문어_비율,문어_파일,문어_의미,구어_횟수,구어_의미,구어_파일,구어_의미.1,전체_횟수,전체_비율,전체_파일,전체_의미
0,없,VA,101131,0.34%,445,1,4320,0.27%,195,0,105451,0.33%,640,1
1,같,VA,51514,0.17%,444,0,5226,0.32%,196,0,56740,0.18%,640,0
2,그렇,VA,31843,0.11%,432,0,4244,0.26%,194,0,36087,0.11%,626,0
3,많,VA,24911,0.08%,444,0,1677,0.10%,175,0,26588,0.08%,619,0
4,크,VA,25659,0.09%,446,0,758,0.05%,162,0,26417,0.08%,608,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,퍼르스름하,VA,1,0.00%,1,0,0,0.00%,0,0,1,0.00%,1,0
1821,헐수할수없,VA,1,0.00%,1,0,0,0.00%,0,0,1,0.00%,1,0
1822,호리호리하,VA,1,0.00%,1,0,0,0.00%,0,0,1,0.00%,1,0
1823,흔하디흔하,VA,1,0.00%,1,0,0,0.00%,0,0,1,0.00%,1,0


In [7]:
pd.DataFrame(train['형태소'])

Unnamed: 0,형태소
0,없
1,같
2,그렇
3,많
4,크
...,...
1820,퍼르스름하
1821,헐수할수없
1822,호리호리하
1823,흔하디흔하


In [10]:
train['형태소']

total_va_list = []

for i in train['형태소']:
    total_va_list.append(i)


- 형용사 형태소 1825개 -> 명사화

In [13]:
total_va_list

['없',
 '같',
 '그렇',
 '많',
 '크',
 '좋',
 '어떻',
 '이렇',
 '있',
 '새롭',
 '다르',
 '어렵',
 '높',
 '쉽',
 '작',
 '길',
 '아름답',
 '깊',
 '어리',
 '힘들',
 '젊',
 '멀',
 '가깝',
 '적',
 '심하',
 '낮',
 '아프',
 '빠르',
 '나쁘',
 '싫',
 '늦',
 '넓',
 '무섭',
 '짧',
 '커다랗',
 '엄청나',
 '지나치',
 '희',
 '이러하',
 '검',
 '어둡',
 '옳',
 '괜찮',
 '똑같',
 '밝',
 '바쁘',
 '가볍',
 '뜨겁',
 '붉',
 '하얗',
 '고맙',
 '예쁘',
 '푸르',
 '무겁',
 '수많',
 '맑',
 '즐겁',
 '좁',
 '부드럽',
 '재미있',
 '약하',
 '낯설',
 '편하',
 '낫',
 '틀림없',
 '비싸',
 '강하',
 '슬프',
 '춥',
 '놀랍',
 '곱',
 '짙',
 '뛰어나',
 '낡',
 '드물',
 '거칠',
 '안타깝',
 '부끄럽',
 '기쁘',
 '바르',
 '두렵',
 '반갑',
 '맛있',
 '흔하',
 '착하',
 '차갑',
 '싸',
 '조그맣',
 '외롭',
 '올바르',
 '빨갛',
 '멋지',
 '날카롭',
 '급하',
 '친하',
 '저렇',
 '굵',
 '굳',
 '이르',
 '아쉽',
 '느리',
 '환하',
 '더럽',
 '그립',
 '뒤늦',
 '괴롭',
 '덥',
 '귀엽',
 '아무렇',
 '우습',
 '가늘',
 '알맞',
 '어리석',
 '귀하',
 '둥글',
 '부럽',
 '노랗',
 '귀찮',
 '재밌',
 '별다르',
 '세',
 '까맣',
 '그러하',
 '두껍',
 '어지럽',
 '거세',
 '다름없',
 '차',
 '흐리',
 '파랗',
 '밉',
 '아깝',
 '시끄럽',
 '눈부시',
 '잦',
 '얇',
 '못지않',
 '진하',
 '힘겹',
 '손쉽',
 '점잖',
 '고프',
 '까다롭',
 '힘차',
 '폭넓',
 

In [37]:
def adjective_to_noun(adjective_list):
    results = []
    for adjective in adjective_list:
        last_char = adjective[-1]

        if (ord(last_char) - 44032) % 28 != 0: #받침이 있다면 음을 더하고
            result = adjective + "음"
        else:
            last_jamo = chr(ord(last_char) + 16) #없다면 마지막 글자에 ㅁ 받침을 추가
            result = adjective[:-1] + last_jamo

        results.append(result)
    return results

In [39]:
total_nva_list = adjective_to_noun(total_va_list)
total_nva_list

['없음',
 '같음',
 '그렇음',
 '많음',
 '큼',
 '좋음',
 '어떻음',
 '이렇음',
 '있음',
 '새롭음',
 '다름',
 '어렵음',
 '높음',
 '쉽음',
 '작음',
 '길음',
 '아름답음',
 '깊음',
 '어림',
 '힘들음',
 '젊음',
 '멀음',
 '가깝음',
 '적음',
 '심함',
 '낮음',
 '아픔',
 '빠름',
 '나쁨',
 '싫음',
 '늦음',
 '넓음',
 '무섭음',
 '짧음',
 '커다랗음',
 '엄청남',
 '지나침',
 '흼',
 '이러함',
 '검음',
 '어둡음',
 '옳음',
 '괜찮음',
 '똑같음',
 '밝음',
 '바쁨',
 '가볍음',
 '뜨겁음',
 '붉음',
 '하얗음',
 '고맙음',
 '예쁨',
 '푸름',
 '무겁음',
 '수많음',
 '맑음',
 '즐겁음',
 '좁음',
 '부드럽음',
 '재미있음',
 '약함',
 '낯설음',
 '편함',
 '낫음',
 '틀림없음',
 '비쌈',
 '강함',
 '슬픔',
 '춥음',
 '놀랍음',
 '곱음',
 '짙음',
 '뛰어남',
 '낡음',
 '드물음',
 '거칠음',
 '안타깝음',
 '부끄럽음',
 '기쁨',
 '바름',
 '두렵음',
 '반갑음',
 '맛있음',
 '흔함',
 '착함',
 '차갑음',
 '쌈',
 '조그맣음',
 '외롭음',
 '올바름',
 '빨갛음',
 '멋짐',
 '날카롭음',
 '급함',
 '친함',
 '저렇음',
 '굵음',
 '굳음',
 '이름',
 '아쉽음',
 '느림',
 '환함',
 '더럽음',
 '그립음',
 '뒤늦음',
 '괴롭음',
 '덥음',
 '귀엽음',
 '아무렇음',
 '우습음',
 '가늘음',
 '알맞음',
 '어리석음',
 '귀함',
 '둥글음',
 '부럽음',
 '노랗음',
 '귀찮음',
 '재밌음',
 '별다름',
 '셈',
 '까맣음',
 '그러함',
 '두껍음',
 '어지럽음',
 '거셈',
 '다름없음',
 '참',
 '흐림',
 '파랗음',
 '밉음',
 '아깝음',
 

- 성격단어 200개

In [22]:
personality_vocab = ['당당함',
 '상상력',
 '재치있음',
 '생산적임',
 '활발함',
 '철학적임',
 '환상적임',
 '아이러니함',
 '도전정신이강함',
 '자신에게강함',
 '아트워크',
 '유순함',
 '자기주도적임',
 '자유분방함',
 '정교함',
 '협동적임',
 '유머감각이있음',
 '관용적임',
 '솔직함',
 '감수성',
 '고요함',
 '천재적임',
 '자유로움',
 '책임감강함',
 '대화력이있음',
 '지적임',
 '눈부심',
 '믿음직스러움',
 '적극적임',
 '우스꽝스럼',
 '자부심있음',
 '친절함',
 '관능적임',
 '영감적임',
 '주목받음',
 '자연스러움',
 '자부심',
 '참을성있음',
 '영감을줄줄앎',
 '믿음직스러움',
 '도발적임',
 '명랑함',
 '지도력있음',
 '노골적임',
 '책임감',
 '매혹적임',
 '뛰어남',
 '예리함',
 '자기주도적임',
 '감성적임',
 '인내심',
 '인내력있음',
 '쿨함',
 '명랑함',
 '미묘함',
 '민첩성',
 '동의적임',
 '호감을줄줄앎',
 '낙천적임',
 '사실적임',
 '정직함',
 '따뜻함',
 '멋있음',
 '카리스마있음',
 '화사함',
 '관대함',
 '주인공같음',
 '매력을표출함',
 '사랑스러움',
 '항상배우려함',
 '적응력이있음',
 '자신을잘표현함',
 '융통성있음',
 '자기표현이뛰어남',
 '예술적임',
 '대담함',
 '노력함',
 '유연함',
 '청순함',
 '사랑스러움',
 '생동감있음',
 '탁월함',
 '비현실적임',
 '자존감이높음',
 '성취지향적임',
 '현대적임',
 '미모',
 '호감을줌',
 '로맨틱함',
 '근면함',
 '적응력이좋음',
 '유머러스함',
 '사회적임',
 '대화력',
 '호기심이많음',
 '색다름',
 '질서정연함',
 '멋지게입음',
 '책임감있음',
 '남을도음',
 '인기있음',
 '유니크함',
 '우스꽝스러움',
 '도전적임',
 '낭만적임',
 '동정심이있음',
 '상냥함',
 '자기개발에힘씀',
 '통찰력이있음',
 '자기통제력이강함',
 '담담함',
 '꼼꼼함',
 '독립적임',
 '창의적임',
 '헌신적임',
 '귀여움',
 '특별함',
 '능숙함',
 '소통능력이뛰어남',
 '온화함',
 '소화력이뛰어남',
 '흥미로움',
 '정직함',
 '친화력있음',
 '예민함',
 '주목할만함',
 '독창적임',
 '수용력있음',
 '역사적임',
 '낙천적임',
 '열정적임',
 '화려함',
 '사교적임',
 '협력적임',
 '공감능력이있음',
 '매력적임',
 '시각적임',
 '진지함',
 '배려심',
 '진취적임',
 '민감함',
 '풍부함',
 '견실함',
 '소극적임',
 '창의적임',
 '참을성',
 '우아함',
 '매력넘침',
 '세심함',
 '유머감각',
 '사이좋음',
 '추상적임',
 '경이로움',
 '차분함',
 '스타일리시함',
 '남을도울줄앎',
 '참을성이있음',
 '소극적임',
 '헌신적임',
 '사교성이뛰어남',
 '재능있음',
 '열정적임',
 '내향적임',
 '아름다움',
 '겸손함',
 '침착함',
 '섬세함',
 '성실함',
 '강인함',
 '의지가강함',
 '친절함',
 '지도력이있음',
 '대화가잘통함',
 '신비로움',
 '웅장함',
 '자아성찰적임',
 '자부심강함',
 '몽환적임',
 '정교함',
 '미소가매력적임',
 '감각적임',
 '성실함',
 '자기개발을소중히여김',
 '강인함',
 '포근함',
 '카리스마',
 '비주얼',
 '소심함',
 '지능적임',
 '적응력',
 '현명함',
 '사람들과잘어울림',
 '우아함',
 '강렬함',
 '자신을믿음',
 '동적임',
 '공감능력',
 '진귀함',
 '성공적임',
 '자신감',
 '외향적임',
 '시선을사로잡음',
 '활기참',
 '유쾌함',
 '통찰력있음',
 '세련됨',
 '감수성이풍부함',
 '소통능력이좋음',
 '놀라움',
 '화려함',
 '빛남',
 '협력적임',
 '독립적임',
 '동질적임',
 '행복감을줌',
 '신뢰할만함',
 '인기많음',
 '시크함',
 '목표지향적임',
 '긍정적임',
 '독특함',
 '절제력',
 '사려깊음',
 '믿음직함',
 '도움이필요함',
 '냉정함',
 '자아성찰',
 '감성적임',
 '주도적임',
 '자기분석적임',
 '온화함',
 '대화력이뛰어남',
 '인상적임',
 '깊이있음',
 '눈길을끔',
 '멋짐',
 '감동적임',
 '탐구적임',
 '소심하지않음',
 '사려깊음',
 '대담함',
 '자유로움',
 '성취지향적임',
 '자신감넘침',
 '영감적임',
 '순종적임',
 '꼼꼼함',
 '우정있음',
 '담대함',
 '멋부림',
 '열심',
 '놀라움',
 '명석함',
 '표현력이풍부함',
 '풍부한상상력을가짐',
 '고집스러움',
 '태연함',
 '자연스러움',
 '용기있음',
 '유머감각이뛰어남',
 '견고함',
 '배울욕구가있음',
 '흥미로움',
 '자기주장이강함',
 '훌륭함',
 '다재다능함',
 '트렌디함',
 '매끄러움',
 '매뉴얼적임',
 '용기있음',
 '근면함',
 '역동적임',
 '책임감이강함',
 '무거움',
 '겸손함',
 '사회적임',
 '배려해줌',
 '자신감있음',
 '빠른대처능력이있음',
 '자신감이넘침',
 '자기표현적임',
 '친근함',
 '냉철함',
 '섹시함',
 '신중함',
 '냉정함',
 '열성적임',
 '민첩함',
 '차분함',
 '매사에적극적임',
 '애정어림',
 '자발적임']


In [40]:
len(total_nva_list), len(personality_vocab)

(1825, 292)

In [41]:
# word2vec
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

train_vocab = personality_vocab + total_nva_list

vectorizer = CountVectorizer(binary=True)
tri = vectorizer.fit_transform(train_vocab)

tri_word2vec = cosine_similarity(tri[10], tri[292:])

print(tri_word2vec)
tri_word2vec.sum()


[[0. 0. 0. ... 0. 0. 0.]]


0.0

In [42]:
for i in range(len(personality_vocab)):
    tri_word2vec = cosine_similarity(tri[i], tri[len(personality_vocab):])
    print(tri_word2vec.sum())


0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


In [44]:
tri_word2vec

array([[0., 0., 0., ..., 0., 0., 0.]])

In [48]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

train_list = personality_vocab
vocab = total_nva_list


train_vocab = train_list + vocab

vectorizer = CountVectorizer(binary=True)
tri = vectorizer.fit_transform(train_vocab)

tri_vec2word_list = []
for i in range(len(train_list)):
    tri_word2vec = cosine_similarity(tri[i], tri[len(train_list):])
    tri_vectors = [j for j, score in enumerate(tri_word2vec[0]) if score == 1.0]
    tri_vec2word = [vectorizer.get_feature_names_out()[j] for j in tri_vectors]
    tri_vec2word_list.append(tri_vec2word)

tri_vec2word_list = np.concatenate(tri_vec2word_list)
print(tri_vec2word_list)

['경사짐' '강팍함' '역겁음' '궃음' '맵디맵음' '빈틈없음' '구쁨' '뇌렇음' '깊음' '거름짐' '뜬금없음']


In [53]:
personality_vocab

['당당함',
 '상상력',
 '재치있음',
 '생산적임',
 '활발함',
 '철학적임',
 '환상적임',
 '아이러니함',
 '도전정신이강함',
 '자신에게강함',
 '아트워크',
 '유순함',
 '자기주도적임',
 '자유분방함',
 '정교함',
 '협동적임',
 '유머감각이있음',
 '관용적임',
 '솔직함',
 '감수성',
 '고요함',
 '천재적임',
 '자유로움',
 '책임감강함',
 '대화력이있음',
 '지적임',
 '눈부심',
 '믿음직스러움',
 '적극적임',
 '우스꽝스럼',
 '자부심있음',
 '친절함',
 '관능적임',
 '영감적임',
 '주목받음',
 '자연스러움',
 '자부심',
 '참을성있음',
 '영감을줄줄앎',
 '믿음직스러움',
 '도발적임',
 '명랑함',
 '지도력있음',
 '노골적임',
 '책임감',
 '매혹적임',
 '뛰어남',
 '예리함',
 '자기주도적임',
 '감성적임',
 '인내심',
 '인내력있음',
 '쿨함',
 '명랑함',
 '미묘함',
 '민첩성',
 '동의적임',
 '호감을줄줄앎',
 '낙천적임',
 '사실적임',
 '정직함',
 '따뜻함',
 '멋있음',
 '카리스마있음',
 '화사함',
 '관대함',
 '주인공같음',
 '매력을표출함',
 '사랑스러움',
 '항상배우려함',
 '적응력이있음',
 '자신을잘표현함',
 '융통성있음',
 '자기표현이뛰어남',
 '예술적임',
 '대담함',
 '노력함',
 '유연함',
 '청순함',
 '사랑스러움',
 '생동감있음',
 '탁월함',
 '비현실적임',
 '자존감이높음',
 '성취지향적임',
 '현대적임',
 '미모',
 '호감을줌',
 '로맨틱함',
 '근면함',
 '적응력이좋음',
 '유머러스함',
 '사회적임',
 '대화력',
 '호기심이많음',
 '색다름',
 '질서정연함',
 '멋지게입음',
 '책임감있음',
 '남을도음',
 '인기있음',
 '유니크함',
 '우스꽝스러움',
 '도전적임',
 '낭만적임',
 '동정심이있음',
 '상냥함',
 '자기개발에

In [54]:
total_nva_list

['없음',
 '같음',
 '그렇음',
 '많음',
 '큼',
 '좋음',
 '어떻음',
 '이렇음',
 '있음',
 '새롭음',
 '다름',
 '어렵음',
 '높음',
 '쉽음',
 '작음',
 '길음',
 '아름답음',
 '깊음',
 '어림',
 '힘들음',
 '젊음',
 '멀음',
 '가깝음',
 '적음',
 '심함',
 '낮음',
 '아픔',
 '빠름',
 '나쁨',
 '싫음',
 '늦음',
 '넓음',
 '무섭음',
 '짧음',
 '커다랗음',
 '엄청남',
 '지나침',
 '흼',
 '이러함',
 '검음',
 '어둡음',
 '옳음',
 '괜찮음',
 '똑같음',
 '밝음',
 '바쁨',
 '가볍음',
 '뜨겁음',
 '붉음',
 '하얗음',
 '고맙음',
 '예쁨',
 '푸름',
 '무겁음',
 '수많음',
 '맑음',
 '즐겁음',
 '좁음',
 '부드럽음',
 '재미있음',
 '약함',
 '낯설음',
 '편함',
 '낫음',
 '틀림없음',
 '비쌈',
 '강함',
 '슬픔',
 '춥음',
 '놀랍음',
 '곱음',
 '짙음',
 '뛰어남',
 '낡음',
 '드물음',
 '거칠음',
 '안타깝음',
 '부끄럽음',
 '기쁨',
 '바름',
 '두렵음',
 '반갑음',
 '맛있음',
 '흔함',
 '착함',
 '차갑음',
 '쌈',
 '조그맣음',
 '외롭음',
 '올바름',
 '빨갛음',
 '멋짐',
 '날카롭음',
 '급함',
 '친함',
 '저렇음',
 '굵음',
 '굳음',
 '이름',
 '아쉽음',
 '느림',
 '환함',
 '더럽음',
 '그립음',
 '뒤늦음',
 '괴롭음',
 '덥음',
 '귀엽음',
 '아무렇음',
 '우습음',
 '가늘음',
 '알맞음',
 '어리석음',
 '귀함',
 '둥글음',
 '부럽음',
 '노랗음',
 '귀찮음',
 '재밌음',
 '별다름',
 '셈',
 '까맣음',
 '그러함',
 '두껍음',
 '어지럽음',
 '거셈',
 '다름없음',
 '참',
 '흐림',
 '파랗음',
 '밉음',
 '아깝음',
 

In [56]:
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 성격 단어 리스트와 모든 한국어 형용사 리스트 (1800개)를 합친 리스트를 생성합니다.
total_vocab = personality_vocab + total_nva_list

# Word2Vec 모델을 학습합니다.
model = Word2Vec(sentences=total_vocab, vector_size=100, window=5, min_count=1, workers=4)

# 성격 단어 리스트의 단어들과 모든 한국어 형용사 리스트의 단어들의 벡터를 추출합니다.
vectorizer = CountVectorizer(binary=True)
tri = vectorizer.fit_transform(total_vocab)
personality_vectors = tri[:len(personality_vocab)]
adjective_vectors = tri[len(personality_vocab):]

# 각 성격 단어에 대해 유사한 형용사들을 추출합니다.
similar_adjectives = {}
for idx, personality_word in enumerate(personality_vocab):
    personality_vector = personality_vectors[idx]
    cosine_similarities = cosine_similarity(personality_vector, adjective_vectors)
    similar_indices = cosine_similarities.argsort()[0][-500:]  # 상위 20개 유사한 형용사들을 추출
    similar_adjectives[personality_word] = [total_nva_list[i] for i in similar_indices]

lst = []

# 결과 출력
for word, similar_words in similar_adjectives.items():
    print(f"{word}: {similar_words}")
    lst.append(similar_words)




당당함: ['참', '거셈', '어지럽음', '두껍음', '그러함', '까맣음', '셈', '별다름', '재밌음', '귀찮음', '노랗음', '부럽음', '다름없음', '점잖음', '고픔', '까다롭음', '쓸데없음', '빔', '지겹음', '남다름', '갑작스럽음', '끊임없음', '배고픔', '낯익음', '오램', '시커멓음', '누렇음', '이쁨', '험함', '달음', '끈질김', '두드러짐', '사납음', '잘남', '못함', '걸맞음', '상관없음', '가쁨', '숱함', '폭넓음', '힘참', '고름', '메스껍음', '더함', '과함', '값있음', '이러이러함', '엉터리없음', '짝음', '맞음', '궁글음', '괄함', '몽글음', '어줍음', '시덥음', '솔찮음', '생급스럽음', '짖궂음', '조렇음', '열쩍음', '감미롭음', '찰짐', '까아맣음', '굴뚝같음', '맵싸함', '둔덕짐', '되알짐', '단촐함', '다사롭음', '즘잖음', '시장스럽음', '아무러함', '좁아터짐', '커닿음', '만무함', '들큰함', '덩그랗음', '높푸름', '세모남', '생때같음', '볕바름', '별쭝맞음', '버얼겋음', '밉살맞음', '얄따랗음', '앙징맞음', '쑥쓰럽음', '쌍스럽음', '시덥잖음', '잘디잘음', '자애롭음', '눈꼴사납음', '허구많음', '하나같음', '하고함', '피어림', '피비림', '리드미컬함', '색스럽음', '편험', '뽀오얗음', '여지없음', '염치없음', '야멸참', '저러함', '인상깊음', '응달짐', '유다름', '뱀', '물샐틈없음', '조그만함', '대참', '느껍음', '생뚱맞음', '뭣함', '마름', '동글음', '시답음', '승함', '꼴사납음', '성스럽음', '성마름', '뿌우옇음', '아슬함', '자디잘음', '옹골짐', '물설음', '매스껍음', '굴곡짐', '극성맞음', '어긋맞음', '아금받음', '숨김없음', '촌스럽음', '째째함', '주옥같음', '작디작음', '요

In [59]:
len(lst)

257

In [67]:
len(lst[0])

lst[0]

['참',
 '거셈',
 '어지럽음',
 '두껍음',
 '그러함',
 '까맣음',
 '셈',
 '별다름',
 '재밌음',
 '귀찮음',
 '노랗음',
 '부럽음',
 '다름없음',
 '점잖음',
 '고픔',
 '까다롭음',
 '쓸데없음',
 '빔',
 '지겹음',
 '남다름',
 '갑작스럽음',
 '끊임없음',
 '배고픔',
 '낯익음',
 '오램',
 '시커멓음',
 '누렇음',
 '이쁨',
 '험함',
 '달음',
 '끈질김',
 '두드러짐',
 '사납음',
 '잘남',
 '못함',
 '걸맞음',
 '상관없음',
 '가쁨',
 '숱함',
 '폭넓음',
 '힘참',
 '고름',
 '메스껍음',
 '더함',
 '과함',
 '값있음',
 '이러이러함',
 '엉터리없음',
 '짝음',
 '맞음',
 '궁글음',
 '괄함',
 '몽글음',
 '어줍음',
 '시덥음',
 '솔찮음',
 '생급스럽음',
 '짖궂음',
 '조렇음',
 '열쩍음',
 '감미롭음',
 '찰짐',
 '까아맣음',
 '굴뚝같음',
 '맵싸함',
 '둔덕짐',
 '되알짐',
 '단촐함',
 '다사롭음',
 '즘잖음',
 '시장스럽음',
 '아무러함',
 '좁아터짐',
 '커닿음',
 '만무함',
 '들큰함',
 '덩그랗음',
 '높푸름',
 '세모남',
 '생때같음',
 '볕바름',
 '별쭝맞음',
 '버얼겋음',
 '밉살맞음',
 '얄따랗음',
 '앙징맞음',
 '쑥쓰럽음',
 '쌍스럽음',
 '시덥잖음',
 '잘디잘음',
 '자애롭음',
 '눈꼴사납음',
 '허구많음',
 '하나같음',
 '하고함',
 '피어림',
 '피비림',
 '리드미컬함',
 '색스럽음',
 '편험',
 '뽀오얗음',
 '여지없음',
 '염치없음',
 '야멸참',
 '저러함',
 '인상깊음',
 '응달짐',
 '유다름',
 '뱀',
 '물샐틈없음',
 '조그만함',
 '대참',
 '느껍음',
 '생뚱맞음',
 '뭣함',
 '마름',
 '동글음',
 '시답음',
 '승함',
 '꼴사납음',
 '성스럽음',
 '성마름',
 

In [47]:
# vec2word
tri_vectors = [i for i, score in enumerate(tri_word2vec[0]) if score == 1.0]

tri_vec2word = [vectorizer.get_feature_names_out()[i] for i in tri_vectors]

print(tri_vec2word)

[]


In [25]:
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 성격 단어 리스트와 모든 한국어 형용사 리스트 (1800개)를 합친 리스트를 생성합니다.
total_vocab = personality_vocab + total_va_list

# Word2Vec 모델을 학습합니다.
model = Word2Vec(sentences=total_vocab, vector_size=100, window=5, min_count=1, workers=4)

# 성격 단어 리스트의 단어들과 모든 한국어 형용사 리스트의 단어들의 벡터를 추출합니다.
vectorizer = CountVectorizer(binary=True)
tri = vectorizer.fit_transform(total_vocab)
personality_vectors = tri[:len(personality_vocab)]
adjective_vectors = tri[len(personality_vocab):]

# 각 성격 단어에 대해 유사한 형용사들을 추출합니다.
similar_adjectives = {}
for idx, personality_word in enumerate(personality_vocab):
    personality_vector = personality_vectors[idx]
    cosine_similarities = cosine_similarity(personality_vector, adjective_vectors)
    similar_indices = cosine_similarities.argsort()[0][100:]  # 상위 20개 유사한 형용사들을 추출
    similar_adjectives[personality_word] = [total_va_list[i] for i in similar_indices]

# 결과 출력
for word, similar_words in similar_adjectives.items():
    print(f"{word}: {similar_words}")


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[[0. 0. 0. ... 0. 0. 0.]]


In [18]:
# vec2word
tri_vectors = [i for i, score in enumerate(tri_word2vec[0]) if score == 1.0]

tri_vec2word = [vectorizer.get_feature_names_out()[i] for i in tri_vectors]

print(tri_vec2word)

[]


In [19]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

train_vocab = train_list + vocab

vectorizer = CountVectorizer(binary=True)
tri = vectorizer.fit_transform(train_vocab)

tri_vec2word_list = []
for i in range(len(train_list)):
    tri_word2vec = cosine_similarity(tri[i], tri[len(train_list):])
    tri_vectors = [j for j, score in enumerate(tri_word2vec[0]) if score == 1.0]
    tri_vec2word = [vectorizer.get_feature_names_out()[j] for j in tri_vectors]
    tri_vec2word_list.append(tri_vec2word)

tri_vec2word_list = np.concatenate(tri_vec2word_list)
print(tri_vec2word_list)

NameError: ignored

In [None]:
test_vocab = test_list + vocab

vectorizer = CountVectorizer(binary=True)
vai = vectorizer.fit_transform(test_vocab)

vai_vec2word_list = []
for i in range(len(test_list)):
    vai_word2vec = cosine_similarity(vai[i], vai[len(test_list):])
    vai_vectors = [j for j, score in enumerate(vai_word2vec[0]) if score == 1.0]
    vai_vec2word = [vectorizer.get_feature_names_out()[j] for j in vai_vectors]
    vai_vec2word_list.append(vai_vec2word)

vai_vec2word_list = np.concatenate(vai_vec2word_list)
print(vai_vec2word_list)

[]


In [2]:
meaning_words = ['많', '크', '좋','새롭','다르','어렵','쉽','작','길','아름답','어리','힘들','젊','아프','빠르','나쁘','싫','무섭','짧',
                 '커다랗','엄청나','지나치','희','검','어둡','옳','괜찮','밝','바쁘','가볍','뜨겁','하얗','예쁘','무겁','맑','즐겁','부드럽',
                 '재미있','약하','낯설','편하','낫','비싸','강하','슬프','춥','놀랍','곱','뛰어나','낡','거칠','안타깝','부끄럽','기쁘','바르',
                 '두렵','반갑','맛있','흔하','착하','차갑','싸','조그맣','외롭','올바르','빨갛','멋지','날카롭','급하','친하','굵','굳','아쉽',
                 '느리','환하','더럽','그립','괴롭','귀엽','우습','가늘','어리석','귀하','둥글','부럽','노랗','귀찮','재밌','세','까맣','두껍',
                 '어지럽','거세','차','흐리','파랗','밉','아깝','시끄럽','눈부시','얇','진하','힘겹','점잖','고프','까다롭','힘차','폭넓',
                 '못하','잘나','사납','두드러지','끈질기','험하', '이쁘',' 누렇',' 시커멓','오래','낯익','배고프','끊임없','남다르','지겹',
                 '비','쓸데없','못나','벅차','묘하','기막히','서럽','서글프','세차','값싸','그르','모질','가엾','잘','참되','허옇','엷',
                 '색다르','동그랗','게으르','메마르','벌겋','쑥스럽','엄하','어이없','가느다랗','너그럽','서투르','하찮','시리','매끄럽',
                 '우스꽝스럽','고달프','서툴','뻔하','값지','멋있','잘생기','질기','멍하','새까맣','연하','추하','틀리','억세','안되','여리',
                 '싱겁','쓰라리','어처구니없','역겹','재미없','수줍','뿌옇','천하','징그럽','순하','흉하','안쓰럽','야하','빼어나','속상하',
                 '유별나','매섭','보잘것없','못생기','못되','시퍼렇','기다랗','짓궂','언짢','잽싸','형편없','값비싸','흥겹','새파랗','둥그렇',
                 '둔하','부시','재빠르','딱하','배부르','싱그럽','별나','쓰리','야무지','새하얗','어질','한결같','활기차','궂','아리','갸날프',
                 '건방지','독하','선하','훤하','시뻘겋','탁하','무디','우렁차','능하','철없','뽀얗','번거롭','해롭','쓸모없','성가시','정겹',
                 '애처롭','퍼렇','분하','관계없','네모나','얄밉','차디차','장하','이롭','달갑','헤프','앳되','후하','푸르르','쎄','힘세',
                 '힘없','보드랍','새빨갛','길다랗','아니꼽','미심쩍','해맑','미덥','수상쩍','굳세','만만찮','동떨어지','껄끄럽','험상궂',
                 '변함없','드세','앙증맞','깡마르','구슬프','날쌔','실없','쓰디쓰','시꺼멓','을씨년스럽','검푸르','발갛','시원찮','뻘겋',
                 '어여쁘','당차','고되','모나','다부지','흐드러지','드럽','짙푸르','약삭빠르','방정맞','뜸하','사이좋','천연덕스럽','무르',
                 '볼품없','상스럽','새카맣','격하','거침없','허하','희뿌옇','앙칼지','심술궂','아리땁','끈덕지','역하','가녀리','노오랗',
                 '굼뜨','희디희','가엽','살갑','하잘것없','정신없','버릇없','올곧','악하','청승맞','별스럽','구리','샛노랗','길하','빈틈없',
                 '필요없','남부럽','멋들어지','쪼그맣','똥그랗','매몰차','파아랗','속좁','막되','노하','얌전하','얇디얇','얍삽하','연푸르',
                 '번거럽','똘똘하','야물딱지','예쁘장하','투박스럽','게을러빠지','껄렁껄렁하','믿음직스럽','사근사근하','기','될성부르',
                 '약아빠지','예스럽','쇼킹하','놀라웁','맑디맑','능청맞','곱디곱','징하','모지','꽁하','따스','늙','유머러스하','예쁘장스럽',
                 '촌시럽','보오얗','동그렇','모자르','모잘르','간지','힘쎄','자애롭','시덥잖','쌍스럽','쑥쓰럽','앙징맞','밉살맞','후덥',
                 '여물','찐하','찡하','둥굴','기차','자애롭','시덥잖','쌍스럽','쑥쓰럽','앙징맞','얄따랗','밉살맞','버얼겋','높푸르','찐하',
                 '찡하','기차','주제넘','꺼멓','어리숙하','얄궂','참하','끄떡없','희망차','기운차','희부옇','안스럽','어줍잖','의롭','능글맞',
                 '간드러지','쌀쌀맞','자그맣','볼썽사납','희멀겋','도드라지','재수없','걸','빠알갛','뜬금없','살지','악착같','멀겋','의좋','꾸밈없',
                 '같잖','잡스럽','냉하','싸하','빡세','궁상맞','밉살스럽','하이얗','익살맞','쬐그맣','암팡지','어쭙잖','분별없','달갑잖','똑바르',
                 '실하','박하','고깝','특별나','새삼스럽','스스럼없','좀스럽','손색없','후지','짠하','시답잖','허물없','변변찮','따습','맹하',
                 '애닯','주책없','뚱하','의심쩍','쬐끄맣','섹시하','곱다랗','시건방지','특출나','옹골차','막돼먹','곰살궂','가소롭','유하','속없',
                 '지각없','거멓','대단찮','기똥차','깊푸르','따시','뜨시','맥없','싯누렇','로맨틱하','도탑','허여멀겋','모지락스럽','향그럽',
                 '옹골지','성마르','성스럽','꼴사납','시답','동글','마르','대차','조그만하','인상깊','야멸차','염치없','쌩뚱맞','매스껍','극성맞',
                 '깊디깊','감명깊','반하','궁하','덜떨어지','나즈막하','요상하','작디작','째쨰하','촌스럽','뽀오얗','색스럽','다사롭','둔덕지',
                 '맵싸하','까아맣','감미롭','짖궂','시덥','어줍','몽글','값있','눈꼴사납']

In [None]:
()