In [51]:
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr


In [11]:
# Load the emoji table from the CSV file next to the notebook and preview
# its first five rows.
table = read_csv(filepath_or_buffer='emoji_table.csv')
table.head(n=5)

Unnamed: 0,unicode,emoji,name,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
0,1F308,🌈,rainbow,0.0,0.28,0.0,0.0,0.69,0.06,0.22,0.33
1,1F319,🌙,crescent moon,0.0,0.31,0.0,0.0,0.25,0.0,0.06,0.25
2,1F31A,🌚,new moon face,0.06,0.08,0.17,0.06,0.42,0.19,0.06,0.11
3,1F31E,🌞,sun with face,0.0,0.22,0.0,0.0,0.78,0.0,0.11,0.22
4,1F31F,🌟,glowing star,0.0,0.28,0.0,0.0,0.53,0.0,0.25,0.31


## 1) Emoji overview: unicode, emoji and name

In [12]:
# Display only the identifying columns (code point, glyph, name) for every row.
table.loc[:, ['unicode', 'emoji', 'name']]

Unnamed: 0,unicode,emoji,name
0,1F308,🌈,rainbow
1,1F319,🌙,crescent moon
2,1F31A,🌚,new moon face
3,1F31E,🌞,sun with face
4,1F31F,🌟,glowing star
...,...,...,...
145,2757,❗,exclamation mark
146,2764,❤,red heart
147,27A1,➡,right arrow
148,2B05,⬅,left arrow


## 2) Word2vec: cosine-similarity scores and their correlation with the table's emotion scores

In [47]:
# Names of the last eight columns of `table` (the per-emotion score columns).
emotions = table.columns[-8:].tolist()
# One entry per row of `table`, taken from its `name` column.
emoticons = table['name'].tolist()
# Score matrix taken straight from the table: one row per table row, one
# column per entry of `emotions`.
off_scores = table[emotions].to_numpy(copy=True)

In [48]:
def cosine_similarity(a,b):
    """Return the cosine similarity ``dot(a, b) / (||a|| * ||b||)``.

    Parameters
    ----------
    a, b : array-like
        Vectors to compare; they must be compatible with ``np.dot``.

    Returns
    -------
    float
        The cosine similarity, or ``0.0`` when either vector has zero norm.
        The cosine is undefined in that case; returning ``0.0`` avoids a
        division by zero and keeps NaN out of downstream statistics.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # At least one vector is all zeros: no direction to compare.
        return 0.0
    return np.dot(a, b) / denominator

def calculate_model_scores(emotions, emoticons, embed_fun):
    """Build a matrix of cosine similarities between emoticons and emotions.

    Every emotion is embedded once up front with ``embed_fun``; each emoticon
    is then embedded in turn and compared against all emotion vectors.

    Parameters
    ----------
    emotions : sequence
        Items embedded to form the columns of the result.
    emoticons : sequence
        Items embedded to form the rows of the result.
    embed_fun : callable
        Maps a single item to its embedding vector.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(emoticons), len(emotions))`` where entry
        ``[r, c]`` is the cosine similarity between emoticon ``r`` and
        emotion ``c``.
    """
    result = np.zeros((len(emoticons), len(emotions)))
    emotion_vectors = [embed_fun(label) for label in emotions]
    for row, item in enumerate(emoticons):
        item_vector = embed_fun(item)
        for col, emotion_vector in enumerate(emotion_vectors):
            result[row, col] = cosine_similarity(item_vector, emotion_vector)
    return result

In [54]:
# NOTE: placeholder embedding. It ignores its input and returns a random
# 5-dimensional vector, so the scores below carry no semantic signal until a
# real word2vec lookup is plugged in. The generator is created (and seeded)
# in this cell so that re-running it reproduces the same numbers; the seed
# value itself is arbitrary.
_w2v_rng = np.random.default_rng(0)


def w2v_embed_fun(x):
    """Stand-in word2vec embedding: seeded random vector of length 5 (``x`` is unused)."""
    return _w2v_rng.random(5)


word2vec_scores = calculate_model_scores(emotions, emoticons, w2v_embed_fun)

In [59]:
# Pearson correlation between the table's scores and the word2vec-based
# scores, one result per emotion column. The column count comes from
# `emotions` rather than a hard-coded 8.
[pearsonr(off_scores[:, i], word2vec_scores[:, i]) for i in range(len(emotions))]

[PearsonRResult(statistic=0.01617757219286872, pvalue=0.8442271596338166),
 PearsonRResult(statistic=0.013375859743313652, pvalue=0.8709459955084654),
 PearsonRResult(statistic=0.11559056368858202, pvalue=0.15896154594238485),
 PearsonRResult(statistic=-0.01697255974686578, pvalue=0.8366765006369041),
 PearsonRResult(statistic=-0.016309367616514747, pvalue=0.8429743891167857),
 PearsonRResult(statistic=-0.048022703350493426, pvalue=0.5595081322551164),
 PearsonRResult(statistic=-0.0077983273668358714, pvalue=0.9245435308788551),
 PearsonRResult(statistic=0.03435069591678524, pvalue=0.6764500903621286)]

## 3) Same comparison for GloVe, Doc2vec and BERT

In [None]:
# NOTE: placeholder embedding. It ignores its input and returns a random
# 5-dimensional vector, so the scores below carry no semantic signal until a
# real GloVe lookup is plugged in. The generator is created (and seeded) in
# this cell so that re-running it reproduces the same numbers; the seed value
# itself is arbitrary.
_gl_rng = np.random.default_rng(1)


def gl_embed_fun(x):
    """Stand-in GloVe embedding: seeded random vector of length 5 (``x`` is unused)."""
    return _gl_rng.random(5)


glove_scores = calculate_model_scores(emotions, emoticons, gl_embed_fun)

In [None]:
# Pearson correlation between the table's scores and the GloVe-based scores,
# one result per emotion column. The column count comes from `emotions`
# rather than a hard-coded 8.
[pearsonr(off_scores[:, i], glove_scores[:, i]) for i in range(len(emotions))]

In [None]:
# NOTE: placeholder embedding. It ignores its input and returns a random
# 5-dimensional vector, so the scores below carry no semantic signal until a
# real Doc2vec model is plugged in. The generator is created (and seeded) in
# this cell so that re-running it reproduces the same numbers; the seed value
# itself is arbitrary.
_d2v_rng = np.random.default_rng(2)


def d2v_embed_fun(x):
    """Stand-in Doc2vec embedding: seeded random vector of length 5 (``x`` is unused)."""
    return _d2v_rng.random(5)


doc2vec_scores = calculate_model_scores(emotions, emoticons, d2v_embed_fun)

In [None]:
# Pearson correlation between the table's scores and the Doc2vec-based
# scores, one result per emotion column. The column count comes from
# `emotions` rather than a hard-coded 8.
[pearsonr(off_scores[:, i], doc2vec_scores[:, i]) for i in range(len(emotions))]

In [None]:
# NOTE: placeholder embedding. It ignores its input and returns a random
# 5-dimensional vector, so the scores below carry no semantic signal until a
# real BERT encoder is plugged in. The generator is created (and seeded) in
# this cell so that re-running it reproduces the same numbers; the seed value
# itself is arbitrary.
_brt_rng = np.random.default_rng(3)


def brt_embed_fun(x):
    """Stand-in BERT embedding: seeded random vector of length 5 (``x`` is unused)."""
    return _brt_rng.random(5)


bert_scores = calculate_model_scores(emotions, emoticons, brt_embed_fun)

In [None]:
# Pearson correlation between the table's scores and the BERT-based scores,
# one result per emotion column. The column count comes from `emotions`
# rather than a hard-coded 8.
[pearsonr(off_scores[:, i], bert_scores[:, i]) for i in range(len(emotions))]