## I. Reading the NRC Lexicon and Its Translations into Different Languages:

In [2]:
import pandas as pd
# Multilingual NRC Emotion Lexicon workbook: per the preview below, one row per
# English word with its translations and the emotion/sentiment flag columns.
lexicon_workbook = 'NRCLexicon/NRC-Emotion-Lexicon-v0.92/NRC-Emotion-Lexicon-v0.92-InManyLanguages-web.xlsx'
lexicon = pd.read_excel(lexicon_workbook)
lexicon.head()

Unnamed: 0,English Word,Arabic Translation (Google Translate),Basque Translation (Google Translate),Bengali Translation (Google Translate),Catalan Translation (Google Translate),Chinese (simplified) Translation (Google Translate),Chinese (traditional) Translation (Google Translate),Danish Translation (Google Translate),Dutch Translation (Google Translate),Esperanto Translation (Google Translate),...,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,aback,الى الوراء,aback,পশ্চাতে,darrere,吓了一跳,嚇了一跳,overrasket,verrast,surprizita,...,0,0,0,0,0,0,0,0,0,0
1,abacus,طبلية تاج,abako,গণনা-যন্ত্রবিশেষ,àbac,算盘,算盤,abacus,abacus,abako,...,0,0,0,0,0,0,0,0,0,1
2,abandon,تخلى,alde batera utzi,বর্জিত করা,abandonar,放弃,放棄,opgive,verlaten,forlasos,...,0,1,0,0,0,1,0,1,0,0
3,abandoned,مهجور,abandonatu,পরিত্যক্ত,abandonat,弃,棄,forladte,verlaten,forlasita,...,0,1,1,0,0,1,0,1,0,0
4,abandonment,هجر,abandono,বিসর্জন,abandonament,放弃,放棄,opgivelse,verlatenheid,forlasas,...,0,1,1,0,0,1,0,1,1,0


In [4]:
# Inspect the available columns: the English word, one
# "<Language> Translation (Google Translate)" column per language, and the
# emotion/sentiment flag columns selected later in this notebook.
lexicon.columns

Index(['English Word', 'Arabic Translation (Google Translate)',
       'Basque Translation (Google Translate)',
       'Bengali Translation (Google Translate)',
       'Catalan Translation (Google Translate)',
       'Chinese (simplified) Translation (Google Translate)',
       'Chinese (traditional) Translation (Google Translate)',
       'Danish Translation (Google Translate)',
       'Dutch Translation (Google Translate)',
       'Esperanto Translation (Google Translate)',
       'Finnish Translation (Google Translate)',
       'French Translation (Google Translate)',
       'German Translation (Google Translate)',
       'Greek Translation (Google Translate)',
       'Gujarati Translation (Google Translate)',
       'Hebrew Translation (Google Translate)',
       'Hindi Translation (Google Translate)',
       'Irish Translation (Google Translate)',
       'Italian Translation (Google Translate)',
       'Japanese Translation (Google Translate)',
       'Latin Translation (Google Tr

## II. Preparing Lexicon Dictionaries:

### 1. Choosing Language:

In [5]:
# REPLACE LANGUAGE HERE
# The value is used as the prefix of the "<language> Translation (Google Translate)"
# column name and as the name of the output folder under NRCLexicon/.
language = "French"

### 2. Selecting the emotion/sentiment vectors for the words translated into the chosen language:

In [6]:
# Column holding the translation of each English word into the chosen language.
translation_column = language+' Translation (Google Translate)'
# Sentiment and emotion flag columns kept alongside the translated word.
flag_columns = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust',
                'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
language_lexicon = lexicon[[translation_column] + flag_columns]
language_lexicon.head()

Unnamed: 0,French Translation (Google Translate),Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,un dos,0,0,0,0,0,0,0,0,0,0
1,abaque,0,0,0,0,0,0,0,0,0,1
2,abandonner,0,1,0,0,0,1,0,1,0,0
3,abandonné,0,1,1,0,0,1,0,1,0,0
4,abandon,0,1,1,0,0,1,0,1,1,0


### 3. Converting the words and their vectors into a dictionary:

In [8]:
# Build `d`: translated word -> 10-element list of flags, in the fixed order
# given by `vector_order` (alphabetical, NOT the column order of
# `language_lexicon`). Later cells index the vectors by position, so this
# order must not change.
vector_order = ['Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy',
                'Negative', 'Positive', 'Sadness', 'Surprise', 'Trust']
translation_column = language+' Translation (Google Translate)'

# Pull the columns out once instead of doing eleven `iloc[i][...]` lookups per
# row (each of which materialises a whole row Series). `.tolist()` also turns
# numpy scalars into plain Python numbers, so the pickled dictionary can be
# loaded without numpy.
translated_words = language_lexicon[translation_column].tolist()
flag_vectors = language_lexicon[vector_order].values.tolist()

d = {}
for translated_word, flags in zip(translated_words, flag_vectors):
    # Several English words can share one translation; as before, the last
    # row seen for a given translation wins.
    d[translated_word] = flags

# Drop words that carry no emotion/sentiment at all.
d = {k: v for k, v in d.items() if v !=[0,0,0,0,0,0,0,0,0,0]}

### 4. Storing the dictionary in a pickle file for later reuse:

In [9]:
import pickle

import os

# Output location: NRCLexicon/<language>/<language>_lexicon.pickle
lexicon_dir = 'NRCLexicon/'+language
pickle_path = lexicon_dir+'/'+language.lower()+'_lexicon.pickle'

# The per-language folder does not exist the first time a new language is
# processed; create it so that open(..., 'wb') does not fail.
if not os.path.isdir(lexicon_dir):
    os.makedirs(lexicon_dir)

# protocol=2 is kept as in the original.
with open(pickle_path, 'wb') as handle:
    pickle.dump(d, handle, protocol=2)

# Read the file back as a sanity check; only unpickle files produced by this
# notebook (pickle.load on untrusted data can execute arbitrary code).
with open(pickle_path, 'rb') as handle:
    b = pickle.load(handle)
len(b)

5045

## III. Preparing Sets of Representative Words:

In [None]:
import pandas as pd
# Load the word-level lexicon text file with every line read into a single
# 'col' column, then keep the first token (the word) and the last token (the score).
# NOTE: this rebinds `lexicon`, replacing the multilingual frame loaded in Section I.
raw_lines = pd.read_csv('NRCLexicon/NRC-Emotion-Lexicon-v0.92/NRC-emotion-lexicon.txt',engine='python', names=['col'])

# Split each line once (the original split the same column three times and
# built an 'EmotionCategory' column only to delete it immediately).
tokens = raw_lines['col'].str.split()
lexicon = pd.DataFrame(
    {
        'Word': tokens.apply(lambda parts: parts[0]),
        # Scores stay as strings, exactly as before; cast downstream if
        # numeric values are needed.
        'Score': tokens.apply(lambda parts: parts[-1]),
    },
    columns=['Word', 'Score'],
)

In [None]:
from tqdm import tqdm
# Collect, for every distinct word, the list of its scores in file order.
# A single groupby pass replaces the original per-word boolean mask over the
# whole frame, which cost O(distinct words x rows).
word_vectors = []
for word, scores in tqdm(lexicon.groupby('Word', sort=False)['Score']):
    word_vectors.append((word, list(scores)))

# Kept for any later use; now in order of first appearance rather than in
# arbitrary set order.
distinct_words = [word for word, _ in word_vectors]

In [None]:
import numpy as np

# word -> list of scores, built from the (word, vector) pairs computed above.
vector_dict = dict(word_vectors)
vector_dict['happy']

# The dictionary was persisted once with the line below; left commented out so
# that re-running the notebook does not overwrite the stored file.
#np.save('NRCLexicon/lexicon_dictionary.npy', vector_dict)

# Spot checks: membership of an arbitrary word and a 10-slot zero vector.
if 'aims' in vector_dict:
    print ("yes")
list_ = [0] * 10
list_
vector_dict['clap']

# np.save stores a dict as a pickled 0-d object array, so loading it needs
# allow_pickle=True (numpy >= 1.16.3 defaults to False and raises ValueError).
# Only load files produced by this notebook: unpickling untrusted data can
# execute arbitrary code.
lexicon_dict = np.load('NRCLexicon/lexicon_dictionary.npy', allow_pickle=True).item()
lexicon_dict['clap']

### 2. Computing Lists of Representative Words for each emotion/sentiment category:

In [21]:
# One list per position of the 10-element vectors stored in `d`: every word
# whose flag at that position equals 1, in the iteration order of `d`.
representative_set = [
    [word for word, flags in d.items() if flags[position] == 1]
    for position in range(10)
]
# Preview the first 15 words of the first category.
representative_set[0][0:15]

['allégation',
 'armes à feu',
 'présomptueux',
 'téméraire',
 'peur',
 'moustique',
 'mélodrame',
 'coupe',
 'provocation',
 'offenser',
 'calomnie',
 'aliénation',
 'chaud',
 'malédiction',
 'huée']

### 3. Storing the lists in a DataFrame where each column holds the representative words for one emotion/sentiment category:

In [18]:
# Column labels, in the same order as the positions of `representative_set`.
category_names = ['Anger','Anticipation','Disgust','Fear','Joy','Negative','Positive','Sadness','Surprise','Trust']

# Build the frame in one step instead of growing it with pd.concat in a loop.
# Wrapping each list in a Series lets pandas pad the shorter categories with
# NaN, as the column-wise concat did. dtype=object keeps an empty category
# from becoming a float column.
lexicon_df = pd.DataFrame(
    {name: pd.Series(words, dtype=object)
     for name, words in zip(category_names, representative_set)},
    columns=category_names,  # fix the column order explicitly
)
# The original assigned a nested list ([[...]]) to `.columns`, which creates a
# one-level MultiIndex; the labels here are plain strings, so
# lexicon_df['Anger'] returns a Series.

In [19]:
# Write the representative-word table to NRCLexicon/<language>/<language>_nrc.csv
# (without the row index), then preview the first 15 rows.
representative_csv = 'NRCLexicon/'+language+'/'+language.lower()+'_nrc.csv'
lexicon_df.to_csv(representative_csv, index=False)
lexicon_df.head(15)

Unnamed: 0,Anger,Anticipation,Disgust,Fear,Joy,Negative,Positive,Sadness,Surprise,Trust
0,allégation,antidote,épargne,blindé,accolade,allégation,antidote,corse,averti,antidote
1,armes à feu,averti,gratuit,armes à feu,fortune,armes à feu,épargne,perplexité,peur,épargne
2,présomptueux,peur,présomptueux,averti,immaculé,jacasser,innocent,éteint,rôder,innocent
3,téméraire,courrier,moustique,téméraire,intimement,méthanol,conjecturer,mélodrame,accolade,compétent
4,peur,accolade,menteur,peur,gage,incorrect,compétent,coupe,fortune,accolade
5,moustique,fortune,coupe,rôder,librement,puéril,accolade,perdu,blitz,fortune
6,mélodrame,prophète,offenser,conspirer,libérer,présomptueux,clé de voûte,épreuves,libérer,digue
7,coupe,intimement,calomnie,dommage,musical,mutant,fortune,aliénation,musical,prophète
8,provocation,libérer,aliénation,coupe,blancheur,téméraire,prophète,malédiction,chanceler,shérif
9,offenser,musical,malédiction,dément,culte,peur,majesté,plaintif,pillage,religion
