In [38]:
import pandas as pd
import model_wrappers as mw

# Column labels 'v_1' ... 'v_300', one per component of a word vector.
DIMENSIONS = ['v_%d' % component for component in range(1, 301)]

# Load the semantic model from disk. Later cells look words up with
# `en[word]` and test membership with `word in en`.
VECTOR_FILE = 'wiki.en.vec'
en = mw.FastVector(VECTOR_FILE)

### 1: Positive and Negative Comparators
#### 1.1: Vectors for positive words

In [39]:
# Positive attribute words, taken from appendix A of
# http://psycnet.apa.org/fulltext/1998-02892-004.html#appendix_psp_74_6_1464
positive_words = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal",
                  "pleasure", "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor",
                  "miracle", "sunrise", "family", "happy", "laughter", "paradise", "vacation"]

# Fail early, listing every word the model lacks, instead of stopping on the
# first failed lookup part-way through building the table.
missing_positive = [word for word in positive_words if word.lower() not in en]
if missing_positive:
    raise KeyError('positive words missing from the semantic model: {0}'.format(missing_positive))

positive = pd.DataFrame(dict(attribute = positive_words))

positive['valence'] = 'positive'

# Build every vector column in one DataFrame construction (one row per word,
# one column per entry of DIMENSIONS) rather than creating a Series per row
# through a row-wise apply.
positive = positive.join(pd.DataFrame([en[word.lower()] for word in positive_words],
                                      index = positive.index, columns = DIMENSIONS))

#### 1.2: Vectors for negative words

In [41]:
# Negative attribute words, taken from appendix A of
# http://psycnet.apa.org/fulltext/1998-02892-004.html#appendix_psp_74_6_1464
negative_words = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison",
                  "stink", "assault", "disaster", "hatred", "pollute", "tragedy", "bomb", "divorce", "jail",
                  "poverty", "ugly", "cancer", "evil", "kill", "rotten", "vomit", "agony"]

# Fail early, listing every word the model lacks, instead of stopping on the
# first failed lookup part-way through building the table.
missing_negative = [word for word in negative_words if word.lower() not in en]
if missing_negative:
    raise KeyError('negative words missing from the semantic model: {0}'.format(missing_negative))

negative = pd.DataFrame(dict(attribute = negative_words))

negative['valence'] = 'negative'

# Build every vector column in one DataFrame construction (one row per word,
# one column per entry of DIMENSIONS) rather than creating a Series per row
# through a row-wise apply.
negative = negative.join(pd.DataFrame([en[word.lower()] for word in negative_words],
                                      index = negative.index, columns = DIMENSIONS))

#### 1.3: Combining the comparator datasets

In [45]:
# Stack the positive and negative word tables into one frame; ignore_index
# renumbers the rows so every comparitor gets a unique row label.
comparitors = pd.concat(
    [positive, negative],
    ignore_index = True,
)

### 2: Names
#### 2.1: Vectors for names

In [36]:
names = pd.read_csv('names.csv') # read in the names

# Check which names got vectors. Entries that are not strings (for example a
# cell that read_csv parsed as a missing value) cannot be lower-cased, so they
# are flagged False here instead of raising AttributeError.
in_model = pd.Series([isinstance(name, str) and name.lower() in en for name in names['name']],
                     index = names.index, dtype = bool)

# subset out unusable names
names = names[in_model].copy()

# Retrieve all vectors in one DataFrame construction (one row per remaining
# name, one column per entry of DIMENSIONS) and add them to the name df. The
# frame reuses names.index so the join lines up with the filtered rows.
name_vectors = pd.DataFrame([en[name.lower()] for name in names['name']],
                            index = names.index, columns = DIMENSIONS)
names = names.join(name_vectors)

### 3: Analysis
#### 3.1: Utils

In [62]:
import numpy as np

def cosine(v1, v2):
    """Return the cosine similarity of two vectors.

    The dot product of `v1` and `v2` is divided by the product of their
    Euclidean norms.
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / norm_product

def score_name(name_row):
    """Score one name against every comparitor word.

    Parameters
    ----------
    name_row : row of the `names` frame; its DIMENSIONS entries hold the
        name's vector.

    Returns
    -------
    pandas.Series indexed like `comparitors.index`, holding the cosine
    similarity between the name's vector and each comparitor's vector
    (the same quantity `cosine` computes for a single pair).
    """
    # Extract the name vector once; a row slice of a mixed-type frame comes
    # back as an object array, so cast it to float for numeric linear algebra.
    name_vector = name_row[DIMENSIONS].values.astype(float)
    comparitor_matrix = comparitors[DIMENSIONS].values.astype(float)

    # One matrix-vector product yields every dot product at once; dividing by
    # the product of the norms turns each into a cosine similarity.
    dot_products = comparitor_matrix.dot(name_vector)
    norm_products = np.linalg.norm(comparitor_matrix, axis = 1) * np.linalg.norm(name_vector)
    return pd.Series(dot_products / norm_products, index = comparitors.index)

#### 3.2: Computing similarities

In [63]:
# Score every name row against all comparitors: one result row per name,
# one column per comparitor row.
results = names.apply(score_name, axis = 'columns')

In [65]:
# Label each similarity column with its comparitor word. rename() only touches
# columns still labelled with a comparitors index value, so re-running this
# cell is a no-op; assigning results.columns directly would fail on a re-run
# because the frame has gained the extra 'name' column by then.
results = results.rename(columns = dict(zip(comparitors.index, comparitors['attribute'])))

# Attach the names positionally; results has one row per row of names, in the
# same order. Bracket access avoids relying on attribute-style column lookup.
results['name'] = names['name'].values

In [71]:
# Optional: uncomment the next line to save the similarity table to disk.
#results.to_csv('name-results.csv', index = False)

#### 3.3: Example: What is the luckiest name?

In [86]:
# Rank the names by their similarity to "lucky", most similar first.
results[['name', 'lucky']].sort_values('lucky', ascending = False)

Unnamed: 0,name,lucky
4432,Sonny,0.424140
3008,Love,0.423179
590,Buddy,0.420261
755,Chance,0.416936
2075,Jack,0.415908
601,Buster,0.413532
1525,Essie,0.411188
4182,Sammy,0.409079
4171,Sally,0.408517
3856,Polly,0.408397
