In [1]:
import os

# Hide every GPU from CUDA so the experiments run on CPU. This is set
# before the project imports below because this cell reports
# "Using TensorFlow backend."; the variable should be in place before
# the backend gets a chance to enumerate devices.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np

from Feature2Vec import Feature2Vec
from PLSR import PLSR
from utils import *

# Seed NumPy's global RNG before the model is constructed.
SEED = 42
np.random.seed(seed = SEED)

# Feature-norm matrix to load; swap the two lines to use the McRae norms.
# path = 'data/mcrae_feature_matrix.csv'
path = 'data/cslb_feature_matrix.csv'

print('Building feature2vec')
model = Feature2Vec(path = path)

Using TensorFlow backend.


Building feature2vec


In [2]:
# Restore the pickled NumPy global RNG state needed to reproduce the
# experiments from the paper.
# NOTE: pickle.load can execute arbitrary code, so only load
# 'state_zero.pkl' from a trusted source.
import pickle as pkl

with open('state_zero.pkl', 'rb') as state_file:
    st0 = pkl.load(state_file)

np.random.set_state(st0)

In [3]:
# Draw a random ordering of the concepts and split it: the first 500
# shuffled concepts are used for training, the remainder for testing.
shuffle = np.random.permutation(len(model.concepts))
shuffled_concepts = np.asarray(model.concepts)[shuffle]

train_concepts = list(shuffled_concepts[:500])
test_concepts = list(shuffled_concepts[500:])

# Hand the training split to the model as its train_words.
model.set_vocabulary(train_words = train_concepts)

In [4]:
# Hyper-parameters passed to model.train, gathered in one place.
train_options = dict(
    verbose = 1,
    epochs = 20,
    lr = 5e-3,
    negative_samples = 20,
)

print('Training feature2vec')
model.train(**train_options)
print('')

Training feature2vec
Epoch: 19 Loss: 0.004729189370919474


In [5]:
# Sanity check: show the 10 top-ranked features for the word 'dog'.
word = 'dog'
print('Example features learned for word:', word)

word_vector = model.wvector(word)
best_features = model.top_features(word_vector, top = 10)
print(best_features)
print('')

Example features learned for word: dog
[['0.25640281976339574' 'has_a_tail']
 ['0.23381005781910313' 'has_four_legs']
 ['0.2329559209030151' 'is_an_animal']
 ['0.22442036094359183' 'is_a_mammal']
 ['0.2197005260302991' 'has_fur_hair']
 ['0.2193652359049925' 'has_legs']
 ['0.21557504271842726' "is_man's_best_friend"]
 ['0.20636315741574815' 'has_paws']
 ['0.2042316792609977' 'is_small']
 ['0.20358028406688727' 'is_a_companion']]



In [6]:
# build baseline PLSR models (embedding sizes 50 and 120)
from PLSR import PLSR


def build_plsr(embedding_size):
    """Build and train one PLSR baseline.

    Prints a progress line, constructs a PLSR on the notebook-level
    `path`, sets `train_concepts` as its vocabulary and trains it with
    the requested embedding size.

    Parameters
    ----------
    embedding_size : int
        Value forwarded to ``PLSR.train(embedding_size=...)``.

    Returns
    -------
    PLSR
        The trained baseline model.
    """
    print('Building partial least squared regression (%d)' % embedding_size)
    baseline = PLSR(path = path)
    baseline.set_vocabulary(train_concepts)
    baseline.train(embedding_size = embedding_size)
    return baseline


plsr50 = build_plsr(50)
plsr120 = build_plsr(120)
print('')

Building partial least squared regression (50)
Building partial least squared regression (120)







In [7]:
# Map each PLSR-50 test word to the row of test_preds at the same position.
concept_dict_plsr50 = {
    test_word: plsr50.test_preds[row, :]
    for row, test_word in enumerate(plsr50.test_words)
}

In [8]:
# Map each PLSR-120 test word to the row of test_preds at the same position.
concept_dict_plsr120 = {
    test_word: plsr120.test_preds[row, :]
    for row, test_word in enumerate(plsr120.test_words)
}

In [9]:
# Report neighbour_score for the PLSR-50 baseline at each cut-off in `tops`.
print('PLSR 50 neighbour scores')
tops = [1, 5, 10, 20]
for top_n in tops:
    score = neighbour_score(concept_dict_plsr50, plsr50, top = top_n)
    print('Top', top_n, score)

PLSR 50 neighbour scores
Top 1 2.898550724637681
Top 5 23.18840579710145
Top 10 44.20289855072464
Top 20 60.86956521739131


In [10]:
# Report neighbour_score for the PLSR-120 baseline at each cut-off in `tops`.
print('PLSR 120 neighbour scores')
tops = [1, 5, 10, 20]
for top_n in tops:
    score = neighbour_score(concept_dict_plsr120, plsr120, top = top_n)
    print('Top', top_n, score)

PLSR 120 neighbour scores
Top 1 7.246376811594203
Top 5 34.78260869565217
Top 10 55.79710144927537
Top 20 71.73913043478261


In [11]:
# Map each feature2vec test word to the vector construct_vector returns for it.
concept_dict_f2v = {
    test_word: construct_vector(test_word, model)
    for test_word in model.test_words
}

In [12]:
# Report neighbour_score for feature2vec at each cut-off in `tops`.
tops = [1, 5, 10, 20]
for top_n in tops:
    score = neighbour_score(concept_dict_f2v, model, top = top_n)
    print('Top', top_n, score)

Top 1 10.144927536231885
Top 5 42.7536231884058
Top 10 59.42028985507246
Top 20 80.43478260869566


In [13]:
# Mean feature_score (scaled by 100) for PLSR-50 on the train and test splits.
print('PLSR 50 Scores')
for label, split in (('Train:', 'train'), ('Test:', 'test')):
    split_scores = feature_score(plsr50, data_type = split, max_features = 0)
    print(label, np.mean(split_scores)*100)

PLSR 50 Scores
Train: 50.57949454864544
Test: 40.25185648542887


In [14]:
# Mean feature_score (scaled by 100) for PLSR-120 on the train and test splits.
print('PLSR 120 Scores')
for label, split in (('Train:', 'train'), ('Test:', 'test')):
    split_scores = feature_score(plsr120, data_type = split, max_features = 0)
    print(label, np.mean(split_scores)*100)

PLSR 120 Scores
Train: 65.42766605331973
Test: 40.71468254150691


In [15]:
# Mean feature_score (scaled by 100) for feature2vec on the train and test splits.
print('Feature2Vec Scores')
for label, split in (('Train:', 'train'), ('Test:', 'test')):
    split_scores = feature_score(model, data_type = split, max_features = 0)
    print(label, np.mean(split_scores)*100)

Feature2Vec Scores
Train: 89.12740218635554
Test: 44.46290960352884
