Generates a training set from BERT embeddings: for each compound word, a few contextual embeddings of each constituent word are generated, and the one closest (by Euclidean distance) to the compound word's embedding is selected.

In [167]:
from transformers import BertModel, BertTokenizer, data
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import brown
import torch
import random
from itertools import islice
from scipy.spatial.distance import cdist, euclidean

In [11]:
# Download the Brown corpus; it supplies the example sentences read via brown.sents() / brown.words() later on.
nltk.download('brown')

[nltk_data] Downloading package brown to
[nltk_data]     /Users/kalliehuynh/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


True

In [57]:
# Load the BERT embeddings table from CSV.
embeddings = pd.read_csv('BERT_embeddings.csv')
# Column 1 is used as the word key throughout this notebook
# (column 0 is presumably a saved index — TODO confirm).
embeddings_vocab = embeddings.iloc[:, 1].tolist()

In [71]:
# Map each word (column 1) to its embedding vector (columns 2 onward) as a NumPy array.
# If a word appears on several rows, the last row wins.
embeddings_dict = {
    embeddings.iloc[row, 1]: embeddings.iloc[row, 2:].to_numpy()
    for row in embeddings.index
}

In [53]:
# Load the LADEC table; later cells read its 'stim', 'c1', 'c2' and 'correctParse' columns.
ladec = pd.read_csv('LADECv1-2019.csv')

In [59]:
# Flag the LADEC rows whose compound ('stim') and both constituents ('c1', 'c2')
# all have an entry in the embeddings vocabulary.
# embeddings_vocab is a list, so each `in` test on it scans the whole vocabulary;
# a set gives the same answers with constant-time lookups.
embedded_words = set(embeddings_vocab)
is_embedded = ladec.apply(
    lambda row: (
        (row['c1'] in embedded_words)
        and (row['c2'] in embedded_words)
        and (row['stim'] in embedded_words)
    ),
    axis='columns',
)

In [60]:
# Store the per-row "all three words have embeddings" flag on the LADEC frame.
ladec['is_embedded'] = is_embedded

In [61]:
# Keep a row only when its 'correctParse' value is 'yes' and all three of its words have embeddings.
parsed_correctly = ladec['correctParse'] == 'yes'
ladec['keep_row'] = parsed_correctly & ladec['is_embedded']


In [62]:
# Subset of LADEC rows that passed the 'keep_row' filter.
embedded_ladec = ladec[ ladec['keep_row']]

In [63]:
# Column names for the individual embedding dimensions of the first constituent (c1),
# the second constituent (c2) and the compound (cmp).
# Indices are zero-padded to at least two digits: c1_00 ... c1_99, c1_100 ... c1_767.
EMBEDDING_DIM = 768
c1_labels = [f'c1_{dim:02d}' for dim in range(EMBEDDING_DIM)]
c2_labels = [f'c2_{dim:02d}' for dim in range(EMBEDDING_DIM)]
cmp_labels = [f'cmp_{dim:02d}' for dim in range(EMBEDDING_DIM)]

In [239]:
# Empty frame: three word columns (compound, constituent 1, constituent 2) followed by
# one column per embedding dimension for c1, then c2, then the compound.
all_embeddings = pd.DataFrame( columns = ['cmp','c1','c2', *c1_labels, *c2_labels, *cmp_labels])

In [240]:
# Fill the word columns from the filtered LADEC rows; LADEC's 'stim' goes into 'cmp'.
# NOTE(review): source and target column names differ ('stim' vs 'cmp'), so this relies on
# pandas pairing the columns by position — confirm with the pandas version in use.
all_embeddings[ ['cmp','c1','c2'] ] = embedded_ladec[ ['stim','c1','c2'] ]


In [241]:
# For every row, look up the stored embedding of each word (c1, c2, compound)
# and write it into that word's block of embedding columns.
for i in all_embeddings.index:
    for role, role_labels in (('c1', c1_labels), ('c2', c2_labels), ('cmp', cmp_labels)):
        all_embeddings.loc[i, role_labels] = embeddings_dict[all_embeddings.loc[i, role]]

In [78]:
# Save the first training set (embeddings looked up from BERT_embeddings.csv) to disk.
all_embeddings.to_csv('BERT_trainingset1.csv')

In [243]:
# generate_embedding() relies on a global `tokenizer`, which no other cell in this
# notebook defines; create it here, from the same checkpoint as the model, so that
# Restart & Run All does not fail with a NameError.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Load pretrained BERT with hidden states exposed and switch it to evaluation mode.
model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)
model.eval()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [81]:
# Lower-cased Brown corpus: one token list per sentence, plus a flat list of all tokens.
sentences = [[word.lower() for word in sentence] for sentence in brown.sents()]
words = [word.lower() for word in brown.words()]
# Shuffle with a fixed seed: the shuffled sentence order decides which example sentences
# are picked for each word, so an unseeded shuffle makes the saved training set
# impossible to reproduce. A private Random instance leaves the global RNG state untouched.
RANDOM_SEED = 0
shuffle_rng = random.Random(RANDOM_SEED)
shuffle_rng.shuffle(sentences)
shuffle_rng.shuffle(words)

Create a dictionary that maps each word to a few (up to 5) sentences containing that exact word
    e.g. examples_dict['apple'] = ['i ate an apple today', 'an apple a day', 'apple trees are cool'] (each sentence is actually stored as a list of lower-cased tokens)

In [87]:
# Map every corpus word to (at most) the first MAX_EXAMPLES_PER_WORD sentences, in the
# current order of `sentences`, that contain it as an exact token.
#
# This is done in a single pass over the sentences. Rescanning every sentence for every
# token in `words` (which repeats each word once per occurrence) produces the same
# mapping but does vastly more work.
MAX_EXAMPLES_PER_WORD = 5
# Words that never occur in any sentence keep an empty list.
examples_dict = {word: [] for word in words}
for sentence in sentences:
    # set(): a sentence is recorded once per word even if the word repeats in it.
    for word in set(sentence):
        examples = examples_dict.get(word)
        if examples is not None and len(examples) < MAX_EXAMPLES_PER_WORD:
            examples.append(sentence)

In [372]:
def generate_embedding(word, sentence):
    """Return the model's output vector for `word` as it occurs in `sentence`.

    `sentence` is a list of tokens. It is converted to ids with the global
    `tokenizer` and run through the global `model` as a batch of one, with
    gradient tracking disabled. The vector at the position of the first
    occurrence of `word` is returned as a NumPy array of shape (768,).

    Raises ValueError (from list.index) if `word` is not in `sentence`.
    """
    with torch.no_grad():
        token_ids = tokenizer.convert_tokens_to_ids(sentence)
        batch = torch.tensor([token_ids])
        # Element 0 of the model output, then the single sequence of the batch.
        token_vectors = model(batch)[0][0]
        word_vector = token_vectors[sentence.index(word)].reshape(1, -1)
        return word_vector.cpu().detach().numpy().reshape(768)

In [374]:
# Sanity check: embed 'apples' in each of its example sentences and confirm that a
# single embedding comes back as a 1-D array.
apples_embeddings = []
for example in examples_dict['apples']:
    apples_embeddings.append(generate_embedding('apples', example))
apples_embeddings[0].ndim

1

In [249]:
# Renumber rows 0..n-1 in place (the old index is dropped). The trainingset2 loop further
# down passes index values to positional .iloc, which is only correct after this reset.
all_embeddings.reset_index(drop=True, inplace=True)


In [399]:
def closest_embedding(embedding, list_of_embeddings):
    """Return the candidate in `list_of_embeddings` nearest to `embedding`.

    Nearness is the Euclidean distance between `embedding` and each candidate.
    The returned object is the candidate itself, not a copy. If several
    candidates are at exactly the same minimal distance, the first one is
    returned.

    Parameters
    ----------
    embedding : 1-D array-like
        The reference vector.
    list_of_embeddings : iterable of 1-D array-like
        Candidate vectors, each the same length as `embedding`.

    Raises
    ------
    ValueError
        If `list_of_embeddings` is empty.
    """
    candidates = list(list_of_embeddings)
    if not candidates:
        raise ValueError('closest_embedding() needs at least one candidate embedding')
    # Select by distance directly instead of storing candidates in a dict keyed by
    # float distance, where equal distances would silently overwrite one another.
    return min(candidates, key=lambda candidate: euclidean(embedding, candidate))

In [405]:
# Second training set: same rows and compound embeddings as all_embeddings, but each
# constituent's embedding is replaced by whichever of its contextual embeddings (one per
# example sentence) lies closest to the compound's embedding.
trainingset2 = all_embeddings.copy(deep=True)

# Constituents recur across compounds (e.g. 'fire' in 'firearms' and 'fireworks'), so the
# contextual embeddings of each word are computed once and reused rather than re-running
# the model for every row.
contextual_cache = {}


def _contextual_embeddings(word):
    """Return (and cache) the embeddings of `word` in each of its example sentences."""
    if word not in contextual_cache:
        contextual_cache[word] = [
            generate_embedding(word, example) for example in examples_dict[word]
        ]
    return contextual_cache[word]


# Label-based access (.loc with column names) replaces positional .iloc with hard-coded
# offsets, so the loop no longer depends on the index being 0..n-1 or on column order.
for i in all_embeddings.index:
    c1 = all_embeddings.loc[i, 'c1']
    c2 = all_embeddings.loc[i, 'c2']
    cmp_embedding = all_embeddings.loc[i, cmp_labels].to_numpy(dtype=float)
    trainingset2.loc[i, c1_labels] = closest_embedding(cmp_embedding, _contextual_embeddings(c1))
    trainingset2.loc[i, c2_labels] = closest_embedding(cmp_embedding, _contextual_embeddings(c2))





In [409]:
# Display all_embeddings (constituent vectors looked up from BERT_embeddings.csv) for comparison with trainingset2.
all_embeddings

Unnamed: 0,cmp,c1,c2,c1_00,c1_01,c1_02,c1_03,c1_04,c1_05,c1_06,c1_07,c1_08,c1_09,c1_10,c1_11,c1_12,c1_13,c1_14,c1_15,c1_16,c1_17,c1_18,c1_19,c1_20,c1_21,c1_22,c1_23,c1_24,c1_25,c1_26,c1_27,c1_28,c1_29,c1_30,c1_31,c1_32,c1_33,c1_34,c1_35,c1_36,...,cmp_728,cmp_729,cmp_730,cmp_731,cmp_732,cmp_733,cmp_734,cmp_735,cmp_736,cmp_737,cmp_738,cmp_739,cmp_740,cmp_741,cmp_742,cmp_743,cmp_744,cmp_745,cmp_746,cmp_747,cmp_748,cmp_749,cmp_750,cmp_751,cmp_752,cmp_753,cmp_754,cmp_755,cmp_756,cmp_757,cmp_758,cmp_759,cmp_760,cmp_761,cmp_762,cmp_763,cmp_764,cmp_765,cmp_766,cmp_767
0,firearms,fire,arms,0.306882,0.347477,0.121686,-0.116544,0.238976,-0.241225,-0.05919,-0.152411,-0.260173,-0.231379,0.075639,0.011626,-0.148121,0.261864,-0.199842,0.746793,0.273412,0.176947,-0.407396,0.018173,0.184014,-0.028838,0.008865,0.471597,0.143743,0.42232,0.12463,0.174283,-0.195753,0.092736,0.283208,0.137101,0.170151,-0.201664,-0.121466,-0.365894,0.044687,...,-0.118838,-0.763056,0.379005,0.522458,-0.504011,-0.269101,0.065853,0.01209,-0.135361,0.261787,-0.29125,0.011223,0.138634,-0.00002,0.179635,-0.598769,0.016547,0.36209,-0.001882,0.018785,-0.254406,0.047067,0.363453,0.003035,0.516177,0.16625,-0.334533,0.028811,-0.472909,-0.016061,-0.552014,-0.040579,0.029861,-0.294845,0.144527,-0.193792,-0.354488,-0.22896,0.246467,-0.051407
1,turnaround,turn,around,0.180003,0.362116,0.258669,-0.050414,0.177428,-0.090171,-0.184348,0.356337,0.218401,-0.240157,0.084281,-0.057987,-0.090988,0.095799,-0.295345,0.46204,0.177077,0.009489,-0.157473,-0.031217,0.465271,0.029498,0.134468,0.500703,0.288195,-0.099312,-0.003664,-0.142693,-0.428526,-0.047209,0.080369,-0.154755,0.044124,-0.063097,-0.240956,-0.350415,0.401093,...,-1.013953,-0.456363,0.218049,-0.235983,0.094692,-0.524748,-0.625212,-0.378356,0.071888,-0.1526,-0.620387,-0.093978,0.160365,0.504613,-1.361682,-0.537974,0.383079,0.824039,0.734338,-0.487778,-0.20692,0.657283,-0.272551,0.305398,-0.001724,0.256762,-0.26856,-0.542538,-0.335587,-0.600287,0.431748,-0.534062,-0.010079,-0.02399,0.316489,0.102567,-0.190017,-0.349969,0.460579,-0.083446
2,breakaway,break,away,0.127659,0.491803,0.174259,0.061443,0.308284,-0.318771,0.122113,0.193328,-0.11045,0.012622,-0.024826,0.021342,-0.136238,0.070122,-0.101188,0.232585,0.251623,-0.187576,0.211099,-0.086173,0.267554,-0.120772,0.436335,0.336922,0.248259,-0.150523,-0.014995,0.133539,-0.268476,0.055924,0.32425,-0.11634,0.014727,-0.090401,-0.346566,-0.251147,0.326886,...,-0.217137,-0.16891,-0.07961,-0.2538,-0.260176,-0.061566,-0.009088,0.137364,-0.220916,-0.164655,0.089268,-0.438777,0.481451,0.628595,-0.876594,-0.222676,0.378544,0.573607,0.630287,-0.216487,-0.118798,-0.122614,-0.290342,0.378577,0.084399,-0.723135,-0.288653,0.166646,-0.165356,-0.42602,0.169326,0.005546,0.234493,-0.039265,-0.186066,0.467806,-0.377758,-0.282006,-0.38585,-0.318166
3,straightaway,straight,away,0.243906,0.172449,0.344686,-0.008287,-0.008537,-0.380969,-0.16563,0.277433,-0.185304,0.003972,0.256869,-0.055849,-0.140168,0.083345,0.060041,0.480716,0.303273,-0.064952,-0.234577,-0.25295,0.152624,-0.290769,0.373011,0.294614,0.075285,-0.041983,0.225534,-0.004088,-0.426939,-0.0818,0.568918,-0.204668,0.029587,-0.317141,0.1136,-0.530287,0.197059,...,-0.74322,-0.230949,0.229825,0.381592,0.128916,-0.210681,-0.364865,0.028875,-0.053802,-0.189162,-0.114207,-0.146656,0.521217,-0.039902,-0.676974,-0.729409,0.304847,-0.213269,-0.01625,-0.166444,-0.351096,0.611157,-0.178005,0.13477,0.0318,0.368352,-0.441293,-0.241529,-0.541256,-0.456215,-0.089152,0.049667,0.235026,-0.191677,0.376301,0.053493,-0.192225,0.019832,0.542749,0.210925
4,razorback,razor,back,0.575013,-0.058907,-0.050044,0.26633,-0.209873,0.145645,-0.036316,0.092698,0.114075,-0.2139,0.371633,0.279256,0.078066,0.352557,-0.010164,0.695263,0.145069,-0.097769,-0.385037,0.141058,0.07173,-0.199515,0.237944,0.010099,-0.042627,0.562217,0.150402,0.167259,-0.612657,0.128691,0.48442,-0.332586,-0.16134,-0.146602,-0.086347,-0.577133,0.139881,...,-0.654521,-0.327704,0.379057,-0.233292,0.520686,-0.86454,-1.110958,-0.080947,0.083317,0.385014,-1.089764,0.296884,0.238701,0.13903,-0.225565,-0.216435,-0.432835,0.090557,-0.201686,0.499613,0.5008,0.422515,-0.054316,0.10777,0.836166,0.228013,-0.203003,0.30973,-0.694459,-0.791784,0.342704,0.150877,0.469788,-0.068406,0.161192,-0.056012,-0.085181,0.23379,0.210354,-0.730875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278,metalworking,metal,working,0.347103,0.547228,0.146809,0.131936,-0.0142,0.063368,-0.199542,-0.11131,-0.153332,-0.184294,0.114566,-0.088569,-0.038797,0.351745,-0.206226,0.691788,0.236031,-0.117818,-0.172746,0.184262,0.049808,-0.102082,0.25423,0.327578,0.202018,0.158453,-0.045834,0.101367,-0.444133,0.236233,0.281072,0.290806,-0.250342,0.282496,0.22408,-0.369627,0.129296,...,-0.62303,-0.450808,0.200707,0.089853,-0.304492,-0.328603,-0.334667,0.189428,0.02167,0.12846,-0.95533,-0.008781,0.280841,-0.177815,-0.800347,-0.311613,-0.345989,0.14567,0.303835,-0.150931,-0.281927,1.078696,-0.162455,0.168362,0.588428,0.264327,-0.707685,0.026528,-0.679166,0.188686,-0.097057,0.490353,0.073352,0.083658,0.400655,-0.172182,-0.1644,0.106825,0.37723,-0.266672
279,woodworking,wood,working,0.396838,0.468218,0.12003,0.171639,0.255258,0.039746,-0.199628,-0.009896,-0.408827,-0.129288,0.137744,-0.116719,0.101201,0.229318,-0.442714,0.741699,0.250875,0.145443,-0.313516,-0.001747,0.116568,-0.085678,0.227168,0.329501,0.300148,0.343035,0.001671,0.025143,-0.180469,0.134951,0.390935,0.009272,-0.013258,-0.071397,0.246563,-0.513997,0.219774,...,-0.7044,-0.478045,0.156271,0.375346,-0.322784,-0.278626,-0.30944,0.332547,-0.127158,0.177187,-0.606275,0.018705,0.474721,0.211895,-0.670104,-0.719589,-0.566141,0.214682,0.087673,-0.40338,-0.117545,1.055316,-0.171433,0.428347,0.422284,0.690025,-0.843158,-0.032456,-0.403357,0.11604,-0.161,-0.125852,0.224263,-0.247326,0.143008,-0.565697,0.065304,0.087637,0.309505,-0.0841
280,fireworks,fire,works,0.306882,0.347477,0.121686,-0.116544,0.238976,-0.241225,-0.05919,-0.152411,-0.260173,-0.231379,0.075639,0.011626,-0.148121,0.261864,-0.199842,0.746793,0.273412,0.176947,-0.407396,0.018173,0.184014,-0.028838,0.008865,0.471597,0.143743,0.42232,0.12463,0.174283,-0.195753,0.092736,0.283208,0.137101,0.170151,-0.201664,-0.121466,-0.365894,0.044687,...,-0.540092,-0.419307,0.315115,-0.123576,-0.379095,0.254215,-0.015379,0.024263,0.270941,0.201429,0.0474,-0.144862,-0.093276,-0.091481,-0.23419,-0.390995,0.108525,-0.019283,0.27944,0.050905,0.095166,0.138401,-0.131597,0.002341,0.341208,-0.161593,-0.088825,0.247888,-0.690673,-0.063594,-0.384836,-0.474411,0.125107,-0.014144,0.171168,-0.458203,-0.139406,-0.092208,0.501959,-0.310856
281,waxworks,wax,works,0.33876,0.487201,0.401346,0.233008,0.054997,-0.262055,-0.343935,-0.025483,-0.004575,-0.112573,0.355542,0.068792,0.10584,0.321913,-0.109053,0.56901,0.249991,0.11453,-0.052337,-0.237429,0.201402,-0.118093,0.156811,0.178834,0.300507,0.341346,-0.013559,0.231808,-0.532768,0.158237,0.132676,-0.003891,-0.057251,0.084996,-0.281986,-0.486513,0.29067,...,-0.528288,-0.221641,0.234523,-0.029459,-0.05969,-0.665165,-0.686775,0.288825,-0.101299,0.147331,-0.516189,0.174114,0.225548,-0.344672,-0.248941,-0.438518,-0.117112,0.361404,-0.115454,-0.404367,-0.240916,0.513817,-0.169409,0.244001,0.508167,-0.128355,-0.396466,-0.300677,-0.676022,-0.619092,0.085712,-0.05041,0.10658,-0.37076,0.502152,0.049127,-0.277386,-0.437595,0.336882,0.122535


In [408]:
# Display trainingset2: the c1/c2 columns hold the selected contextual embeddings; the cmp columns are copied unchanged from all_embeddings.
trainingset2

Unnamed: 0,cmp,c1,c2,c1_00,c1_01,c1_02,c1_03,c1_04,c1_05,c1_06,c1_07,c1_08,c1_09,c1_10,c1_11,c1_12,c1_13,c1_14,c1_15,c1_16,c1_17,c1_18,c1_19,c1_20,c1_21,c1_22,c1_23,c1_24,c1_25,c1_26,c1_27,c1_28,c1_29,c1_30,c1_31,c1_32,c1_33,c1_34,c1_35,c1_36,...,cmp_728,cmp_729,cmp_730,cmp_731,cmp_732,cmp_733,cmp_734,cmp_735,cmp_736,cmp_737,cmp_738,cmp_739,cmp_740,cmp_741,cmp_742,cmp_743,cmp_744,cmp_745,cmp_746,cmp_747,cmp_748,cmp_749,cmp_750,cmp_751,cmp_752,cmp_753,cmp_754,cmp_755,cmp_756,cmp_757,cmp_758,cmp_759,cmp_760,cmp_761,cmp_762,cmp_763,cmp_764,cmp_765,cmp_766,cmp_767
0,firearms,fire,arms,0.382751,0.049523,0.485108,-0.239235,-0.101339,-0.489516,-0.788938,-0.368939,0.008052,-0.236644,-0.097426,0.077028,-0.378867,0.160679,0.051861,0.847933,0.325562,0.348118,-1.01901,0.027388,1.037058,0.071305,-0.298845,0.072707,-0.195096,0.894714,-0.127328,0.086463,-0.222972,0.69157,0.585001,0.2468,1.006906,-0.088951,-0.307759,0.14245,-0.275207,...,-0.118838,-0.763056,0.379005,0.522458,-0.504011,-0.269101,0.065853,0.01209,-0.135361,0.261787,-0.29125,0.011223,0.138634,-0.00002,0.179635,-0.598769,0.016547,0.36209,-0.001882,0.018785,-0.254406,0.047067,0.363453,0.003035,0.516177,0.16625,-0.334533,0.028811,-0.472909,-0.016061,-0.552014,-0.040579,0.029861,-0.294845,0.144527,-0.193792,-0.354488,-0.22896,0.246467,-0.051407
1,turnaround,turn,around,0.304958,-0.213485,0.488826,0.181101,0.597293,0.018228,-0.752402,0.250651,0.614301,0.393992,0.407401,0.364579,0.108686,-0.317714,-0.480744,0.822104,-0.149962,-0.188807,-0.129695,0.065726,0.633829,-0.139975,-0.131272,0.538497,0.215444,0.703052,-0.264492,-0.270153,-0.448449,0.176976,0.001322,0.033256,-0.061243,0.191875,-0.197975,-0.759938,0.306625,...,-1.013953,-0.456363,0.218049,-0.235983,0.094692,-0.524748,-0.625212,-0.378356,0.071888,-0.1526,-0.620387,-0.093978,0.160365,0.504613,-1.361682,-0.537974,0.383079,0.824039,0.734338,-0.487778,-0.20692,0.657283,-0.272551,0.305398,-0.001724,0.256762,-0.26856,-0.542538,-0.335587,-0.600287,0.431748,-0.534062,-0.010079,-0.02399,0.316489,0.102567,-0.190017,-0.349969,0.460579,-0.083446
2,breakaway,break,away,-0.104082,0.357842,0.563442,-0.027165,0.422574,-0.069079,0.235966,-0.226993,-0.189572,0.382641,-0.051766,-0.529943,0.043276,0.384496,-0.000457,-0.232365,0.058584,-0.352132,0.438807,-0.341376,-0.025504,-0.350121,0.254752,0.554712,0.397739,-0.156359,-0.612717,-0.166272,-0.507703,0.090694,0.625252,0.247495,0.58284,0.023157,-0.036446,0.269424,-0.261243,...,-0.217137,-0.16891,-0.07961,-0.2538,-0.260176,-0.061566,-0.009088,0.137364,-0.220916,-0.164655,0.089268,-0.438777,0.481451,0.628595,-0.876594,-0.222676,0.378544,0.573607,0.630287,-0.216487,-0.118798,-0.122614,-0.290342,0.378577,0.084399,-0.723135,-0.288653,0.166646,-0.165356,-0.42602,0.169326,0.005546,0.234493,-0.039265,-0.186066,0.467806,-0.377758,-0.282006,-0.38585,-0.318166
3,straightaway,straight,away,-0.001749,-0.128499,0.765588,-0.065779,0.025506,0.12937,-0.474717,0.629987,-0.185974,0.190931,0.212289,0.536664,0.033262,0.015647,0.016397,-0.046634,0.408869,-0.18193,0.107236,-0.319322,0.125382,0.104096,0.395538,0.193011,0.517401,0.008707,0.125576,0.225187,0.008971,0.339503,0.065811,0.045973,0.066291,-0.855629,0.053149,-0.707398,0.490852,...,-0.74322,-0.230949,0.229825,0.381592,0.128916,-0.210681,-0.364865,0.028875,-0.053802,-0.189162,-0.114207,-0.146656,0.521217,-0.039902,-0.676974,-0.729409,0.304847,-0.213269,-0.01625,-0.166444,-0.351096,0.611157,-0.178005,0.13477,0.0318,0.368352,-0.441293,-0.241529,-0.541256,-0.456215,-0.089152,0.049667,0.235026,-0.191677,0.376301,0.053493,-0.192225,0.019832,0.542749,0.210925
4,razorback,razor,back,0.121279,-0.118942,0.118125,0.231776,-0.292356,0.349308,0.42772,-0.117921,-0.138681,-0.123872,0.529375,-0.041864,0.542897,0.639117,-0.048246,-0.12032,0.471032,-0.127545,-0.248141,0.285174,0.458239,-0.063364,0.212838,-0.317487,-0.275493,0.583739,-0.119446,0.172543,-0.733233,-0.177812,0.48133,-0.310324,-0.416834,-0.073445,-0.158658,-0.369574,-0.105652,...,-0.654521,-0.327704,0.379057,-0.233292,0.520686,-0.86454,-1.110958,-0.080947,0.083317,0.385014,-1.089764,0.296884,0.238701,0.13903,-0.225565,-0.216435,-0.432835,0.090557,-0.201686,0.499613,0.5008,0.422515,-0.054316,0.10777,0.836166,0.228013,-0.203003,0.30973,-0.694459,-0.791784,0.342704,0.150877,0.469788,-0.068406,0.161192,-0.056012,-0.085181,0.23379,0.210354,-0.730875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278,metalworking,metal,working,0.71041,0.7149,0.249368,-0.235609,-0.212036,0.224286,-0.09244,-0.569021,-0.035851,-0.698148,0.205895,0.280246,-0.192511,-0.04414,0.605526,0.524742,0.171149,-0.150145,-0.377149,-0.116937,0.467462,0.040618,0.321319,0.382284,-0.023461,0.286383,0.114047,-0.362708,-0.562836,0.287679,0.167329,0.408601,-0.017124,0.229809,0.167263,-0.153302,0.575143,...,-0.62303,-0.450808,0.200707,0.089853,-0.304492,-0.328603,-0.334667,0.189428,0.02167,0.12846,-0.95533,-0.008781,0.280841,-0.177815,-0.800347,-0.311613,-0.345989,0.14567,0.303835,-0.150931,-0.281927,1.078696,-0.162455,0.168362,0.588428,0.264327,-0.707685,0.026528,-0.679166,0.188686,-0.097057,0.490353,0.073352,0.083658,0.400655,-0.172182,-0.1644,0.106825,0.37723,-0.266672
279,woodworking,wood,working,0.447888,0.397287,-0.0438,0.345284,-0.025091,-0.145027,-0.24934,-0.29822,-0.38853,-0.099791,0.3295,-0.125469,-0.067303,0.362665,-0.055289,0.486486,0.264125,0.179723,-0.324585,0.145631,0.0302,0.172545,0.125768,-0.048867,0.378077,0.309665,0.161956,0.127608,-0.116231,-0.114585,0.602122,-0.059663,0.100442,0.149995,0.017549,-0.222137,0.240105,...,-0.7044,-0.478045,0.156271,0.375346,-0.322784,-0.278626,-0.30944,0.332547,-0.127158,0.177187,-0.606275,0.018705,0.474721,0.211895,-0.670104,-0.719589,-0.566141,0.214682,0.087673,-0.40338,-0.117545,1.055316,-0.171433,0.428347,0.422284,0.690025,-0.843158,-0.032456,-0.403357,0.11604,-0.161,-0.125852,0.224263,-0.247326,0.143008,-0.565697,0.065304,0.087637,0.309505,-0.0841
280,fireworks,fire,works,0.382751,0.049523,0.485108,-0.239235,-0.101339,-0.489516,-0.788938,-0.368939,0.008052,-0.236644,-0.097426,0.077028,-0.378867,0.160679,0.051861,0.847933,0.325562,0.348118,-1.01901,0.027388,1.037058,0.071305,-0.298845,0.072707,-0.195096,0.894714,-0.127328,0.086463,-0.222972,0.69157,0.585001,0.2468,1.006906,-0.088951,-0.307759,0.14245,-0.275207,...,-0.540092,-0.419307,0.315115,-0.123576,-0.379095,0.254215,-0.015379,0.024263,0.270941,0.201429,0.0474,-0.144862,-0.093276,-0.091481,-0.23419,-0.390995,0.108525,-0.019283,0.27944,0.050905,0.095166,0.138401,-0.131597,0.002341,0.341208,-0.161593,-0.088825,0.247888,-0.690673,-0.063594,-0.384836,-0.474411,0.125107,-0.014144,0.171168,-0.458203,-0.139406,-0.092208,0.501959,-0.310856
281,waxworks,wax,works,0.337189,0.373053,0.471374,0.138701,0.345408,-0.379861,-0.346864,0.19203,-0.144149,-0.26175,0.283812,0.201377,0.075339,-0.083994,-0.510065,0.690679,0.139762,0.0212,-0.102097,0.044309,0.280605,-0.20285,0.305946,-0.036359,0.359173,-0.027244,0.117344,0.232677,-0.453745,0.108008,0.136294,0.034795,-0.077848,0.028712,-0.147828,-0.258137,0.964738,...,-0.528288,-0.221641,0.234523,-0.029459,-0.05969,-0.665165,-0.686775,0.288825,-0.101299,0.147331,-0.516189,0.174114,0.225548,-0.344672,-0.248941,-0.438518,-0.117112,0.361404,-0.115454,-0.404367,-0.240916,0.513817,-0.169409,0.244001,0.508167,-0.128355,-0.396466,-0.300677,-0.676022,-0.619092,0.085712,-0.05041,0.10658,-0.37076,0.502152,0.049127,-0.277386,-0.437595,0.336882,0.122535


In [407]:
# Save the second training set (closest contextual constituent embeddings) to disk.
trainingset2.to_csv('BERT_trainingset2.csv')