In [1]:
%load_ext autoreload
%autoreload 2

In [24]:
from Model import Model

from Generic_Torch_Model import Simple_GRU as GRU
from Generic_Torch_Model import NN_Model, NN_CRF_Model, extract_data, train_test_split_sentences, transform, \
                                load_pretrained_weights, transform_test, align_pred, write_data, combine_sst_mwe
import torch
import torch.nn as nn
import torch.optim as optim
from torchcrf import CRF
import pickle


def preprocess_train_eval(model_sst_generator, model_mwe_generator, epochs_sst, epochs_mwe,
                          generate_pretrained_weights=False, use_pos=False):
    
    # Load data
    data = open('../dimsum-data-1.5/dimsum16.train', 'r').readlines()
    data_test = open('../dimsum-data-1.5/dimsum16.test.blind', 'r', encoding='utf-8').readlines()

    X, y_sst, y_mwe = extract_data(data)
    X_, X_val_, y_sst, y_sst_val = train_test_split_sentences(X, y_sst, test_size=0.3, random_state=0, shuffle=False)
    X, X_val, y_mwe, y_mwe_val = train_test_split_sentences(X, y_mwe, test_size=0.3, random_state=0, shuffle=False)
    assert (X == X_) and (X_val == X_val_)
    write_data(X_val, y_sst_val, y_mwe_val, 'val.gold')

    if use_pos:
        vocab_, le_vocab_, le_pos_, le_sst, train_loader_sst, valid_loader_sst = transform(X, y_sst, X_val, y_sst_val,
                                                                                           max_len=16, batch_size=64,
                                                                                           use_pos=True)
        vocab, le_vocab, le_pos, le_mwe, train_loader_mwe, valid_loader_mwe = transform(X, y_mwe, X_val, y_mwe_val,
                                                                                        max_len=16, batch_size=64,
                                                                                        use_pos=True)
        assert (vocab == vocab_) and (le_vocab == le_vocab_) and (le_pos == le_pos_)
    else:
        vocab_, le_vocab_, le_sst, train_loader_sst, valid_loader_sst = transform(X, y_sst, X_val, y_sst_val,
                                                                                  max_len=16, batch_size=64,
                                                                                  use_pos=False)
        vocab, le_vocab, le_mwe, train_loader_mwe, valid_loader_mwe = transform(X, y_mwe, X_val, y_mwe_val,
                                                                                max_len=16, batch_size=64,
                                                                                use_pos=False)
        assert (vocab == vocab_) and (le_vocab == le_vocab_)



    # Load Pretrained Weights (or generate for the first time)
    if generate_pretrained_weights:
        pretrained_weights = load_pretrained_weights('wiki-news-300d-1M.vec', vocab_size=len(vocab), le_vocab=le_vocab)
        with open(f"pretrained_weights_2.pkl", 'wb') as fo:
            pickle.dump(pretrained_weights, fo)
    else:
        pretrained_weights = load_pretrained_weights('pretrained_weights_2.pkl', from_pickle=True)


    # Train
    #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = torch.device('cpu')

    model_sst = model_sst_generator(pretrained_weights, le_sst, le_vocab)
    print('Model SST:')
    model_sst.fit(train_loader_sst, valid_loader_sst, epochs=epochs_sst)
    
    model_mwe = model_mwe_generator(pretrained_weights, le_mwe, le_vocab)
    print('\nModel MWE:')
    model_mwe.fit(train_loader_mwe, valid_loader_mwe, epochs=epochs_mwe)
    
    
    # Get Model F1-Score on valid set
    if use_pos:
        X_val_enc, y_sst_val_enc = transform_test(le_vocab, le_sst, X_val, y_sst_val, le_pos=le_pos)
        X_val_enc, y_mwe_val_enc = transform_test(le_vocab, le_mwe, X_val, y_mwe_val, le_pos=le_pos)
    else:
        X_val_enc, y_sst_val_enc = transform_test(le_vocab, le_sst, X_val, y_sst_val)
        X_val_enc, y_mwe_val_enc = transform_test(le_vocab, le_mwe, X_val, y_mwe_val)
    
    precision, recall, f1_score, accuracy = model_sst.score(X_val_enc, y_sst_val_enc, le_sst[''], le_sst)
    print(f'\nModel SST score: Acc={accuracy:.4} P={precision:.4f}, R={recall:.4f}, F1={f1_score:.4f}')
    precision, recall, f1_score, accuracy = model_mwe.score(X_val_enc, y_mwe_val_enc, le_mwe[''], le_mwe)
    print(f'Model MWE score: Acc={accuracy:.4} P={precision:.4f}, R={recall:.4f}, F1={f1_score:.4f}')


    # Écriture du fichier pour évaluation
    y_sst_hat_val = model_sst.predict(X_val_enc)
    y_mwe_hat_val = model_mwe.predict(X_val_enc)
    y_sst_hat_val_align = align_pred(X_val, y_sst_hat_val, le_sst, is_mwe=False)
    y_mwe_hat_val_align = align_pred(X_val, y_mwe_hat_val, le_mwe, is_mwe=True)
    rev_le_sst = {v: k for k, v in le_sst.items()}
    rev_le_mwe = {v: k for k, v in le_mwe.items()}
    y_sst_hat_val_align = [rev_le_sst[yi] if yi in rev_le_sst else '' for yi in y_sst_hat_val_align] # Decode
    y_mwe_hat_val_align = [rev_le_mwe[yi] if yi in rev_le_mwe else '' for yi in y_mwe_hat_val_align] # Decode
    write_data(X_val, y_sst_hat_val_align.copy(), y_mwe_hat_val_align.copy(), 'val_before_comb.pred')
    y_sst_hat_val_comb, y_mwe_hat_val_comb = combine_sst_mwe(X_val, y_sst_hat_val_align.copy(), y_mwe_hat_val_align.copy())
    write_data(X_val, y_sst_hat_val_comb.copy(), y_mwe_hat_val_comb.copy(), 'val.pred')

# Différence de score entre l'évaluation avec le modèle et l'évaluateur fourni,
# car le modèle fait l'évaluation uniquement sur les séquences de mots de taille inférieure à max_len,
# alors que l'évaluateur le fait sur tous les mots, même ceux coupés lors du pré-traitement

# Modèle GRU

In [19]:
model_sst_generator = lambda pretrained_weights, le, le_vocab : NN_Model(model=GRU(pretrained_weights, le,
                                                                                   embed_size=300, hidden_size=128,
                                                                                   use_pos=False),
                                                                         criterion=nn.CrossEntropyLoss(),
                                                                         optim=optim.Adam
                                                                        )

model_mwe_generator = lambda pretrained_weights, le, le_vocab : NN_Model(model=GRU(pretrained_weights, le,
                                                                                   embed_size=300, hidden_size=128,
                                                                                   use_pos=False),
                                                                         criterion=nn.CrossEntropyLoss(),
                                                                         optim=optim.Adam
                                                                        )

preprocess_train_eval(model_sst_generator, model_mwe_generator, epochs_sst=50, epochs_mwe=50,
                      generate_pretrained_weights=False, use_pos=False)

Model SST:
1 0.03852759769680458 0.026565476838085387 0.6117499214577443
2 0.02581575091448026 0.022439084864324995 0.6369462770970782
3 0.023516904920934032 0.020800631162193085 0.6811184417216463
4 0.022326126663625934 0.01982900318172243 0.6811184417216463
5 0.021529303380654445 0.019176968766583338 0.690732013823437
6 0.020902028961102145 0.018740963439146677 0.7014765944077914
7 0.020406354002457144 0.0183354119459788 0.6990260760289035
8 0.020014065509397525 0.018194583306709925 0.7090166509582155
9 0.01970256521219865 0.01782793812453747 0.7069431354068488
10 0.019369428482180588 0.017564976836244264 0.7051837888784166
11 0.01907674987318261 0.017363159937991035 0.7017907634307258
12 0.01878738453005637 0.017241223404804867 0.7158027018535973
13 0.0185100816125918 0.016974566794104046 0.7127238454288407
14 0.01825970533739495 0.016788248014118937 0.7080113100848257
15 0.0180104355543772 0.01661842626829942 0.7100848256361922
16 0.017812030483619484 0.016435550484392377 0.7132265

In [20]:
# Évaluation avec 'dimsumeval.py'
!python ../dimsum-data-1.5/scripts/dimsumeval.py val.gold val.pred

[40m[97m[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
11
11
11
TEST 11 [('We', 'O', 'O'), ('called', 'O', 'O'), ('few', 'O', 'O'), ('companies', 'O', 'O'), ('before', 'O', 'O'), ('we', 'O', 'O'), ('decide', 'O', 'O'), ('to', 'O', 'O'), ('hire', 'O', 'O'), ('them', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'), ('.', 'PUNCT')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'),

5
5
5
TEST 5 [('He', 'O', 'O'), ('said', 'O', 'O'), ('it', 'O', 'O'), ('sarcastically', 'O', 'O'), ('.', 'O', 'O')]
[('If', 'SCONJ'), ('you', 'PRON'), ('want', 'VERB'), ('a', 'DET'), ('doctor', 'NOUN'), ('who', 'PRON'), ('will', 'AUX'), ('lie', 'VERB'), ('to', 'ADP'), ('you', 'PRON'), ('and', 'CONJ'), ('say', 'VERB'), ('he', 'PRON'), ('will', 'AUX'), ('operate', 'VERB'), ('and', 'CONJ'), ('then', 'ADV'), ('change', 'VERB'), ('his', 'PRON'), ('mind', 'NOUN'), (',', 'PUNCT'), ('and', 'CONJ'), ('not', 'PART'), ('know', 'VERB'), ('what', 'PRON'), ('he', 'PRON'), ('is', 'AUX'), ('talking', 'VERB'), ('about', 'ADP'), ('when', 'ADV'), ('he', 'PRON'), ('recommends', 'VERB'), ('procedures', 'NOUN'), ('at', 'ADP'), ('other', 'ADJ'), ('hospitals', 'NOUN'), ('and', 'CONJ'), ('says', 'VERB'), ('they', 'PRON'), ('are', 'VERB'), ('what', 'PRON'), ('you', 'PRON'), ('need', 'VERB'), (',', 'PUNCT'), ('when', 'ADV'), ('they', 'PRON'), ('will', 'AUX'), ('not', 'PART'), ('work', 'VERB'), ('for', 'ADP'), ('

[('Without', 'ADP'), ('a', 'DET'), ('doubt', 'NOUN'), ('the', 'DET'), ('best', 'ADJ'), ('place', 'NOUN'), ('to', 'PART'), ('grab', 'VERB'), ('a', 'DET'), ('tall', 'ADJ'), ('bloody', 'ADJ'), ('mary', 'NOUN'), ('and', 'CONJ'), ('some', 'DET'), ('eggs', 'NOUN'), ('benedict', 'NOUN'), ('.', 'PUNCT')]
[('Without', 'ADP'), ('a', 'DET'), ('doubt', 'NOUN'), ('the', 'DET'), ('best', 'ADJ'), ('place', 'NOUN'), ('to', 'PART'), ('grab', 'VERB'), ('a', 'DET'), ('tall', 'ADJ'), ('bloody', 'ADJ'), ('mary', 'NOUN'), ('and', 'CONJ'), ('some', 'DET'), ('eggs', 'NOUN'), ('benedict', 'NOUN'), ('.', 'PUNCT')]
17
17
17
TEST 17 [('Without', 'B', 'O'), ('a', 'I', 'O'), ('doubt', 'I', 'O'), ('the', 'O', 'O'), ('best', 'O', 'O'), ('place', 'O', 'O'), ('to', 'O', 'O'), ('grab', 'O', 'O'), ('a', 'O', 'O'), ('tall', 'O', 'O'), ('bloody', 'B', 'O'), ('mary', 'I', 'O'), ('and', 'O', 'O'), ('some', 'O', 'O'), ('eggs', 'B', 'O'), ('benedict', 'I', 'O'), ('.', 'O', 'O')]
[('T', 'PROPN'), ("'s", 'PART'), ('has', 'AUX'),

[('A', 'DET'), ('well', 'ADV'), ('communicated', 'VERB'), ('and', 'CONJ'), ('will', 'AUX'), ('be', 'AUX'), ('hireing', 'VERB'), ('again', 'ADV'), ('for', 'ADP'), ('another', 'DET'), ('projects', 'NOUN'), ('......', 'PUNCT')]
12
12
12
TEST 12 [('A', 'O', 'O'), ('well', 'O', 'O'), ('communicated', 'O', 'O'), ('and', 'O', 'O'), ('will', 'O', 'O'), ('be', 'O', 'O'), ('hireing', 'O', 'O'), ('again', 'O', 'O'), ('for', 'O', 'O'), ('another', 'O', 'O'), ('projects', 'O', 'O'), ('......', 'O', 'O')]
[('Thanks', 'NOUN')]
[('Thanks', 'NOUN')]
1
1
1
TEST 1 [('Thanks', 'O', 'O')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('intractable', 'ADJ'), ('pain', 'NOUN')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('int

[('Great', 'ADJ'), ('Doctor', 'NOUN'), ('!', 'PUNCT')]
[('Great', 'ADJ'), ('Doctor', 'NOUN'), ('!', 'PUNCT')]
3
3
3
TEST 3 [('Great', 'O', 'O'), ('Doctor', 'O', 'O'), ('!', 'O', 'O')]
[('Dr.', 'PROPN'), ('Faris', 'PROPN'), ('is', 'VERB'), ('a', 'DET'), ('great', 'ADJ'), ('doctor', 'NOUN'), ('!', 'PUNCT')]
[('Dr.', 'PROPN'), ('Faris', 'PROPN'), ('is', 'VERB'), ('a', 'DET'), ('great', 'ADJ'), ('doctor', 'NOUN'), ('!', 'PUNCT')]
7
7
7
TEST 7 [('Dr.', 'B', 'B'), ('Faris', 'I', 'I'), ('is', 'O', 'O'), ('a', 'O', 'O'), ('great', 'O', 'O'), ('doctor', 'O', 'O'), ('!', 'O', 'O')]
[('I', 'PRON'), ('would', 'AUX'), ('recommend', 'VERB'), ('him', 'PRON'), ('to', 'ADP'), ('anyone', 'NOUN'), ('.', 'PUNCT')]
[('I', 'PRON'), ('would', 'AUX'), ('recommend', 'VERB'), ('him', 'PRON'), ('to', 'ADP'), ('anyone', 'NOUN'), ('.', 'PUNCT')]
7
7
7
TEST 7 [('I', 'O', 'O'), ('would', 'O', 'O'), ('recommend', 'O', 'O'), ('him', 'O', 'O'), ('to', 'O', 'O'), ('anyone', 'O', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ('

23
TEST 23 [('This', 'O', 'O'), ('place', 'O', 'O'), ('is', 'O', 'O'), ('identical', 'O', 'O'), ('to', 'O', 'O'), ('the', 'O', 'O'), ('Youngstown', 'B', 'O'), ('Sports', 'I', 'O'), ('Grille', 'I', 'O'), (',', 'O', 'O'), ('so', 'O', 'O'), ('I', 'O', 'O'), ('imagine', 'O', 'O'), ('they', 'O', 'O'), ('are', 'O', 'O'), ('owned', 'O', 'O'), ('/', 'O', 'O'), ('operated', 'O', 'O'), ('by', 'O', 'O'), ('the', 'O', 'O'), ('same', 'O', 'O'), ('people', 'O', 'O'), ('.', 'O', 'O')]
[('It', 'PRON'), ('clearly', 'ADV'), ('had', 'AUX'), ('been', 'AUX'), ('prepared', 'VERB'), ('from', 'ADP'), ('fresh', 'ADJ'), ('ingredients', 'NOUN'), ('.', 'PUNCT')]
[('It', 'PRON'), ('clearly', 'ADV'), ('had', 'AUX'), ('been', 'AUX'), ('prepared', 'VERB'), ('from', 'ADP'), ('fresh', 'ADJ'), ('ingredients', 'NOUN'), ('.', 'PUNCT')]
9
9
9
TEST 9 [('It', 'O', 'O'), ('clearly', 'O', 'O'), ('had', 'O', 'O'), ('been', 'O', 'O'), ('prepared', 'O', 'O'), ('from', 'O', 'O'), ('fresh', 'O', 'O'), ('ingredients', 'O', 'O'), ('.

[('Fantastic', 'ADJ'), ('food', 'NOUN'), ('served', 'VERB'), ('without', 'ADP'), ('pretense', 'NOUN'), (',', 'PUNCT'), ('very', 'ADV'), ('reasonably', 'ADV'), ('priced', 'VERB'), ('wine', 'NOUN'), ('selections', 'NOUN'), ('.', 'PUNCT')]
12
12
12
TEST 12 [('Fantastic', 'O', 'O'), ('food', 'O', 'O'), ('served', 'O', 'O'), ('without', 'O', 'O'), ('pretense', 'O', 'O'), (',', 'O', 'O'), ('very', 'O', 'O'), ('reasonably', 'O', 'O'), ('priced', 'O', 'O'), ('wine', 'O', 'O'), ('selections', 'O', 'O'), ('.', 'O', 'O')]
[('A', 'DET'), ('great', 'ADJ'), ('place', 'NOUN'), ('to', 'PART'), ('go', 'VERB'), ('for', 'ADP'), ('dinner', 'NOUN'), ('after', 'ADP'), ('a', 'DET'), ('day', 'NOUN'), ('of', 'ADP'), ('wine', 'NOUN'), ('tasting', 'NOUN'), ('.', 'PUNCT')]
[('A', 'DET'), ('great', 'ADJ'), ('place', 'NOUN'), ('to', 'PART'), ('go', 'VERB'), ('for', 'ADP'), ('dinner', 'NOUN'), ('after', 'ADP'), ('a', 'DET'), ('day', 'NOUN'), ('of', 'ADP'), ('wine', 'NOUN'), ('tasting', 'NOUN'), ('.', 'PUNCT')]
14
14

# Modèle GRU avec POS

In [21]:
model_sst_generator = lambda pretrained_weights, le, le_vocab : NN_Model(model=GRU(pretrained_weights, le,
                                                                                   embed_size=300, hidden_size=128,
                                                                                   use_pos=True),
                                                                         criterion=nn.CrossEntropyLoss(),
                                                                         optim=optim.Adam
                                                                        )

model_mwe_generator = lambda pretrained_weights, le, le_vocab : NN_Model(model=GRU(pretrained_weights, le,
                                                                                   embed_size=300, hidden_size=128,
                                                                                   use_pos=True),
                                                                         criterion=nn.CrossEntropyLoss(),
                                                                         optim=optim.Adam
                                                                        )

preprocess_train_eval(model_sst_generator, model_mwe_generator, epochs_sst=50, epochs_mwe=50,
                      generate_pretrained_weights=False, use_pos=True)

Model SST:
1 0.03210902558298614 0.019921529541412988 0.6613257932767829
2 0.01965199953461375 0.015455645157231225 0.6895381715362865
3 0.01700758580900296 0.013895051181316375 0.7042412818096135
4 0.016074254456713427 0.013284492658244239 0.7219604147031102
5 0.015429591699596245 0.01269069756898615 0.7304429783223374
6 0.014839763458941302 0.01217304335700141 0.7328934967012253
7 0.014388843000059364 0.011784428358078003 0.7428212378259503
8 0.013893635402871371 0.011473780622084935 0.7516808042726987
9 0.013493996107993504 0.011097456307874785 0.747282437951618
10 0.013137220271679219 0.010857940837740897 0.7504241281809614
11 0.012854979094311964 0.010637977843483288 0.7590323594093622
12 0.012546051627434801 0.010400915352834595 0.7595350298460571
13 0.012342842713322233 0.010281885084178712 0.7623625510524662
14 0.012120782530495864 0.010145109229617649 0.7657555765001571
15 0.011911450716383793 0.010019692613018884 0.7663839145460257
16 0.011746726067302553 0.009895076809657944

In [22]:
# Évaluation avec 'dimsumeval.py'
!python ../dimsum-data-1.5/scripts/dimsumeval.py val.gold val.pred

[40m[97m[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
11
11
11
TEST 11 [('We', 'O', 'O'), ('called', 'O', 'O'), ('few', 'O', 'B'), ('companies', 'O', 'I'), ('before', 'O', 'O'), ('we', 'O', 'O'), ('decide', 'O', 'O'), ('to', 'O', 'O'), ('hire', 'O', 'O'), ('them', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'), ('.', 'PUNCT')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'),

[('Then', 'ADV'), ('he', 'PRON'), ('renigged', 'VERB'), ('when', 'ADV'), ('he', 'PRON'), ('read', 'VERB'), ('my', 'PRON'), ('Health', 'PROPN'), ('Care', 'PROPN'), ('Proxy', 'PROPN'), (',', 'PUNCT'), ('even', 'ADV'), ('though', 'SCONJ'), ('i', 'PRON'), ('agreed', 'VERB'), ('to', 'PART'), ('be', 'VERB'), ('on', 'ADP'), ('the', 'DET'), ('ventilator', 'NOUN'), ('for', 'ADP'), ('2', 'NUM'), ('months', 'NOUN'), ('following', 'VERB'), ('surgery', 'NOUN'), ('(', 'PUNCT'), ('as', 'SCONJ'), ('he', 'PRON'), ('had', 'AUX'), ('twice', 'ADV'), ('stated', 'VERB'), ('i', 'PRON'), ('must', 'AUX'), ('agree', 'VERB'), ('to', 'ADP'), (')', 'PUNCT'), ('.', 'PUNCT')]
[('Then', 'ADV'), ('he', 'PRON'), ('renigged', 'VERB'), ('when', 'ADV'), ('he', 'PRON'), ('read', 'VERB'), ('my', 'PRON'), ('Health', 'PROPN'), ('Care', 'PROPN'), ('Proxy', 'PROPN'), (',', 'PUNCT'), ('even', 'ADV'), ('though', 'SCONJ'), ('i', 'PRON'), ('agreed', 'VERB'), ('to', 'PART'), ('be', 'VERB'), ('on', 'ADP'), ('the', 'DET'), ('ventilato

[('They', 'PRON'), ('spent', 'VERB'), ('a', 'DET'), ('lot', 'NOUN'), ('of', 'ADP'), ('time', 'NOUN'), ('with', 'ADP'), ('me', 'PRON'), ('and', 'CONJ'), ('got', 'VERB'), ('me', 'PRON'), ('into', 'ADP'), ('a', 'DET'), ('great', 'ADJ'), ('pair', 'NOUN'), ('of', 'ADP'), ('shoes', 'NOUN'), ('.', 'PUNCT')]
[('They', 'PRON'), ('spent', 'VERB'), ('a', 'DET'), ('lot', 'NOUN'), ('of', 'ADP'), ('time', 'NOUN'), ('with', 'ADP'), ('me', 'PRON'), ('and', 'CONJ'), ('got', 'VERB'), ('me', 'PRON'), ('into', 'ADP'), ('a', 'DET'), ('great', 'ADJ'), ('pair', 'NOUN'), ('of', 'ADP'), ('shoes', 'NOUN'), ('.', 'PUNCT')]
18
18
18
TEST 18 [('They', 'O', 'O'), ('spent', 'B', 'O'), ('a', 'b', 'O'), ('lot', 'i', 'O'), ('of', 'o', 'O'), ('time', 'I', 'O'), ('with', 'O', 'O'), ('me', 'O', 'O'), ('and', 'O', 'O'), ('got', 'O', 'O'), ('me', 'O', 'O'), ('into', 'O', 'O'), ('a', 'O', 'O'), ('great', 'O', 'O'), ('pair', 'O', 'O'), ('of', 'O', 'O'), ('shoes', 'O', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ('think', 'VERB'), 

[('A', 'DET'), ('well', 'ADV'), ('communicated', 'VERB'), ('and', 'CONJ'), ('will', 'AUX'), ('be', 'AUX'), ('hireing', 'VERB'), ('again', 'ADV'), ('for', 'ADP'), ('another', 'DET'), ('projects', 'NOUN'), ('......', 'PUNCT')]
12
12
12
TEST 12 [('A', 'O', 'O'), ('well', 'O', 'O'), ('communicated', 'O', 'O'), ('and', 'O', 'O'), ('will', 'O', 'O'), ('be', 'O', 'O'), ('hireing', 'O', 'O'), ('again', 'O', 'O'), ('for', 'O', 'O'), ('another', 'O', 'O'), ('projects', 'O', 'O'), ('......', 'O', 'O')]
[('Thanks', 'NOUN')]
[('Thanks', 'NOUN')]
1
1
1
TEST 1 [('Thanks', 'O', 'O')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('intractable', 'ADJ'), ('pain', 'NOUN')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('int

[('I', 'PRON'), ('have', 'AUX'), ('been', 'VERB'), ('extremely', 'ADV'), ('pleased', 'ADJ'), ('with', 'ADP'), ('the', 'DET'), ('signs', 'NOUN'), ('and', 'CONJ'), ('pop', 'VERB'), ('-', 'PUNCT'), ('up', 'ADP'), ('banners', 'NOUN'), ('she', 'PRON'), ('has', 'AUX'), ('supplied', 'VERB'), ('to', 'ADP'), ('me', 'PRON'), ('over', 'ADP'), ('the', 'DET'), ('years', 'NOUN'), ('-', 'PUNCT'), ('a', 'NOUN'), ('truly', 'ADV'), ('first', 'ADJ'), ('class', 'NOUN'), ('family', 'NOUN'), ('business', 'NOUN'), ('run', 'VERB'), ('by', 'ADP'), ('Tina', 'PROPN'), ('and', 'CONJ'), ('her', 'PRON'), ('husband', 'NOUN'), ('Chris', 'PROPN'), ('.', 'PUNCT')]
36
36
36
TEST 36 [('I', 'O', 'O'), ('have', 'O', 'O'), ('been', 'O', 'O'), ('extremely', 'O', 'O'), ('pleased', 'O', 'O'), ('with', 'O', 'O'), ('the', 'O', 'O'), ('signs', 'O', 'O'), ('and', 'O', 'O'), ('pop', 'B', 'O'), ('-', 'I', 'O'), ('up', 'I', 'O'), ('banners', 'O', 'O'), ('she', 'O', 'O'), ('has', 'O', 'O'), ('supplied', 'O', 'O'), ('to', 'O', 'O'), ('

23
23
23
TEST 23 [('They', 'O', 'O'), ('also', 'O', 'O'), ('got', 'O', 'O'), ('my', 'O', 'O'), ('friend', 'O', 'O'), ('s', 'O', 'O'), ('order', 'O', 'O'), ('mixed', 'B', 'O'), ('up', 'I', 'O'), ('and', 'O', 'O'), ('wanted', 'O', 'O'), ('to', 'O', 'O'), ('charger', 'O', 'O'), ('her', 'O', 'O'), ('$', 'O', 'O'), ('10', 'O', 'O'), ('more', 'O', 'O'), ('than', 'O', 'O'), ('what', 'O', 'O'), ('she', 'O', 'O'), ('had', 'O', 'O'), ('wanted', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('have', 'VERB'), ('a', 'DET'), ('great', 'ADJ'), ('selection', 'NOUN'), ('of', 'ADP'), ('wine', 'NOUN'), ('from', 'ADP'), ('all', 'ADV'), ('over', 'ADP'), ('the', 'DET'), ('world', 'NOUN'), ('with', 'ADP'), ('all', 'DET'), ('different', 'ADJ'), ('prices', 'NOUN'), ('.', 'PUNCT')]
[('They', 'PRON'), ('have', 'VERB'), ('a', 'DET'), ('great', 'ADJ'), ('selection', 'NOUN'), ('of', 'ADP'), ('wine', 'NOUN'), ('from', 'ADP'), ('all', 'ADV'), ('over', 'ADP'), ('the', 'DET'), ('world', 'NOUN'), ('with', 'ADP'), ('all

4
4
TEST 4 [('But', 'O', 'O'), ('not', 'O', 'O'), ('so', 'O', 'O'), ('.', 'O', 'O')]
[('First', 'ADJ'), ('Time', 'NOUN'), ('Ballerina', 'NOUN')]
[('First', 'ADJ'), ('Time', 'NOUN'), ('Ballerina', 'NOUN')]
3
3
3
TEST 3 [('First', 'B', 'O'), ('Time', 'I', 'O'), ('Ballerina', 'O', 'O')]
[('My', 'PRON'), ('daughter', 'NOUN'), ('is', 'AUX'), ('starting', 'VERB'), ('ballet', 'NOUN'), ('this', 'DET'), ('year', 'NOUN'), ('for', 'ADP'), ('the', 'DET'), ('first', 'ADJ'), ('time', 'NOUN'), ('.', 'PUNCT')]
[('My', 'PRON'), ('daughter', 'NOUN'), ('is', 'AUX'), ('starting', 'VERB'), ('ballet', 'NOUN'), ('this', 'DET'), ('year', 'NOUN'), ('for', 'ADP'), ('the', 'DET'), ('first', 'ADJ'), ('time', 'NOUN'), ('.', 'PUNCT')]
12
12
12
TEST 12 [('My', 'O', 'O'), ('daughter', 'O', 'O'), ('is', 'O', 'O'), ('starting', 'O', 'O'), ('ballet', 'O', 'O'), ('this', 'O', 'O'), ('year', 'O', 'O'), ('for', 'O', 'O'), ('the', 'O', 'O'), ('first', 'O', 'O'), ('time', 'O', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ("'m", 'V

# Modèles GRU pour les Supersens et GRU+CRF pour les MWEs (avec POS)

In [25]:
model_sst_generator = lambda pretrained_weights, le, le_vocab : NN_Model(model=GRU(pretrained_weights, le,
                                                                                   embed_size=300, hidden_size=128,
                                                                                   use_pos=True),
                                                                         criterion=nn.CrossEntropyLoss(),
                                                                         optim=optim.Adam
                                                                        )

model_mwe_generator = lambda pretrained_weights, le, le_vocab : NN_CRF_Model(nn_model=GRU(pretrained_weights, le,
                                                                                      embed_size=300, hidden_size=128,
                                                                                      use_pos=True),
                                                                             crf_model=CRF(len(le), batch_first=True),
                                                                             optim=optim.Adam
                                                                            )

preprocess_train_eval(model_sst_generator, model_mwe_generator, epochs_sst=50, epochs_mwe=50,
                      generate_pretrained_weights=False, use_pos=True)

Model SST:
1 0.03238574175225656 0.019972241504324806 0.6542255733584669
2 0.01976959500933162 0.015688069495889877 0.6911090166509583
3 0.017127558389353092 0.014008973870012495 0.7087024819352812
4 0.016050560235196402 0.013226058748033311 0.7177505497957901
5 0.015396892350143746 0.012725883515344726 0.7318253220232485
6 0.01483728918300991 0.012232638398806254 0.740119384228715
7 0.014312156629406225 0.011708410456776619 0.7447062519635564
8 0.013907561986422958 0.011401524229182138 0.7479736098020735
9 0.013511030987708048 0.01105486224922869 0.7475337731699654
10 0.013143726458468584 0.010915459858046638 0.7559535029846057
11 0.012861454351986757 0.010660003001491229 0.7558906691800189
12 0.012605165558650615 0.010508566349744796 0.7592208608231228
13 0.01235011827736321 0.01033689748081896 0.7606660383286208
14 0.012146072555915625 0.010208718644248115 0.7651272384542884
15 0.011946483146721993 0.010041397127012412 0.7634935595350298
16 0.01175343266339769 0.009961039138336977 0

In [26]:
# Évaluation avec 'dimsumeval.py'
!python ../dimsum-data-1.5/scripts/dimsumeval.py val.gold val.pred

[40m[97m[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
11
11
11
TEST 11 [('We', 'O', 'O'), ('called', 'O', 'B'), ('few', 'O', 'I'), ('companies', 'O', 'O'), ('before', 'O', 'O'), ('we', 'O', 'O'), ('decide', 'O', 'O'), ('to', 'O', 'O'), ('hire', 'O', 'O'), ('them', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'), ('.', 'PUNCT')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'),

12
TEST 12 [('Two', 'O', 'O'), ('months', 'O', 'O'), ('and', 'O', 'O'), ('at', 'B', 'O'), ('least', 'I', 'O'), ('two', 'O', 'O'), ('months', 'O', 'O'), ('are', 'O', 'O'), ('totally', 'O', 'O'), ('different', 'O', 'O'), ('things', 'O', 'O'), ('.', 'O', 'O')]
[('He', 'PRON'), ('did', 'AUX'), ('not', 'PART'), ('even', 'ADV'), ('give', 'VERB'), ('me', 'PRON'), ('the', 'DET'), ('chance', 'NOUN'), ('to', 'PART'), ('say', 'VERB'), ('i', 'PRON'), ('would', 'AUX'), ('stay', 'VERB'), ('on', 'ADP'), ('the', 'DET'), ('ventilator', 'NOUN'), ('longer', 'ADV'), (',', 'PUNCT'), ('which', 'DET'), ('i', 'PRON'), ('would', 'AUX'), ('have', 'AUX'), ('.', 'PUNCT')]
[('He', 'PRON'), ('did', 'AUX'), ('not', 'PART'), ('even', 'ADV'), ('give', 'VERB'), ('me', 'PRON'), ('the', 'DET'), ('chance', 'NOUN'), ('to', 'PART'), ('say', 'VERB'), ('i', 'PRON'), ('would', 'AUX'), ('stay', 'VERB'), ('on', 'ADP'), ('the', 'DET'), ('ventilator', 'NOUN'), ('longer', 'ADV'), (',', 'PUNCT'), ('which', 'DET'), ('i', 'PRON'), ('w

[('Thanks', 'NOUN'), ('Run', 'PROPN'), ('on', 'PROPN'), ('!', 'PUNCT')]
4
4
4
TEST 4 [('Thanks', 'O', 'O'), ('Run', 'B', 'O'), ('on', 'I', 'B'), ('!', 'O', 'I')]
[('Bulwark', 'NOUN'), ('regarding', 'VERB'), ('service', 'NOUN'), ('by', 'ADP'), ('Eric', 'PROPN')]
[('Bulwark', 'NOUN'), ('regarding', 'VERB'), ('service', 'NOUN'), ('by', 'ADP'), ('Eric', 'PROPN')]
5
5
5
TEST 5 [('Bulwark', 'O', 'O'), ('regarding', 'O', 'B'), ('service', 'O', 'I'), ('by', 'O', 'O'), ('Eric', 'O', 'O')]
[('Just', 'ADV'), ('wanted', 'VERB'), ('you', 'PRON'), ('to', 'PART'), ('know', 'VERB'), ('that', 'SCONJ'), ('Eric', 'PROPN'), ('came', 'VERB'), ('by', 'ADV'), ('as', 'SCONJ'), ('scheduled', 'VERB'), ('today', 'NOUN'), ('and', 'CONJ'), ('sprayed', 'VERB'), ('our', 'PRON'), ('house', 'NOUN'), ('for', 'ADP'), ('scorpions', 'NOUN'), ('.', 'PUNCT')]
[('Just', 'ADV'), ('wanted', 'VERB'), ('you', 'PRON'), ('to', 'PART'), ('know', 'VERB'), ('that', 'SCONJ'), ('Eric', 'PROPN'), ('came', 'VERB'), ('by', 'ADV'), ('as', 

[('A', 'DET'), ('well', 'ADV'), ('communicated', 'VERB'), ('and', 'CONJ'), ('will', 'AUX'), ('be', 'AUX'), ('hireing', 'VERB'), ('again', 'ADV'), ('for', 'ADP'), ('another', 'DET'), ('projects', 'NOUN'), ('......', 'PUNCT')]
12
12
12
TEST 12 [('A', 'O', 'O'), ('well', 'O', 'O'), ('communicated', 'O', 'O'), ('and', 'O', 'O'), ('will', 'O', 'O'), ('be', 'O', 'O'), ('hireing', 'O', 'O'), ('again', 'O', 'O'), ('for', 'O', 'O'), ('another', 'O', 'O'), ('projects', 'O', 'O'), ('......', 'O', 'O')]
[('Thanks', 'NOUN')]
[('Thanks', 'NOUN')]
1
1
1
TEST 1 [('Thanks', 'O', 'O')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('intractable', 'ADJ'), ('pain', 'NOUN')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('int

36
TEST 36 [('I', 'O', 'O'), ('have', 'O', 'O'), ('been', 'O', 'O'), ('extremely', 'O', 'O'), ('pleased', 'O', 'O'), ('with', 'O', 'O'), ('the', 'O', 'O'), ('signs', 'O', 'O'), ('and', 'O', 'O'), ('pop', 'B', 'O'), ('-', 'I', 'O'), ('up', 'I', 'O'), ('banners', 'O', 'O'), ('she', 'O', 'O'), ('has', 'O', 'O'), ('supplied', 'O', 'O'), ('to', 'O', 'O'), ('me', 'O', 'O'), ('over', 'B', 'O'), ('the', 'I', 'O'), ('years', 'I', 'O'), ('-', 'O', 'O'), ('a', 'O', 'O'), ('truly', 'O', 'O'), ('first', 'B', 'O'), ('class', 'I', 'O'), ('family', 'B', 'O'), ('business', 'I', 'O'), ('run', 'O', 'O'), ('by', 'O', 'O'), ('Tina', 'O', 'O'), ('and', 'O', 'O'), ('her', 'O', 'O'), ('husband', 'O', 'O'), ('Chris', 'O', 'O'), ('.', 'O', 'O')]
[('Hospitality', 'NOUN'), ('.!', 'PUNCT')]
[('Hospitality', 'NOUN'), ('.!', 'PUNCT')]
2
2
2
TEST 2 [('Hospitality', 'O', 'O'), ('.!', 'O', 'O')]
[('Very', 'ADV'), ('good', 'ADJ'), ('hospitality', 'NOUN'), ('offered', 'VERB'), ('.!', 'PUNCT')]
[('Very', 'ADV'), ('good', 

[('But', 'CONJ'), ('I', 'PRON'), ('found', 'VERB'), ('the', 'DET'), ('location', 'NOUN'), ('wonderful', 'ADJ'), ('and', 'CONJ'), ('the', 'DET'), ('neighbors', 'NOUN'), ('very', 'ADV'), ('kind', 'ADJ'), ('.', 'PUNCT')]
[('But', 'CONJ'), ('I', 'PRON'), ('found', 'VERB'), ('the', 'DET'), ('location', 'NOUN'), ('wonderful', 'ADJ'), ('and', 'CONJ'), ('the', 'DET'), ('neighbors', 'NOUN'), ('very', 'ADV'), ('kind', 'ADJ'), ('.', 'PUNCT')]
12
12
12
TEST 12 [('But', 'O', 'O'), ('I', 'O', 'O'), ('found', 'O', 'O'), ('the', 'O', 'O'), ('location', 'O', 'O'), ('wonderful', 'O', 'O'), ('and', 'O', 'O'), ('the', 'O', 'O'), ('neighbors', 'O', 'O'), ('very', 'O', 'O'), ('kind', 'O', 'O'), ('.', 'O', 'O')]
[('Never', 'ADV'), ('had', 'VERB'), ('a', 'DET'), ('problem', 'NOUN'), ('with', 'ADP'), ('the', 'DET'), ('staff', 'NOUN'), ('and', 'CONJ'), ('found', 'VERB'), ('them', 'PRON'), ('very', 'ADV'), ('helpful', 'ADJ'), ('when', 'ADV'), ('something', 'NOUN'), ('went', 'VERB'), ('wrong', 'ADJ'), ('.', 'PUNC

24
24
TEST 24 [('A', 'O', 'O'), ('short', 'O', 'O'), ('but', 'O', 'O'), ('wide', 'O', 'O'), ('-', 'O', 'O'), ('ranging', 'O', 'B'), ('menu', 'O', 'I'), ('executed', 'O', 'O'), ('with', 'O', 'O'), ('innovative', 'O', 'O'), ('perfection', 'O', 'O'), ('in', 'O', 'O'), ('a', 'O', 'O'), ('cozy', 'O', 'O'), ('hole', 'B', 'O'), ('in', 'I', 'O'), ('the', 'I', 'O'), ('wall', 'I', 'O'), ('just', 'O', 'O'), ('off', 'O', 'O'), ('the', 'O', 'O'), ('main', 'O', 'O'), ('street', 'O', 'O'), ('.', 'O', 'O')]
[('Fantastic', 'ADJ'), ('food', 'NOUN'), ('served', 'VERB'), ('without', 'ADP'), ('pretense', 'NOUN'), (',', 'PUNCT'), ('very', 'ADV'), ('reasonably', 'ADV'), ('priced', 'VERB'), ('wine', 'NOUN'), ('selections', 'NOUN'), ('.', 'PUNCT')]
[('Fantastic', 'ADJ'), ('food', 'NOUN'), ('served', 'VERB'), ('without', 'ADP'), ('pretense', 'NOUN'), (',', 'PUNCT'), ('very', 'ADV'), ('reasonably', 'ADV'), ('priced', 'VERB'), ('wine', 'NOUN'), ('selections', 'NOUN'), ('.', 'PUNCT')]
12
12
12
TEST 12 [('Fantastic

[('They', 'PRON'), ('say', 'VERB'), ('no', 'INTJ'), (',', 'PUNCT'), ('Warwick', 'PROPN'), ('in', 'ADP'), ('New', 'PROPN'), ('Jersey', 'PROPN'), (',', 'PUNCT'), ('Call', 'VERB'), ('New', 'PROPN'), ('Jersey', 'PROPN'), ('.', 'PUNCT')]
13
13
13
TEST 13 [('They', 'O', 'O'), ('say', 'O', 'O'), ('no', 'O', 'O'), (',', 'O', 'B'), ('Warwick', 'O', 'I'), ('in', 'O', 'I'), ('New', 'B', 'O'), ('Jersey', 'I', 'O'), (',', 'O', 'O'), ('Call', 'O', 'B'), ('New', 'B', 'I'), ('Jersey', 'I', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ('laugh', 'VERB'), ('and', 'CONJ'), ('say', 'VERB'), (',', 'PUNCT'), ('no', 'INTJ'), (',', 'PUNCT'), ('that', 'DET'), ('Warwick', 'PROPN'), ('is', 'VERB'), ('in', 'ADP'), ('New', 'PROPN'), ('York', 'PROPN'), (',', 'PUNCT'), ('but', 'CONJ'), ('I', 'PRON'), ("'m", 'VERB'), ('at', 'ADP'), ('the', 'DET'), ('Radison', 'PROPN'), ('-', 'PUNCT'), ('Warwick', 'PROPN'), ('.', 'PUNCT')]
[('I', 'PRON'), ('laugh', 'VERB'), ('and', 'CONJ'), ('say', 'VERB'), (',', 'PUNCT'), ('no', 'INTJ'), ('

# Modèles GRU+CRF pour les 2 (avec POS)

In [27]:
model_crf_generator = lambda pretrained_weights, le, le_vocab : NN_CRF_Model(nn_model=GRU(pretrained_weights, le,
                                                                                      embed_size=300, hidden_size=128,
                                                                                      use_pos=True),
                                                                             crf_model=CRF(len(le), batch_first=True),
                                                                             optim=optim.Adam
                                                                            )

model_mwe_generator = lambda pretrained_weights, le, le_vocab : NN_CRF_Model(nn_model=GRU(pretrained_weights, le,
                                                                                      embed_size=300, hidden_size=128,
                                                                                      use_pos=True),
                                                                             crf_model=CRF(len(le), batch_first=True),
                                                                             optim=optim.Adam
                                                                            )

preprocess_train_eval(model_sst_generator, model_mwe_generator, epochs_sst=50, epochs_mwe=50,
                      generate_pretrained_weights=False, use_pos=True)

Model SST:
1 0.03140960785916037 0.020038209855556488 0.6687401822180333
2 0.019824627663770508 0.015589585983090931 0.6929311969839774
3 0.017065957849598527 0.01396633680495951 0.7038014451775055
4 0.016084127551780354 0.013295309493939082 0.7208922400251335
5 0.015466769790677806 0.01272499052186807 0.7313226515865535
6 0.014852877669690727 0.012204606624113188 0.738234370091109
7 0.014308954960322515 0.01171842830048667 0.743449575871819
8 0.013870734709222675 0.011411436067687141 0.7460885956644675
9 0.013471379951552004 0.011123524233698844 0.7523091423185674
10 0.013127855404248795 0.010841771711905797 0.7556393339616714
11 0.012878838696980909 0.010643672198057175 0.760100534087339
12 0.012607597531077052 0.010476034672723875 0.7588438579956016
13 0.012378356257305503 0.010328105092048645 0.7634935595350298
14 0.012197142486311062 0.010209189189804926 0.7629908890983349
15 0.01196169166488284 0.010054745628601975 0.76770342444235
16 0.011746975753803486 0.009984554888473617 0.7

In [28]:
# Évaluation avec 'dimsumeval.py'
!python ../dimsum-data-1.5/scripts/dimsumeval.py val.gold val.pred

[40m[97m[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
[('We', 'PRON'), ('called', 'VERB'), ('few', 'ADJ'), ('companies', 'NOUN'), ('before', 'SCONJ'), ('we', 'PRON'), ('decide', 'VERB'), ('to', 'PART'), ('hire', 'VERB'), ('them', 'PRON'), ('.', 'PUNCT')]
11
11
11
TEST 11 [('We', 'O', 'O'), ('called', 'O', 'B'), ('few', 'O', 'I'), ('companies', 'O', 'O'), ('before', 'O', 'O'), ('we', 'O', 'O'), ('decide', 'O', 'O'), ('to', 'O', 'O'), ('hire', 'O', 'O'), ('them', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'), ('.', 'PUNCT')]
[('They', 'PRON'), ('came', 'VERB'), ('on', 'ADP'), ('time', 'NOUN'), ('and', 'CONJ'), ('completed', 'VERB'), ('their', 'PRON'), ('work', 'NOUN'), ('quickly', 'ADV'),

11
11
11
TEST 11 [('ok', 'O', 'O'), ('but', 'O', 'O'), ('just', 'O', 'O'), ('becuse', 'O', 'O'), ('we', 'O', 'O'), ('where', 'O', 'B'), ('on', 'B', 'I'), ('a', 'I', 'O'), ('tight', 'o', 'O'), ('budget', 'I', 'O'), ('.', 'O', 'O')]
[('me', 'PRON'), ('and', 'CONJ'), ('my', 'PRON'), ('dad', 'NOUN'), ('where', 'VERB'), ('in', 'ADP'), ('NJ', 'PROPN'), ('for', 'ADP'), ('a', 'DET'), ('kc', 'PROPN'), ('chiefs', 'PROPN'), ('(', 'PUNCT'), ('my', 'PRON'), ('home', 'NOUN'), ('team', 'NOUN'), (')', 'PUNCT'), ('vs', 'ADP'), ('the', 'DET'), ('NY', 'PROPN'), ('jets', 'PROPN'), ('and', 'CONJ'), ('for', 'ADP'), ('game', 'NOUN'), ('4', 'NUM'), ('of', 'ADP'), ('the', 'DET'), ('world', 'NOUN'), ('series', 'NOUN'), ('.', 'PUNCT')]
[('me', 'PRON'), ('and', 'CONJ'), ('my', 'PRON'), ('dad', 'NOUN'), ('where', 'VERB'), ('in', 'ADP'), ('NJ', 'PROPN'), ('for', 'ADP'), ('a', 'DET'), ('kc', 'PROPN'), ('chiefs', 'PROPN'), ('(', 'PUNCT'), ('my', 'PRON'), ('home', 'NOUN'), ('team', 'NOUN'), (')', 'PUNCT'), ('vs', 'ADP

23
23
TEST 23 [('It', 'O', 'O'), ('is', 'O', 'O'), ('the', 'O', 'O'), ('easiest', 'O', 'O'), ('thing', 'O', 'O'), ('that', 'O', 'O'), ('I', 'O', 'O'), ('have', 'O', 'O'), ('ever', 'O', 'O'), ('done', 'O', 'O'), ('and', 'O', 'O'), ('I', 'O', 'O'), ('tell', 'O', 'O'), ('all', 'O', 'O'), ('my', 'O', 'O'), ('friends', 'O', 'O'), ('that', 'O', 'O'), ('they', 'O', 'O'), ('should', 'O', 'O'), ('do', 'O', 'O'), ('it', 'O', 'O'), ('too', 'O', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ('sent', 'VERB'), ('a', 'DET'), ('customer', 'NOUN'), ('of', 'ADP'), ('mine', 'PRON'), ('to', 'ADP'), ('you', 'PRON'), ('.', 'PUNCT')]
[('I', 'PRON'), ('sent', 'VERB'), ('a', 'DET'), ('customer', 'NOUN'), ('of', 'ADP'), ('mine', 'PRON'), ('to', 'ADP'), ('you', 'PRON'), ('.', 'PUNCT')]
9
9
9
TEST 9 [('I', 'O', 'O'), ('sent', 'O', 'O'), ('a', 'O', 'O'), ('customer', 'O', 'O'), ('of', 'O', 'O'), ('mine', 'O', 'O'), ('to', 'O', 'O'), ('you', 'O', 'O'), ('.', 'O', 'O')]
[('Dr.', 'PROPN'), ('Ruona', 'PROPN'), (',', 'PUNCT')

[('A', 'DET'), ('well', 'ADV'), ('communicated', 'VERB'), ('and', 'CONJ'), ('will', 'AUX'), ('be', 'AUX'), ('hireing', 'VERB'), ('again', 'ADV'), ('for', 'ADP'), ('another', 'DET'), ('projects', 'NOUN'), ('......', 'PUNCT')]
12
12
12
TEST 12 [('A', 'O', 'B'), ('well', 'O', 'I'), ('communicated', 'O', 'O'), ('and', 'O', 'O'), ('will', 'O', 'O'), ('be', 'O', 'O'), ('hireing', 'O', 'O'), ('again', 'O', 'O'), ('for', 'O', 'O'), ('another', 'O', 'O'), ('projects', 'O', 'O'), ('......', 'O', 'O')]
[('Thanks', 'NOUN')]
[('Thanks', 'NOUN')]
1
1
1
TEST 1 [('Thanks', 'O', 'O')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('intractable', 'ADJ'), ('pain', 'NOUN')]
[('Friendly', 'ADJ'), ('Efficient', 'ADJ'), ('and', 'CONJ'), ('overall', 'ADV'), ('great', 'ADJ'), ('place', 'NOUN'), ('for', 'ADP'), ('people', 'NOUN'), ('in', 'ADP'), ('chronic', 'ADJ'), ('int

50
50
50
TEST 50 [('Walked', 'O', 'O'), ('in', 'O', 'O'), ('and', 'O', 'O'), ('was', 'O', 'B'), ('out', 'B', 'I'), ('ta', 'I', 'I'), ('there', 'O', 'O'), ('in', 'O', 'O'), ('10', 'O', 'O'), ('mins', 'O', 'O'), ('with', 'O', 'O'), ('a', 'O', 'O'), ('really', 'O', 'O'), ('good', 'O', 'O'), ('deal', 'O', 'O'), ('i', 'O', 'O'), ('thought', 'O', 'O'), ('i', 'O', 'O'), ('was', 'O', 'O'), ('going', 'B', 'O'), ('to', 'I', 'O'), ('be', 'O', 'O'), ('paying', 'O', 'O'), ('a', 'B', 'O'), ('lot', 'I', 'O'), ('because', 'O', 'O'), ('i', 'O', 'O'), ('had', 'O', 'O'), ('a', 'O', 'O'), ('DUI', 'O', 'O'), ('but', 'O', 'O'), ('with', 'O', 'O'), ('my', 'O', 'O'), ('DUI', 'O', 'O'), ('and', 'O', 'O'), ('Sr', 'B', 'O'), ('-', 'I', 'O'), ('22', 'I', 'O'), ('they', 'O', 'O'), ('were', 'O', 'O'), ('able', 'O', 'O'), ('to', 'O', 'O'), ('get', 'O', 'O'), ('me', 'O', 'O'), ('the', 'O', 'O'), ('best', 'O', 'O'), ('deal', 'O', 'O'), ('out', 'B', 'O'), ('there', 'I', 'O'), ('.', 'O', 'O')]
[('Caldwell', 'PROPN'), ('

TEST 16 [('We', 'O', 'O'), ('would', 'O', 'O'), ('like', 'O', 'O'), ('to', 'O', 'O'), ('thank', 'B', 'O'), ('you', 'I', 'O'), ('for', 'O', 'O'), ('the', 'O', 'O'), ('roofing', 'O', 'O'), ('job', 'O', 'O'), ('you', 'O', 'O'), ('did', 'O', 'O'), ('on', 'O', 'O'), ('our', 'O', 'O'), ('home', 'O', 'O'), ('.', 'O', 'O')]
[('Everything', 'NOUN'), ('was', 'AUX'), ('done', 'VERB'), ('on', 'ADP'), ('a', 'DET'), ('timely', 'ADJ'), ('manner', 'NOUN'), ('and', 'CONJ'), ('things', 'NOUN'), ('were', 'AUX'), ('cleaned', 'VERB'), ('and', 'CONJ'), ('picked', 'VERB'), ('up', 'ADP'), ('every', 'DET'), ('day', 'NOUN'), ('when', 'ADV'), ('the', 'DET'), ('crew', 'NOUN'), ('was', 'VERB'), ('done', 'ADJ'), ('.', 'PUNCT')]
[('Everything', 'NOUN'), ('was', 'AUX'), ('done', 'VERB'), ('on', 'ADP'), ('a', 'DET'), ('timely', 'ADJ'), ('manner', 'NOUN'), ('and', 'CONJ'), ('things', 'NOUN'), ('were', 'AUX'), ('cleaned', 'VERB'), ('and', 'CONJ'), ('picked', 'VERB'), ('up', 'ADP'), ('every', 'DET'), ('day', 'NOUN'), ('w

TEST 9 [('My', 'O', 'O'), ('hair', 'O', 'O'), ('is', 'O', 'O'), ('uneven', 'O', 'O'), ('and', 'O', 'O'), ('it', 'O', 'O'), ('looks', 'O', 'O'), ('rediculous', 'O', 'O'), ('.', 'O', 'O')]
[('This', 'DET'), ('woman', 'NOUN'), ('should', 'AUX'), ('be', 'AUX'), ('working', 'VERB'), ('in', 'ADP'), ('supercuts', 'PROPN'), ('...', 'PUNCT'), ('if', 'ADP'), ('that', 'DET'), ('.', 'PUNCT')]
[('This', 'DET'), ('woman', 'NOUN'), ('should', 'AUX'), ('be', 'AUX'), ('working', 'VERB'), ('in', 'ADP'), ('supercuts', 'PROPN'), ('...', 'PUNCT'), ('if', 'ADP'), ('that', 'DET'), ('.', 'PUNCT')]
11
11
11
TEST 11 [('This', 'O', 'O'), ('woman', 'O', 'O'), ('should', 'O', 'O'), ('be', 'O', 'O'), ('working', 'O', 'O'), ('in', 'O', 'O'), ('supercuts', 'O', 'O'), ('...', 'O', 'O'), ('if', 'O', 'O'), ('that', 'O', 'O'), ('.', 'O', 'O')]
[('This', 'DET'), ('was', 'VERB'), ('a', 'DET'), ('terrible', 'ADJ'), ('experience', 'NOUN'), ('and', 'CONJ'), ('I', 'PRON'), ('hope', 'VERB'), ('that', 'SCONJ'), ('no', 'DET'), ('

[('They', 'PRON'), ('also', 'ADV'), ('got', 'VERB'), ('my', 'PRON'), ('friend', 'NOUN'), ('s', 'PART'), ('order', 'NOUN'), ('mixed', 'VERB'), ('up', 'ADP'), ('and', 'CONJ'), ('wanted', 'VERB'), ('to', 'PART'), ('charger', 'VERB'), ('her', 'PRON'), ('$', 'SYM'), ('10', 'NUM'), ('more', 'ADJ'), ('than', 'SCONJ'), ('what', 'PRON'), ('she', 'PRON'), ('had', 'AUX'), ('wanted', 'VERB'), ('.', 'PUNCT')]
23
23
23
TEST 23 [('They', 'O', 'O'), ('also', 'O', 'O'), ('got', 'O', 'O'), ('my', 'O', 'O'), ('friend', 'O', 'O'), ('s', 'O', 'O'), ('order', 'O', 'O'), ('mixed', 'B', 'O'), ('up', 'I', 'B'), ('and', 'O', 'I'), ('wanted', 'O', 'O'), ('to', 'O', 'O'), ('charger', 'O', 'O'), ('her', 'O', 'O'), ('$', 'O', 'O'), ('10', 'O', 'O'), ('more', 'O', 'O'), ('than', 'O', 'O'), ('what', 'O', 'O'), ('she', 'O', 'O'), ('had', 'O', 'O'), ('wanted', 'O', 'O'), ('.', 'O', 'O')]
[('They', 'PRON'), ('have', 'VERB'), ('a', 'DET'), ('great', 'ADJ'), ('selection', 'NOUN'), ('of', 'ADP'), ('wine', 'NOUN'), ('from',

3
3
3
TEST 3 [('Excellent', 'O', 'O'), ('location', 'O', 'O'), ('.', 'O', 'O')]
[('Good', 'ADJ'), ('sports', 'NOUN'), ('bar', 'NOUN'), ('.', 'PUNCT')]
[('Good', 'ADJ'), ('sports', 'NOUN'), ('bar', 'NOUN'), ('.', 'PUNCT')]
4
4
4
TEST 4 [('Good', 'O', 'O'), ('sports', 'B', 'O'), ('bar', 'I', 'O'), ('.', 'O', 'O')]
[('Hyatt', 'PROPN'), ('web', 'NOUN'), ('site', 'NOUN'), ('improved', 'VERB'), ('.', 'PUNCT')]
[('Hyatt', 'PROPN'), ('web', 'NOUN'), ('site', 'NOUN'), ('improved', 'VERB'), ('.', 'PUNCT')]
5
5
5
TEST 5 [('Hyatt', 'O', 'B'), ('web', 'B', 'I'), ('site', 'I', 'I'), ('improved', 'O', 'O'), ('.', 'O', 'O')]
[('Accurate', 'ADJ'), ('check', 'NOUN'), ('-', 'PUNCT'), ('out', 'NOUN'), ('.', 'PUNCT')]
[('Accurate', 'ADJ'), ('check', 'NOUN'), ('-', 'PUNCT'), ('out', 'NOUN'), ('.', 'PUNCT')]
5
5
5
TEST 5 [('Accurate', 'O', 'O'), ('check', 'B', 'O'), ('-', 'I', 'O'), ('out', 'I', 'O'), ('.', 'O', 'O')]
[('Rooms', 'NOUN'), ('clean', 'ADJ'), ('.', 'PUNCT')]
[('Rooms', 'NOUN'), ('clean', 'ADJ'),

4
4
4
TEST 4 [('But', 'O', 'O'), ('not', 'O', 'O'), ('so', 'O', 'O'), ('.', 'O', 'O')]
[('First', 'ADJ'), ('Time', 'NOUN'), ('Ballerina', 'NOUN')]
[('First', 'ADJ'), ('Time', 'NOUN'), ('Ballerina', 'NOUN')]
3
3
3
TEST 3 [('First', 'B', 'O'), ('Time', 'I', 'O'), ('Ballerina', 'O', 'O')]
[('My', 'PRON'), ('daughter', 'NOUN'), ('is', 'AUX'), ('starting', 'VERB'), ('ballet', 'NOUN'), ('this', 'DET'), ('year', 'NOUN'), ('for', 'ADP'), ('the', 'DET'), ('first', 'ADJ'), ('time', 'NOUN'), ('.', 'PUNCT')]
[('My', 'PRON'), ('daughter', 'NOUN'), ('is', 'AUX'), ('starting', 'VERB'), ('ballet', 'NOUN'), ('this', 'DET'), ('year', 'NOUN'), ('for', 'ADP'), ('the', 'DET'), ('first', 'ADJ'), ('time', 'NOUN'), ('.', 'PUNCT')]
12
12
12
TEST 12 [('My', 'O', 'O'), ('daughter', 'O', 'O'), ('is', 'O', 'O'), ('starting', 'O', 'O'), ('ballet', 'O', 'O'), ('this', 'O', 'O'), ('year', 'O', 'O'), ('for', 'O', 'O'), ('the', 'O', 'O'), ('first', 'O', 'O'), ('time', 'O', 'O'), ('.', 'O', 'O')]
[('I', 'PRON'), ("'m", 

14
14
TEST 14 [('A', 'O', 'O'), ('friend', 'O', 'O'), ('and', 'O', 'O'), ('I', 'O', 'O'), ('recently', 'O', 'O'), ('took', 'O', 'O'), ('our', 'O', 'O'), ('16', 'O', 'O'), ('and', 'O', 'O'), ('18', 'O', 'O'), ('month', 'B', 'B'), ('olds', 'I', 'I'), ('here', 'O', 'O'), ('.', 'O', 'O')]
[('It', 'PRON'), ("'s", 'VERB'), ('not', 'PART'), ('the', 'DET'), ('classiest', 'ADJ'), ('place', 'NOUN'), (',', 'PUNCT'), ('but', 'CONJ'), ('it', 'PRON'), ('was', 'VERB'), ('cleaner', 'ADJ'), ('than', 'SCONJ'), ('I', 'PRON'), ('expected', 'VERB'), ('and', 'CONJ'), ('the', 'DET'), ('staff', 'NOUN'), ('was', 'VERB'), ('very', 'ADV'), ('friendly', 'ADJ'), ('.', 'PUNCT')]
[('It', 'PRON'), ("'s", 'VERB'), ('not', 'PART'), ('the', 'DET'), ('classiest', 'ADJ'), ('place', 'NOUN'), (',', 'PUNCT'), ('but', 'CONJ'), ('it', 'PRON'), ('was', 'VERB'), ('cleaner', 'ADJ'), ('than', 'SCONJ'), ('I', 'PRON'), ('expected', 'VERB'), ('and', 'CONJ'), ('the', 'DET'), ('staff', 'NOUN'), ('was', 'VERB'), ('very', 'ADV'), ('frien

# Tests fonction combine_sst_mwe

In [18]:
_ = None
X = [[i+1, _, _, _] for i in range(10)]# + [['', '<eos>', _, _]] + [[i+1, _, _, _] for i in range(10)] + ['<eos>']
#y_mwe = ['I', 'b', 'b', 'I', 'b', 'B', 'I', 'O', 'O', 'O']
#y_mwe = ['I', 'I', 'I', 'I', 'o', 'B', 'B', 'O', 'O', 'O']
y_mwe = ['O', 'O', 'O', 'B', 'o', 'O', 'O', 'O', 'O', 'O']
y_sst = ['', '', '', '', '', 'v.social', '', 'n.act', '', '']

#y_mwe = ['O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'B'] + ['I'] + ['O', 'B', 'I', 'O', 'O', 'O', 'O', 'O', 'O', 'B'] + ['<eos>']
#y_sst = ['', '', '', '', '', 'v.social', '', 'n.act', '', ''] + [''] + ['', '', '', '', '', 'v.social', '', 'n.act', '', ''] + ['<eos>']

combine_sst_mwe(X, y_sst, y_mwe)

(['', '', '', '', '', 'v.social', '', 'n.act', '', ''],
 ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'])