# Deep Learning for NLP - Project

Done with Python 3.6

In [21]:
%load_ext autoreload
%autoreload 2
import io
import os
import numpy as np
import scipy
import utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
PATH_TO_DATA = "data/"
SST_DIR = os.path.join(PATH_TO_DATA,'SST')

# 1) Monolingual (English) word embeddings 

In [7]:
class Word2vec():
    def __init__(self, fname, nmax=100000):
        self.load_wordvec(fname, nmax)
        self.word2id = dict.fromkeys(self.word2vec.keys())
        self.id2word = {v: k for k, v in self.word2id.items()}
        self.embeddings = np.array(self.word2vec.values())
    
    def load_wordvec(self, fname, nmax):
        self.word2vec = {}
        with io.open(fname, encoding='utf-8') as f:
            next(f)
            for i, line in enumerate(f):
                word, vec = line.split(' ', 1)
                self.word2vec[word] = np.fromstring(vec, sep=' ')
                if i == (nmax - 1):
                    break
        self.words = np.asarray([*self.word2vec.keys()])
        self.size = len(self.word2vec[self.words[0]])
        print('Loaded %s pretrained word vectors' % (len(self.word2vec)))

    def most_similar(self, w, K=5):
        # K most similar words: self.score  -  np.argsort
        list_score = [self.score(w, w_dico) for w_dico in self.word2vec]
        if np.max(list_score) == 0:
            return []
        
        index_best_words = np.argsort(list_score)[::-1][:K]
        return self.words[index_best_words].tolist()

    def score(self, w1, w2):
        # cosine similarity: np.dot  -  np.linalg.norm
        if not w1 in self.word2vec or not w2 in self.word2vec:
            return 0
        emb1 = self.word2vec[w1]
        emb2 = self.word2vec[w2]
        return emb1.dot(emb2.T)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))


In [36]:
w2v = Word2vec(os.path.join(PATH_TO_DATA, 'crawl-300d-200k.vec'), nmax=25000)

# You will be evaluated on the output of the following:
print("\n\nA few scores...")
for w1, w2 in zip(('cat', 'dog', 'dogs', 'paris', 'germany'), ('dog', 'pet', 'cats', 'france', 'berlin')):
    print(w1, w2, w2v.score(w1, w2))

print("\n\nThe most similar words...")
for w1 in ['cat', 'dog', 'dogs', 'paris', 'germany']:
    best_matches = w2v.most_similar(w1)
    print(f"{w1}: {', '.join(best_matches)}")

Loaded 25000 pretrained word vectors


A few scores...
cat dog 0.671683666279249
dog pet 0.6842064029669219
dogs cats 0.7074389328052404
paris france 0
germany berlin 0


The most similar words...
cat: cat, cats, kitty, kitten, Cat
dog: dog, dogs, puppy, Dog, canine
dogs: dogs, dog, Dogs, puppies, cats
paris: 
germany: 


In [65]:
class BoV():
    def __init__(self, w2v):
        self.w2v = w2v
    
    def encode(self, sentences, idf=False):
        # takes a list of sentences, outputs a numpy array of sentence embeddings
        sentemb = []
        for sent in sentences:
            if idf is False:
                # mean of word vectors
                embeddings = [self.w2v.word2vec[w] for w in sent if w in w2v.word2vec]
                if len(embeddings) == 0: #if any word in the sentence is in our lookup table
                    sentemb.append(np.zeros(self.w2v.size))
                else:
                    sentemb.append(np.mean(embeddings, axis=0))
            else:
                # idf-weighted mean of word vectors
                embeddings = [idf[w]*self.w2v.word2vec[w] for w in sent if w in w2v.word2vec and w in idf]
                if len(embeddings) == 0: #if any word in the sentence is in our lookup table
                    sentemb.append(np.zeros(self.w2v.size))
                else:
                    sentemb.append(np.mean(embeddings, axis=0))
        return np.vstack(sentemb)

    def most_similar(self, s, sentences, idf=False, K=5):
        # get most similar sentences and **print** them
        list_score = [self.score(s, sentence, idf) for sentence in sentences]
        index_best_sent = (np.argsort(list_score)[::-1][:K])
        if max(list_score) == 0:
            print(f"Oops... The words are unkwnown in the sentence {s}!")
            return []
        
        all_sent = []
        print(f'Best {K} matches of "{s}"')
        for i, idx in enumerate(index_best_sent):
            print(f'{i}: {sentences[idx]}')
            all_sent.append(sentences[idx])
        return all_sent
    

    def score(self, s1, s2, idf=False):
        # cosine similarity: use   np.dot  and  np.linalg.norm
        emb1, emb2 = self.encode([s1, s2], idf)
        if np.linalg.norm(emb1) == 0 or np.linalg.norm(emb2) == 0:
             return 0
        return emb1.dot(emb2.T)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
        
                                           
    def build_idf(self, sentences):
        # build the idf dictionary: associate each word to its idf value
        idf = {}
        for sent in sentences:
            for w in set(sent):
                idf[w] = idf.get(w, 0) + 1        
        for w in idf:
            idf[w] = max(1, np.log10(len(sentences) / (idf[w])))
        return idf

In [64]:
w2v = Word2vec(os.path.join(PATH_TO_DATA, 'crawl-300d-200k.vec'), nmax=25000)
s2v = BoV(w2v)

# Load sentences

with open(os.path.join(PATH_TO_DATA, 'sentences.txt')) as f:
    sentences = f.readlines()

# # You will be evaluated on the output of the following:
s2v.most_similar('' if not sentences else sentences[10], sentences)  # BoV-mean
score = s2v.score('' if not sentences else sentences[7], '' if not sentences else sentences[13])
print(f"Score without IDF is {score}")

# # Build idf scores for each word
idf = s2v.build_idf(sentences) 
s2v.most_similar('' if not sentences else sentences[10], sentences, idf)  # BoV-idf
score = s2v.score('' if not sentences else sentences[7], '' if not sentences else sentences[13], idf)
print(f"Score with IDF is {score}")

Loaded 25000 pretrained word vectors
Best 5 matches of "1 smiling african american boy . 
"
0: 1 smiling african american boy . 

1: an african american male is singing into a microphone . 

2: an african american man is jumping in the air , while a boy claps . 

3: a man in brown is helping a man in red climb a rock . 

4: a smiling woman carrying her baby in a sling . 

Score without IDF is 0.9564367613713493
Best 5 matches of "1 smiling african american boy . 
"
0: 1 smiling african american boy . 

1: an african american man is jumping in the air , while a boy claps . 

2: an african american male is singing into a microphone . 

3: a man rock climbing in a forest . 

4: a man in brown is helping a man in red climb a rock . 

Score with IDF is 0.9562052331058694


# 2) Multilingual (English-French) word embeddings

Let's consider a bilingual dictionary of size V_a (e.g French-English).

Let's define **X** and **Y** the **French** and **English** matrices.

They contain the embeddings associated to the words in the bilingual dictionary.

We want to find a **mapping W** that will project the source word space (e.g French) to the target word space (e.g English).

Procrustes : **W\* = argmin || W.X - Y ||  s.t  W^T.W = Id**
has a closed form solution:
**W = U.V^T  where  U.Sig.V^T = SVD(Y.X^T)**

In what follows, you are asked to: 

In [31]:
# 1 - Download and load 50k first vectors of
#     https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec
#     https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.fr.vec

# TYPE CODE HERE
try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

files = ["wiki.en.vec", "wiki.fr.vec"]
urls = ["https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.en.vec",
       "https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.fr.vec"]

for f, u in zip(files, urls):
    if not os.path.isfile(os.path.join(PATH_TO_DATA, f)):
        print("Downloading", f)
        urlretrieve(u, os.path.join(PATH_TO_DATA, f))

w2v_eng = Word2vec(os.path.join(PATH_TO_DATA, 'wiki.en.vec'), nmax=50000)
w2v_fr = Word2vec(os.path.join(PATH_TO_DATA, 'wiki.fr.vec'), nmax=50000)

Loaded 50000 pretrained word vectors


StopIteration: 

In [None]:
# 2 - Get words that appear in both vocabs (= identical character strings)
#     Use it to create the matrix X and Y (of aligned embeddings for these words)

common = [w for w in w2v_eng.word2vec if w in w2v_fr.word2vec]
X = np.vstack([w2v_eng.word2vec[w] for w in common])
Y = np.vstack([w2v_fr.word2vec[w] for w in common])

In [None]:
# 3 - Solve the Procrustes using the scipy package and: scipy.linalg.svd() and get the optimal W
#     Now W*French_vector is in the same space as English_vector

# TYPE CODE HERE
X = X[:100]
Y = Y[:100]
U, s, Vh = scipy.linalg.svd(Y.dot(X.T))
W = U.dot(Vh)

In [None]:
# 4 - After alignment with W, give examples of English nearest neighbors of some French words (and vice versa)
#     You will be evaluated on that part and the code above

# TYPE CODE HERE
import scipy.linalg as la
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
%matplotlib inline  

class BilingualWord2Vec:
    #Class that allows to compute and visualize the alignement from one class to another one
    def __init__(self, w2v_1,w2v_2):
        self.w2v_lang_1 = w2v_1
        self.w2v_lang_2 = w2v_2
        
    def build_alignement(self,max_size):
        #Solve the Procrustes probelm from language 1 to language 2 and give the optimal alinement matrix
        words_in_both_vocab = [w for w in self.w2v_lang_1.word2vec if w in self.w2v_lang_2.word2vec]
        words_in_both_vocab = words_in_both_vocab[:max_size]
        X = np.vstack([self.w2v_lang_1.word2vec[w] for w in words_in_both_vocab]).T
        Y = np.vstack([self.w2v_lang_2.word2vec[w] for w in words_in_both_vocab]).T
        
        U, s, Vh = la.svd(Y.dot(X.T))
        return U.dot(Vh)
    
    def mostsimilarother_lang(self,word,W_alignement,K=5,lang_input = 1,lang_output=2):
        #word : input word
        #W_alignement : alignement from input word to output
        #K: number of nearest neightbor
        #lang_input: input language
        #lang_output: output language (similar words language)
        if lang_input == 1: #settings in order to let the choice of input and output language
            word2vec_1 = self.w2v_lang_1.word2vec
        else:
            word2vec_1 = self.w2v_lang_2.word2vec
        
        if lang_output == 1:  #settings in order to let the choice of input and output language
            word2vec_2 = self.w2v_lang_1.word2vec
        else:
            word2vec_2 = self.w2v_lang_2.word2vec
            
        query = W_alignement.dot(word2vec_1[word]) if word in word2vec_1 else 0 #projection in output space
        keys = word2vec_2.values()
        
        list_score = [self.score(query,key) for key in keys]
        index_best_words = (np.argsort(list_score)[::-1][:K])
        
        return np.array(word2vec_2.keys())[index_best_words] #return K most similar words

    def score(self, emb1, emb2):
        # cosine similarity: np.dot  -  np.linalg.norm
        if ((np.linalg.norm(emb1)!=0) and ((np.linalg.norm(emb2)!=0))):
            return emb1.dot(emb2.T)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
        else:
            return 0
        
    def plotsimilarwordother_lang(self,word,W_alignement,pca,K=5,lang_input = 1,lang_output=2):
        #This function plots the word given as input in the output space embedding given the alignement matrix
        #It is inspire from the TP1
        #word : input word
        #W_alignement : alignement from input word to output
        #K: number of nearest neightbor
        #lang_input: input language
        #lang_output: output language (similar words language)
        if lang_input == 1:  #settings in order to let the choice of input and output language
            word2vec_1 = self.w2v_lang_1.word2vec
        else:
            word2vec_1 = self.w2v_lang_2.word2vec
        
        if lang_output == 1:  #settings in order to let the choice of input and output language
            word2vec_2 = self.w2v_lang_1.word2vec
        else:
            word2vec_2 = self.w2v_lang_2.word2vec
            
            
        arr = np.empty((0,word2vec_2.values()[0].shape[0]), dtype='f')
        word_labels = [word]
        # get close words
        close_words = self.mostsimilarother_lang(word,W_alignement,K,lang_input,lang_output)
        # add the vector for each of the closest words to the array
        pos_word = W_alignement.dot(word2vec_1[word]) if word in word2vec_1 else np.zeros(word2vec_2.values()[0].shape)
        arr = np.append(arr, np.array([pos_word]), axis=0)
        for wrd_score in close_words:
            wrd_vector = word2vec_2[wrd_score]
            word_labels.append(wrd_score)
            arr = np.append(arr, np.array([wrd_vector]), axis=0)

        # find tsne coords for 2 dimensions
        np.set_printoptions(suppress=True)
        Y = pca.transform(arr)

        x_coords = Y[:, 0]
        y_coords = Y[:, 1]
        # display scatter plot
        plt.figure(figsize=(8, 6), dpi=80)
        plt.scatter(x_coords, y_coords)

        for label, x, y in zip(word_labels, x_coords, y_coords):
            plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points', fontsize=12)
        plt.xlim(x_coords.min()-0.2, x_coords.max()+0.2)
        plt.ylim(y_coords.min()-0.2, y_coords.max()+0.2)
        plt.title('Nearest neighbors visualization of the word "%s"' % word)

        plt.show()
        
biw2vec = BilingualWord2Vec(w2v_eng,w2v_fr)
W = biw2vec.build_alignement(19000) 
pca = PCA(n_components=2)
pca.fit(np.array(biw2vec.w2v_lang_2.word2vec.values()))
biw2vec.plotsimilarwordother_lang('cat',W,pca,K=10) #test with the english word cat
biw2vec.plotsimilarwordother_lang('chien',W.T,pca,K=10,lang_input=2,lang_output=1) #test with the french word cat

If you want to dive deeper on this subject: https://github.com/facebookresearch/MUSE

# 3) Sentence classification with BoV and scikit-learn

In [20]:
# 1 - Load train/dev/test of Stanford Sentiment TreeBank (SST)
#     (https://nlp.stanford.edu/~socherr/EMNLP2013_RNTN.pdf)

x_train,y_train = decode_with_labels(os.path.join(SST_DIR, 'stsa.fine.train'))
x_dev,y_dev = decode_with_labels(os.path.join(SST_DIR,'stsa.fine.dev'))
x_test = decode_without_labels(os.path.join(SST_DIR, 'stsa.fine.test.X'))

In [11]:
# 2 - Encode sentences with the BoV model above
w2v = Word2vec(os.path.join(PATH_TO_DATA, 'wiki.en.vec'), nmax=50000)
s2v = BoV(w2v)

idf = s2v.build_idf(x_train)

x_train_encode = s2v.encode(x_train)
x_dev_encode = s2v.encode(x_dev)
x_test_encode = s2v.encode(x_test)

x_train_encode_idf = s2v.encode(x_train,idf)
x_dev_encode_idf = s2v.encode(x_dev,idf)

FileNotFoundError: [Errno 2] No such file or directory: 'data/wiki.en.vec'

In [7]:
# 3 - Learn Logistic Regression on top of sentence embeddings using scikit-learn
#     (consider tuning the L2 regularization on the dev set)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression(C= 0.7,max_iter=1000,tol=1e-10)
lr.fit(x_train_encode,y_train)
prediction_dev = lr.predict(x_dev_encode)
prediction_train = lr.predict(x_train_encode)

lr_idf = LogisticRegression(C= 0.5,max_iter=1000,tol=1e-10)
lr_idf.fit(x_train_encode_idf,y_train)
prediction_dev_idf = lr_idf.predict(x_dev_encode_idf)
prediction_train_idf = lr_idf.predict(x_train_encode_idf)


print('Precision sur le train set', accuracy_score(y_train,prediction_train))
print('Precision sur le dev set', accuracy_score(y_dev,prediction_dev))
print('Precision sur le train set idf', accuracy_score(y_train,prediction_train_idf))
print('Precision sur le dev set idf ', accuracy_score(y_dev,prediction_dev_idf))


In [8]:
# 4 - Produce 2210 predictions for the test set (in the same order). One line = one prediction (=0,1,2,3,4).
#     Attach the output file "logreg_bov_y_test_sst.txt" to your deliverable.
#     You will be evaluated on the results of the test set.


def write_submission(path_output,prediction):
    #function that write the submission
    lines = '\n'.join([str(p) for p in prediction])
    with open(path_output,'w') as f:
        f.writelines(lines)

    

prediction_test = lr.predict(x_test_encode)
write_submission(os.path.join('.',r'logreg_bov_y_test_sst.txt'),prediction_test)

In [9]:
# BONUS!
# 5 - Try to improve performance with another classifier
#     Attach the output file "XXX_bov_y_test_sst.txt" to your deliverable (where XXX = the name of the classifier)

 classifier)

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

rf_classifier = RandomForestClassifier(n_estimators=100)
rf_classifier.fit(x_train_encode,y_train) #No extra regularization, bagging is enough in general
prediction_dev_rf = rf_classifier.predict(x_dev_encode)
prediction_train_rf = rf_classifier.predict(x_train_encode)


rf_classifier_idf = RandomForestClassifier(n_estimators=100)
rf_classifier_idf.fit(x_train_encode_idf,y_train) #No extra regularization, bagging is enough in general
prediction_dev_rf_idf = rf_classifier_idf.predict(x_dev_encode_idf)
prediction_train_rf_idf = rf_classifier_idf.predict(x_train_encode_idf)


print 'Random Forest Precision sur le train set', accuracy_score(y_train,prediction_train_rf)
print 'Random Forest Precision sur le dev set', accuracy_score(y_dev,prediction_dev_rf)

print 'Random Forest Precision sur le train set idf', accuracy_score(y_train,prediction_train_rf_idf)
print 'Random Forest Precision sur le dev set idf', accuracy_score(y_dev,prediction_dev_rf_idf)


xgb_classifier = XGBClassifier(max_depth=3,n_estimators=300,reg_lambda=1) #with early stopping based on val set
xgb_classifier.fit(x_train_encode,y_train,verbose=False,early_stopping_rounds=30,eval_metric='mlogloss',eval_set=[(x_train_encode,y_train),(x_dev_encode,y_dev)])
# TYPE CODE HERE
prediction_dev_xgb = xgb_classifier.predict(x_dev_encode)
prediction_train_xgb = xgb_classifier.predict(x_train_encode)

xgb_classifier_idf = XGBClassifier(max_depth=3,n_estimators=300,reg_lambda=1) #with early stopping based on val set
xgb_classifier_idf.fit(x_train_encode_idf,y_train,verbose=False,early_stopping_rounds=30,eval_metric='mlogloss',eval_set=[(x_train_encode,y_train),(x_dev_encode,y_dev)])
# TYPE CODE HERE
prediction_dev_xgb_idf = xgb_classifier_idf.predict(x_dev_encode_idf)
prediction_train_xgb_idf = xgb_classifier_idf.predict(x_train_encode_idf)


print 'XGBoost Precision sur le train set', accuracy_score(y_train,prediction_train_xgb)
print 'XGBoost  Precision sur le dev set', accuracy_score(y_dev,prediction_dev_xgb)

print 'XGBoost Precision sur le train set idf', accuracy_score(y_train,prediction_train_xgb_idf)
print 'XGBoost  Precision sur le dev set idf', accuracy_score(y_dev,prediction_dev_xgb_idf)



lgb_classifier = LGBMClassifier(max_depth=3,n_estimators=300,reg_lambda=1) #with early stopping based on val set
lgb_classifier.fit(x_train_encode,y_train,verbose=False,early_stopping_rounds=30,eval_metric='multi_logloss',eval_set=[(x_train_encode,y_train),(x_dev_encode,y_dev)])
# TYPE CODE HERE
prediction_dev_lgbm = lgb_classifier.predict(x_dev_encode)
prediction_train_lgbm = lgb_classifier.predict(x_train_encode)

lgb_classifier_idf = LGBMClassifier(max_depth=3,n_estimators=300,reg_lambda=1) #with early stopping based on val set
lgb_classifier_idf.fit(x_train_encode_idf,y_train,verbose=False,early_stopping_rounds=30,eval_metric='multi_logloss',eval_set=[(x_train_encode,y_train),(x_dev_encode,y_dev)])
# TYPE CODE HERE
prediction_dev_lgbm_idf = lgb_classifier_idf.predict(x_dev_encode_idf)
prediction_train_lgbm_idf = lgb_classifier_idf.predict(x_train_encode_idf)

print 'LGBM Precision sur le train set', accuracy_score(y_train,prediction_train_lgbm_idf)
print 'LGBM  Precision sur le dev set', accuracy_score(y_dev,prediction_dev_lgbm_idf)

print 'LGBM Precision sur le train set idf', accuracy_score(y_train,prediction_train_lgbm_idf)
print 'LGBM  Precision sur le dev set idf', accuracy_score(y_dev,prediction_dev_lgbm_idf)


# 4) Sentence classification with LSTMs in Keras

## 4.1 - Preprocessing

In [27]:
import keras
from keras.preprocessing.text import one_hot
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

In [58]:
# 1 - Load train/dev/test sets of SST
x_train, y_train = decode_with_labels(os.path.join(SST_DIR, 'stsa.fine.train'))
x_dev, y_dev = decode_with_labels(os.path.join(SST_DIR,'stsa.fine.dev'))
x_test = decode_without_labels(os.path.join(SST_DIR, 'stsa.fine.test.X'))

In [61]:
# 2 - Transform text to integers using keras.preprocessing.text.one_hot function

list_words = set([w for sentences in x_train + x_dev + x_test  for w in sentences])
n_words = len(list_words)

X_train = [one_hot(' '.join(x), n_words) for x in x_train]
X_dev = [one_hot(' '.join(x), n_words) for x in x_dev]
X_test = [one_hot(' '.join(x), n_words) for x in x_test]

Y_train = to_categorical(y_train)
Y_dev = to_categorical(y_dev)
print("Y:", Y_train.shape)

AttributeError: 'list' object has no attribute 'shape'

**Padding input data**

Models in Keras (and elsewhere) take batches of sentences of the same length as input. It is because Deep Learning framework have been designed to handle well Tensors, which are particularly suited for fast computation on the GPU.

Since sentences have different sizes, we "pad" them. That is, we add dummy "padding" tokens so that they all have the same length.

The input to a Keras model thus has this size : (batchsize, maxseqlen) where maxseqlen is the maximum length of a sentence in the batch.

In [62]:
# 3 - Pad your sequences using keras.preprocessing.sequence.pad_sequences
#     https://keras.io/preprocessing/sequence/
max_len = np.max([len(line) for line in x_train + x_dev + x_test])

X_train = pad_sequences(X_train, maxlen=max_len)
X_dev = pad_sequences(X_dev, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
print("X:", X_train.shape)

X: (8544, 56)


## 4.2 - Design and train your model

In [43]:
# 4 - Design your encoder + classifier using keras.layers
#     In Keras, Torch and other deep learning framework, we create a "container" which is the Sequential() module.
#     Then we add components to this contained : the lookuptable, the LSTM, the classifier etc.
#     All of these components are contained in the Sequential() and are trained together.

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Activation

embed_dim  = 32  # word embedding dimension
nhid = 64  # number of hidden units in the LSTM
vocab_size = 0  # size of the vocabulary
n_classes  = 5

model = Sequential()
model.add(Embedding(vocab_size, embed_dim))
model.add(LSTM(nhid, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(n_classes, activation='sigmoid'))


In [44]:
# 5 - Define your loss/optimizer/metrics

loss_classif = 'categorical_crossentropy' # find the right loss for multi-class classification
optimizer = 'adam' # find the right optimizer
metrics_classif = ['accuracy']

# Observe how easy (but blackboxed) this is in Keras
model.compile(loss=loss_classif,
              optimizer=optimizer,
              metrics=metrics_classif)
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 32)          0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                24832     
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 325       
Total params: 25,157
Trainable params: 25,157
Non-trainable params: 0
_________________________________________________________________
None


In [63]:
# 6 - Train your model and find the best hyperparameters for your dev set
#     you will be evaluated on the quality of your predictions on the test set

bs = 64
n_epochs = 6
history = model.fit(X_train, Y_train, batch_size=bs, epochs=n_epochs, validation_data=(X_dev, Y_dev))

AttributeError: 'Tensor' object has no attribute 'assign'

In [None]:
# 7 - Generate your predictions on the test set using model.predict(x_test)
#     https://keras.io/models/model/
#     Log your predictions in a file (one line = one integer: 0,1,2,3,4)
#     Attach the output file "logreg_lstm_y_test_sst.txt" to your deliverable.

# TYPE CODE HERE


## 4.3 -- innovate !

In [None]:
# 8 - Open question: find a model that is better on your dev set
#     (e.g: use a 1D ConvNet, use a better classifier, pretrain your lookup tables ..)
#     you will get point if the results on the test set are better: be careful of not overfitting your dev set too much..
#     Attach the output file "XXX_XXX_y_test_sst.txt" to your deliverable.

# TYPE CODE HERE
