# Features Explored

## Hashtag Distribution

In [77]:
import pandas as pd
import os
import re
import numpy as np
from collections import Counter
import nltk
import math
import pickle
import gensim
from textblob import Sentence

delimiter = '\t'
dir_name = '/Users/nbarnaba/PycharmProjects/Keras_Samples/src'

In [78]:
def get_all_tweets_as_whole_text(df):
    """Concatenate every tweet in column 1 of ``df`` into one string.

    Each tweet is prefixed with a single space, so a non-empty result starts
    with a space and an empty frame yields ``''``.

    Args:
        df: DataFrame whose column ``1`` holds the tweet text as strings.

    Returns:
        str: all tweets joined in row order.
    """
    # A single join builds the text in one pass; repeated ``+`` re-copies the
    # accumulated string for every tweet.
    return ''.join(' ' + each for each in df[1].values)

In [79]:
# Anger training set: read the delimited file, flatten the tweets into one
# string and list its hashtags from most to least frequent.
train_file_name = os.path.join(dir_name, '..', 'data', 'en_train', 'EI-reg-en_anger_train.txt')
df_anger = pd.read_csv(train_file_name, header=None, delimiter=delimiter)
anger_texts = get_all_tweets_as_whole_text(df_anger)
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
anger_hashtags = [tag for tag, _ in Counter(re.findall(r'#\w+', anger_texts)).most_common()]

In [80]:
# Fear training set: read the delimited file, flatten the tweets into one
# string and list its hashtags from most to least frequent.
train_file_name = os.path.join(dir_name, '..', 'data', 'en_train',  'EI-reg-en_fear_train.txt')
df_fear = pd.read_csv(train_file_name, header=None, delimiter=delimiter)
fear_texts = get_all_tweets_as_whole_text(df_fear)
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
fear_hashtags = [tag for tag, _ in Counter(re.findall(r'#\w+', fear_texts)).most_common()]

In [81]:
# Sadness training set: read the delimited file, flatten the tweets into one
# string and list its hashtags from most to least frequent.
train_file_name = os.path.join(dir_name, '..', 'data', 'en_train',  'EI-reg-en_sadness_train.txt')
df_sadness = pd.read_csv(train_file_name, header=None, delimiter=delimiter)
sadness_texts = get_all_tweets_as_whole_text(df_sadness)
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
sadness_hashtags = [tag for tag, _ in Counter(re.findall(r'#\w+', sadness_texts)).most_common()]

In [82]:
# Joy training set: read the delimited file, flatten the tweets into one
# string and list its hashtags from most to least frequent.
train_file_name = os.path.join(dir_name, '..', 'data', 'en_train',  'EI-reg-en_joy_train.txt')
df_joy = pd.read_csv(train_file_name, header=None, delimiter=delimiter)
joy_texts = get_all_tweets_as_whole_text(df_joy)
# Raw string: '\w' inside a plain literal is an invalid escape sequence.
joy_hashtags = [tag for tag, _ in Counter(re.findall(r'#\w+', joy_texts)).most_common()]

In [83]:
# Pool the per-emotion hashtag lists (anger, joy, sadness, fear — in that order).
hashtags = [*anger_hashtags, *joy_hashtags, *sadness_hashtags, *fear_hashtags]

In [84]:
# Hashtags that occur exactly once in the pooled list.
unique_hashtags = [tag for tag, count in Counter(hashtags).most_common() if count == 1]

In [85]:
# Keep, for each emotion, only the hashtags listed in unique_hashtags.
# Membership is tested against a set so each lookup is O(1) rather than a
# scan of the whole unique_hashtags list.
_unique_hashtag_lookup = set(unique_hashtags)
anger_hashtags = [each for each in anger_hashtags if each in _unique_hashtag_lookup]
fear_hashtags = [each for each in fear_hashtags if each in _unique_hashtag_lookup]
joy_hashtags = [each for each in joy_hashtags if each in _unique_hashtag_lookup]
sadness_hashtags = [each for each in sadness_hashtags if each in _unique_hashtag_lookup]
print('Anger : '+str(len(anger_hashtags)))
print('fear : '+str(len(fear_hashtags)))
print('joy : '+str(len(joy_hashtags)))
print('sadness : '+str(len(sadness_hashtags)))
print('unique : '+str(len(unique_hashtags)))
print('total : '+str(len(hashtags)))

Anger : 483
fear : 963
joy : 529
sadness : 582
unique : 2557
total : 3363


## NRC Hashtag Emotion Lexicon

In [86]:
# Read the NRC hashtag-emotion lexicon and keep only the four emotions used here.
file_name = os.path.join(dir_name, '..','resources', 'NRC-Hashtag-Emotion-Lexicon-v0.2', 'NRC-Hashtag-Emotion-Lexicon-v0.2.txt')
df_emo_hashtag = pd.read_csv(file_name, header=None, delimiter=delimiter)
df_emo_hashtag = df_emo_hashtag[df_emo_hashtag[0].isin(['sadness', 'anger', 'fear', 'joy'])]
# Spot check: value in column 2 of the first ('anger', 'pissed') row.
print(df_emo_hashtag.loc[(df_emo_hashtag[0] == 'anger') & (df_emo_hashtag[1] == 'pissed'), 2].values[0])
print(df_emo_hashtag)

1.1371909298
          0                 1         2
3908   fear         #westbank  1.951954
3909   fear     #apprehension  1.951954
3910   fear            #su4mh  1.951954
3911   fear          aaaaaaah  1.951954
3912   fear              #ied  1.951954
3913   fear        #coldsweat  1.951954
3914   fear             #isaf  1.951954
3915   fear           cryotek  1.951954
3916   fear          #rushing  1.951954
3917   fear              #shy  1.951954
3918   fear     #apprehensive  1.951954
3919   fear            #mosul  1.951954
3920   fear          #fearful  1.951954
3921   fear           #ashdod  1.951954
3922   fear    #socialanxiety  1.951954
3923   fear        #backtrack  1.951954
3924   fear         terrifies  1.951954
3925   fear   #claustrophobia  1.951954
3926   fear            qassam  1.951954
3927   fear        #hezbollah  1.951954
3928   fear          #hamas25  1.951954
3929   fear              #dfw  1.951954
3930   fear           #feared  1.951954
3931   fear          #talib

In [87]:
# Plain ndarray view of the lexicon rows. ``.values`` replaces
# ``DataFrame.as_matrix()``, which no longer exists in current pandas.
emo_hashtag = df_emo_hashtag.values
emo_hashtag

array([['fear', '#westbank', 1.95195360556393],
       ['fear', '#apprehension', 1.95195360556393],
       ['fear', '#su4mh', 1.95195360556393],
       ..., 
       ['joy', 'doctor', 0.0020351827030041197],
       ['joy', 'grad', 0.00113671004597709],
       ['joy', '1000', 0.00113671004597709]], dtype=object)

In [88]:
def filter_emo_hashtag(emotion, hashtag):
    """Look up ``hashtag`` for ``emotion`` in the module-level ``emo_hashtag`` table.

    Args:
        emotion: value matched against column 0 of ``emo_hashtag``.
        hashtag: value matched against column 1 of ``emo_hashtag``.

    Returns:
        ``None`` when no row matches. Otherwise a 1-D array holding the
        matching row; when several rows match they are laid end to end in
        table order, so index 2 always belongs to the first match.
    """
    # Compare whole columns at once instead of walking the table row by row.
    is_match = (emo_hashtag[:, 0] == emotion) & (emo_hashtag[:, 1] == hashtag)
    matches = emo_hashtag[is_match]
    if len(matches) == 0:
        return None
    return matches.reshape(-1)
# Smoke check: look up the ('joy', 'grad') entry of the lexicon.
filter_emo_hashtag('joy', 'grad')

array(['joy', 'grad', 0.00113671004597709], dtype=object)

In [89]:
def get_hashtags(tweet_content):
    """Return every hashtag (including the leading ``#``) found in a tweet.

    Uses the same ``#\\w+`` pattern as the hashtag-distribution cells so that
    tags containing digits or underscores (e.g. ``#su4mh``) are returned whole
    instead of being cut at the first non-letter.

    Args:
        tweet_content: tweet text.

    Returns:
        list[str]: hashtags in order of appearance, original casing kept.
    """
    # NOTE(review): casing is not normalised here; the lexicon rows shown in
    # this notebook are lower-case — confirm whether lookups should lower-case.
    return re.findall(r'#\w+', tweet_content)

In [90]:
def get_hashtag_emo_value(hashtag, emotion):
    """Return element 2 of the lexicon match for ``(emotion, hashtag)``, or 0.

    Args:
        hashtag: hashtag to look up.
        emotion: emotion to look up.

    Returns:
        The value at index 2 of ``filter_emo_hashtag``'s result, or ``0``
        when that lookup returns ``None``.
    """
    match = filter_emo_hashtag(emotion, hashtag)
    if match is None:
        return 0
    return match[2]

In [91]:
def get_emot_value_from_hashtag(tweet_content, emotion):
    """Average the lexicon values of all hashtags in a tweet for ``emotion``.

    Args:
        tweet_content: tweet text.
        emotion: emotion whose lexicon values are looked up.

    Returns:
        ``np.mean`` of the per-hashtag values; a tweet without hashtags
        contributes the mean of ``[0]``.
    """
    found = get_hashtags(tweet_content)
    scores = [get_hashtag_emo_value(tag, emotion) for tag in found] if len(found) > 0 else [0]
    return np.mean(scores)

## NRC Emoticon Lexicon (unigrams)

In [92]:
import string
# str.translate table that deletes every ASCII punctuation character.
translation_table = {ord(symbol): None for symbol in string.punctuation}

In [93]:
# Read the NRC emoticon unigram lexicon; column 0 (the term) is lower-cased
# and stripped of punctuation so it can be compared with tokenised tweets.
file_name = os.path.join(dir_name, '..','resources', 'NRC-Emoticon-Lexicon-v1.0', 'Emoticon-unigrams.txt')
df_emo_words = pd.read_csv(file_name, header=None, delimiter=delimiter)
df_emo_words[0] = df_emo_words[0].str.lower().str.translate(translation_table)
# ``.values`` replaces ``DataFrame.as_matrix()``, which no longer exists in
# current pandas.
emo_words = df_emo_words.values
print(emo_words)

[['jeffreydonovan' 5.0 6 0]
 ['familar' 5.0 6 0]
 ['vppatel2011' 5.0 6 0]
 ..., 
 ['clarianne' -4.999 0 5]
 ['scrambling' -4.999 0 8]
 ['ballsed' -4.999 0 6]]


In [94]:
def filter_emo_word(word):
    """Look up ``word`` in column 0 of the module-level ``emo_words`` table.

    Args:
        word: token to search for.

    Returns:
        ``None`` when no row matches. Otherwise a 1-D array holding the
        matching row; when several rows match they are laid end to end in
        table order, so index 1 always belongs to the first match.
    """
    # Compare the whole term column at once instead of looping over rows.
    matches = emo_words[emo_words[:, 0] == word]
    if len(matches) == 0:
        return None
    return matches.reshape(-1)
# Smoke check: print element 1 of the row found for 'ballsed'.
print (filter_emo_word('ballsed')[1])

-4.999


In [135]:
def get_tokenized_ndarray_from_df(df):
    """Tokenise the tweets in column 1 of ``df``.

    Each tweet is lower-cased, stripped of ASCII punctuation and split on
    whitespace.

    Args:
        df: DataFrame whose column ``1`` holds the tweet text.

    Returns:
        numpy object array with one token list per row.
    """
    strip_punctuation = {ord(symbol): None for symbol in string.punctuation}
    lowered = df[1].str.lower()
    cleaned = lowered.str.translate(strip_punctuation)
    tokens = cleaned.str.split()
    return tokens.values
# Preview the tokenised anger tweets.
get_tokenized_ndarray_from_df(df_anger)

array([ list(['how', 'the', 'fuk', 'who', 'the', 'heck', 'moved', 'my', 'fridge', 'should', 'i', 'knock', 'the', 'landlord', 'door', 'angry', 'mad']),
       list(['so', 'my', 'indian', 'uber', 'driver', 'just', 'called', 'someone', 'the', 'n', 'word', 'if', 'i', 'wasnt', 'in', 'a', 'moving', 'vehicle', 'id', 'have', 'jumped', 'out', 'disgusted']),
       list(['dpduk', 'i', 'asked', 'for', 'my', 'parcel', 'to', 'be', 'delivered', 'to', 'a', 'pick', 'up', 'store', 'not', 'my', 'address', 'fuming', 'poorcustomerservice']),
       ...,
       list(['id', 'love', '2', 'c', 'gyimah', 'in', 'action', 'but', 'his', 'coach', 'is', 'holding', 'a', 'grudge', 'against', 'him']),
       list(['forgiving', 'means', 'operating', 'with', 'gods', 'spirit', 'amp', 'god', 'isnt', 'unforgiving', 'amp', 'doesnt', 'hold', 'a', 'grudge', 'so', 'who', 'am', 'i', 'exactly']),
       list(['ive', 'got', 'a', 'lot', 'of', 'tokens', 'saved', 'up', 'and', 'i', 'wanna', 'spam', 'the', 'event', 'song', 'but', 'my'

In [134]:
def get_emoticon_lexicon_value(df):
    """Score every tweet in ``df`` against the emoticon lexicon.

    For each tweet the element-1 values of all tokens found by
    ``filter_emo_word`` are summed and divided by ``matches + 1``.

    Args:
        df: DataFrame passed to ``get_tokenized_ndarray_from_df``.

    Returns:
        list: one score per tweet, in row order.
    """
    scores = []
    for tokens in get_tokenized_ndarray_from_df(df):
        hits = [row for row in map(filter_emo_word, tokens) if row is not None]
        total = sum(row[1] for row in hits)
        # NOTE(review): the denominator is matches + 1, not matches — presumably
        # to avoid dividing by zero for tweets without lexicon words; confirm.
        scores.append(total / (len(hits) + 1))
    return scores

## Phrase Vector (by averaging the constituent word vectors)

In [97]:
# Module-level slot for the word vectors; load_word_vectors() fills it in.
word_model = None

In [98]:
def load_word_vectors():
    """Load the ``wiki.en.vec`` word vectors into the global ``word_model``.

    The file is read in text (non-binary) word2vec format from
    ``<dir_name>/../resources/wiki.en/wiki.en.vec``.

    Returns:
        The loaded ``KeyedVectors`` object (also stored in ``word_model``).
    """
    global word_model
    vectors_path = os.path.join(dir_name, '..', 'resources', 'wiki.en', 'wiki.en.vec')
    print("Loading the data file... Please wait...")
    loaded_vectors = gensim.models.KeyedVectors.load_word2vec_format(vectors_path, binary=False)
    word_model = loaded_vectors
    print("Successfully loaded 3.6 G bin file!")
    return word_model

In [99]:
def get_phrase_vector_obj(value):
    """Wrap ``value`` (a phrase string) in a ``PhraseVector`` and return it."""
    return PhraseVector(value)

In [100]:
class PhraseVector:
    """A phrase together with its averaged word vector and grouped POS tags.

    Attributes:
        phrase: the text passed to the constructor.
        vector: result of ``phrase_to_vec(phrase)``.
        pos_tag: result of ``get_words_in_phrase(phrase)``.
    """

    def __init__(self, phrase):
        self.phrase = phrase
        self.vector = self.phrase_to_vec(phrase)
        self.pos_tag = self.get_words_in_phrase(phrase)

    @staticmethod
    def convert_vector_set_to_average(vector_set, ignore=None):
        """Average a collection of vectors, optionally weighted.

        Args:
            vector_set: sequence of equal-length vectors.
            ignore: optional per-vector weights. ``None`` or an empty
                sequence selects the plain mean.

        Returns:
            The (weighted) mean vector. An empty ``vector_set`` yields NaN
            from ``np.mean``.
        """
        # ``None`` replaces the former mutable ``[]`` default argument.
        if ignore is None or len(ignore) == 0:
            return np.mean(vector_set, axis=0)
        return np.dot(np.transpose(vector_set), ignore) / sum(ignore)

    @staticmethod
    def get_unique_token_tags(vector1, vector2):
        """Return the ``pos_tag`` entries of both objects, de-duplicated in order."""
        tag_list = []
        for each_tag in vector1.pos_tag + vector2.pos_tag:
            if each_tag not in tag_list:
                tag_list.append(each_tag)
        return tag_list

    def phrase_to_vec(self, phrase):
        """Average the ``word_model`` vectors of the lower-cased words in ``phrase``.

        Words whose lookup fails are skipped.
        """
        vector_set = []
        for each_word in phrase.lower().split():
            try:
                vector_set.append(word_model[each_word])
            except Exception:
                # Best effort: a failed lookup drops the word. ``Exception``
                # (rather than a bare ``except``) lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
        return self.convert_vector_set_to_average(vector_set)

    def get_cosine_similarity(self, other_vector):
        """Cosine similarity between this vector and ``other_vector.vector``.

        Returns 0 when the result is NaN or cannot be checked with
        ``math.isnan``.
        """
        cosine_similarity = np.dot(self.vector, other_vector.vector) / (
            np.linalg.norm(self.vector) * np.linalg.norm(other_vector.vector))
        try:
            if math.isnan(cosine_similarity):
                cosine_similarity = 0
        except Exception:
            cosine_similarity = 0
        return cosine_similarity

    def get_words_in_phrase(self, phrase):
        """Group consecutive words of ``phrase`` that share a universal POS tag.

        Returns:
            list of ``(text, tag)`` tuples where ``text`` is the lower-cased,
            space-joined run of words carrying ``tag``; ``[]`` for a blank
            phrase.
        """
        if phrase.strip() == '':
            return []
        tagged_input = nltk.pos_tag(phrase.split(), tagset='universal')
        prev_item, prev_tag = tagged_input[0]
        # Lower-case the first word too, like every later word.
        g_item_list = [prev_item.lower()]
        cur_group_index = 0
        space = ' '
        revised_tag = []
        for cur_item, cur_tag in tagged_input[1:]:
            cur_item = cur_item.lower()
            # Tags are strings: compare by value; ``is`` only worked when the
            # interpreter happened to intern them.
            if prev_tag == cur_tag:
                g_item_list[cur_group_index] += space + cur_item
            else:
                revised_tag.append((g_item_list[cur_group_index], prev_tag))
                prev_tag = cur_tag
                g_item_list.append(cur_item)
                cur_group_index += 1
        revised_tag.append((g_item_list[cur_group_index], prev_tag))
        return revised_tag

In [101]:
import pickle
# Load the word vectors into the module-level word_model and show the object.
word_model = load_word_vectors()
print(word_model)

Loading the data file... Please wait...
Successfully loaded 3.6 G bin file!
<gensim.models.keyedvectors.KeyedVectors object at 0x12805d588>


In [136]:
def get_phrase_vectors(df, emotion, default_dim=300):
    """Build one averaged phrase vector per tweet.

    Args:
        df: DataFrame with tweet text in column ``1`` and labels in column ``3``.
        emotion: unused; kept so existing callers keep working.
        default_dim: width used for the zero rows when no tweet produced a
            valid vector (otherwise the width of the first valid vector is used).

    Returns:
        tuple ``(tweet_vectors, labels)``: a float64 array with one row per
        tweet (tweets whose vector contains NaN become all-zero rows) and
        ``df[3].values``. An empty ``df`` yields ``np.array([[]])``.
    """
    # A plain loop also handles an empty frame, which np.vectorize rejects.
    phrase_objs = [get_phrase_vector_obj(text) for text in df[1].values]

    rows = []
    for each_obj in phrase_objs:
        vector = np.asarray(each_obj.vector, dtype=np.float64)
        # None marks "no usable vector"; it is replaced by zeros below once
        # the embedding width is known.
        rows.append(None if np.isnan(vector).any() else vector.reshape(-1))

    width = next((len(row) for row in rows if row is not None), default_dim)
    if rows:
        # Stack once instead of growing the array with repeated concatenation.
        tweet_vectors = np.vstack([np.zeros(width) if row is None else row for row in rows])
    else:
        tweet_vectors = np.array([[]])
    labels = df[3].values

    return tweet_vectors, labels

## Polarity and Subjectivity using Textblob

In [137]:
def get_polarity_and_subjectivity(df, emotion):
    """Compute polarity and subjectivity for every tweet in column 1.

    Args:
        df: DataFrame whose column ``1`` holds the tweet text.
        emotion: unused; kept so existing callers keep working.

    Returns:
        tuple ``(polarity_list, subjectivity_list)`` of arrays shaped
        ``(len(df), 1)``.
    """
    polarities = []
    subjectivities = []
    for each_text in df[1].values:
        # Build the Sentence once per tweet and read both attributes from it.
        sentence = Sentence(each_text)
        polarities.append(sentence.polarity)
        subjectivities.append(sentence.subjectivity)

    polarity_list = np.array(polarities).reshape(len(polarities), 1)
    subjectivity_list = np.array(subjectivities).reshape(len(subjectivities), 1)
    return polarity_list, subjectivity_list

## All Features Concatenated

In [138]:
def get_features(df, emotion):
    """Assemble the hand-crafted feature matrix for ``df``.

    Columns, in order: polarity, subjectivity, the phrase-vector columns,
    the emoticon-lexicon score and the mean hashtag-lexicon score.

    Args:
        df: DataFrame with tweet text in column ``1``.
        emotion: emotion name forwarded to the individual extractors.

    Returns:
        2-D array with one row per tweet.
    """
    print('inside get_features...')
    polarity_list, subjectivity_list = get_polarity_and_subjectivity(df, emotion)
    tweet_vectors, labels = get_phrase_vectors(df, emotion)
    emoticon_lexicon_value = get_emoticon_lexicon_value(df)
    emoticon_lexicon_value = np.array(emoticon_lexicon_value).reshape(len(emoticon_lexicon_value), 1)
    # ``df[1].values`` replaces ``df.as_matrix()[:, 1]``: as_matrix() no longer
    # exists in current pandas, and the other extractors already read column 1
    # this way.
    hashtag_feature_nrc_hashtag_emoticon = np.array(
        [get_emot_value_from_hashtag(each, emotion) for each in df[1].values]
    ).reshape(len(df), 1)
    return np.concatenate((polarity_list,
                           subjectivity_list,
                           tweet_vectors,
                           emoticon_lexicon_value,
                           hashtag_feature_nrc_hashtag_emoticon), axis=1)
    

## Recurrent Neural Network

In [139]:
from numpy import asarray
from numpy import zeros
from keras.models import Sequential
from keras.layers import *
from keras.constraints import min_max_norm
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import sys
import os
import csv

# Placeholder value; the training cell assigns the real one.
max_length = -1

In [140]:
def get_embedding_parameters(features):
    """Derive the model-sizing tuple from a 2-D feature matrix.

    Args:
        features: 2-D array-like exposing ``.shape``.

    Returns:
        tuple ``(vocab_size, embedded_vector_length, max_length,
        embedding_matrix)`` where the first and third items are the row
        count, the second is the column count and the last is ``features``
        itself.
    """
    row_count, column_count = features.shape
    return row_count, column_count, row_count, features

In [141]:
def get_embedding_model(vocab_size, embedded_vector_length, embedding_matrix, max_length,
                        optimizer='adam',
                        loss='mean_squared_logarithmic_error'):
    """Build the uncompiled word branch: frozen Embedding -> BiLSTM -> Dense(300).

    Args:
        vocab_size: number of rows of the embedding layer.
        embedded_vector_length: width of each embedding vector.
        embedding_matrix: initial (non-trainable) embedding weights.
        max_length: input sequence length.
        optimizer: unused here — the model is not compiled in this function.
        loss: unused here — the model is not compiled in this function.

    Returns:
        The ``Sequential`` model.
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embedded_vector_length, weights=[embedding_matrix],
                        input_length=max_length, trainable=False))
    model.add(Bidirectional(LSTM(300, activation='relu',
                                 kernel_initializer='random_uniform',
                                 bias_initializer='zeros',
                                 kernel_constraint=min_max_norm(min_value=-1.0, max_value=1.0, rate=0.5, axis=0),
                                 bias_constraint=min_max_norm(min_value=-1.0, max_value=1.0, rate=0.5, axis=0))))
    model.add(Dense(300, activation='relu'))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

In [142]:
def get_dense_model(vocab_size, embedded_vector_length, embedding_matrix, max_length,
                    optimizer='adam',
                    loss='mean_squared_logarithmic_error'):
    """Build the uncompiled feature branch: a single Dense(300, relu) layer.

    Args:
        vocab_size: unused.
        embedded_vector_length: width of each input feature row.
        embedding_matrix: unused.
        max_length: unused.
        optimizer: unused here — the model is not compiled in this function.
        loss: unused here — the model is not compiled in this function.

    Returns:
        The ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(300,
                    activation='relu',
                    input_shape=(embedded_vector_length,)))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

In [143]:
def get_rnn_model(embedding_models,
                  optimizer='adam',
                  loss='mean_squared_logarithmic_error',
                  output_activation='relu'):
    """Concatenate the branch models and add a single-unit output layer.

    Args:
        embedding_models: list of branch models whose outputs are concatenated.
        optimizer: optimizer passed to ``compile``.
        loss: loss passed to ``compile``.
        output_activation: activation of the final ``Dense(1)`` layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # NOTE(review): ``Merge`` is the legacy merge layer; newer Keras releases
    # no longer ship it — confirm the Keras version this notebook is pinned to.
    model.add(Merge(embedding_models, mode='concat', concat_axis=1))
    model.add(Dense(1, activation=output_activation,
                    kernel_initializer='random_uniform',
                    bias_initializer='zeros',
                    kernel_constraint=min_max_norm(min_value=-1.0, max_value=1.0, rate=.5, axis=0),
                    bias_constraint=min_max_norm(min_value=-1.0, max_value=1.0, rate=0.5, axis=0)))
    model.compile(optimizer=optimizer, loss=loss)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

In [144]:
def get_embedding_index(embedding_file_name, embedded_vector_length=None):
    """Load a text embedding file into a ``{word: float32 vector}`` dict.

    The first line of the file is always skipped. The first whitespace-separated
    field of every other line is the word; the remaining fields are the vector.
    If those fields cannot all be parsed as floats, the last
    ``embedded_vector_length`` fields are used instead. When a word occurs
    more than once, the first occurrence wins.

    Args:
        embedding_file_name: path of the embedding file (read as UTF-8).
        embedded_vector_length: vector width used by the fallback. When
            ``None`` it is taken from a two-field ``<count> <dim>`` first
            line if present, otherwise from the first line that parses.

    Returns:
        dict mapping each word to its vector.
    """
    embeddings_index = dict()
    with open(embedding_file_name, encoding='utf-8') as f:
        header_fields = next(f, '').split()
        vector_length = embedded_vector_length
        if vector_length is None and len(header_fields) == 2 and header_fields[1].isdigit():
            vector_length = int(header_fields[1])
        for line in f:
            values = line.split()
            if not values:
                # Blank line: nothing to index.
                continue
            word = values[0]
            try:
                coefs = asarray(values[1:], dtype='float32')
            except ValueError:
                if vector_length is None:
                    # Width still unknown, so the vector cannot be located.
                    continue
                # The entry itself contains whitespace; the vector is the
                # trailing ``vector_length`` fields.
                coefs = asarray(values[-vector_length:], dtype='float32')
            if vector_length is None:
                vector_length = len(coefs)
            if word not in embeddings_index:
                embeddings_index[word] = coefs
    return embeddings_index

In [145]:
def write_to_file(tweet_ids, assgn_emotions, tweet_contents, predicted_scores, file_name, output_dir=None):
    """Write one tab-separated row per tweet: id, content, emotion, score.

    Args:
        tweet_ids: iterable of tweet ids.
        assgn_emotions: iterable of emotion labels.
        tweet_contents: iterable of tweet texts.
        predicted_scores: iterable of scores.
        file_name: name of the file created inside the output directory.
        output_dir: directory to write into; defaults to the project's
            ``output`` folder next to ``src``. Created if missing.
    """
    if output_dir is None:
        dir_name = '/Users/nbarnaba/PycharmProjects/Keras_Samples/src'
        output_dir = os.path.join(dir_name, '..', 'output')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    # newline='' lets the csv module control line endings, as its
    # documentation requires.
    with open(os.path.join(output_dir, file_name), 'w', newline='') as f:
        file_writer = csv.writer(f, delimiter='\t')
        for each_tweet_id, each_tweet_content, each_emotion, each_score in \
                zip(tweet_ids, tweet_contents, assgn_emotions, predicted_scores):
            file_writer.writerow([each_tweet_id, each_tweet_content, each_emotion, each_score])

In [146]:
def build_vocab(df, embeddings_index=None):
    """Map every distinct token of ``df``'s tweets to a positive integer id.

    Ids are assigned in sorted token order, so the same data always produces
    the same vocabulary. Id ``0`` is reserved for ``'<unk>'``.

    Args:
        df: DataFrame passed to ``get_tokenized_ndarray_from_df``.
        embeddings_index: optional mapping; when given, only tokens that are
            keys of it enter the vocabulary.

    Returns:
        dict ``{token: id}`` including ``'<unk>': 0``.
    """
    unique_tokens = set()
    for each_tweet in get_tokenized_ndarray_from_df(df):
        if embeddings_index is None:
            unique_tokens.update(each_tweet)
        else:
            unique_tokens.update(word for word in each_tweet if word in embeddings_index)
    # Sorting removes the dependence on set iteration order.
    vocab = {each_word: each_index for each_index, each_word in enumerate(sorted(unique_tokens), start=1)}
    vocab['<unk>'] = 0
    return vocab

In [147]:
def build_embedding_matrix(vocab, embeddings_index, embedded_vector_length=300):
    """Create the embedding matrix for ``vocab``.

    Args:
        vocab: mapping ``{token: row index}``.
        embeddings_index: mapping ``{token: vector}``.
        embedded_vector_length: number of columns of the matrix.

    Returns:
        Array of shape ``(len(vocab), embedded_vector_length)``; the row of a
        token absent from ``embeddings_index`` stays all zeros.
    """
    embedding_matrix = zeros((len(vocab), embedded_vector_length))
    for token, row_index in vocab.items():
        if token in embeddings_index:
            embedding_matrix[row_index] = embeddings_index[token]
    return embedding_matrix

In [148]:
def get_encode_docs(df, vocab):
    """Replace every token of every tweet by its id in ``vocab``.

    Args:
        df: DataFrame passed to ``get_tokenized_ndarray_from_df``.
        vocab: mapping ``{token: id}``; tokens not in it are encoded as ``0``.

    Returns:
        tuple ``(encoded_docs, max_length)``: a list of id lists and the
        length of the longest one (``-1`` when there are no tweets).
    """
    encoded_docs = [
        [vocab[token] if token in vocab else 0 for token in tweet]
        for tweet in get_tokenized_ndarray_from_df(df)
    ]
    longest = max((len(doc) for doc in encoded_docs), default=-1)
    return encoded_docs, longest

In [206]:
def get_padded_docs(df, vocab, encoded_size=None):
    """Encode the tweets of ``df`` and pad/truncate them to a common length.

    Args:
        df: DataFrame forwarded to ``get_encode_docs``.
        vocab: mapping ``{token: id}`` forwarded to ``get_encode_docs``.
        encoded_size: target length; when ``None`` the longest encoded tweet
            sets the length.

    Returns:
        tuple ``(padded_docs, max_length)``: a float64 array with one row per
        tweet, right-padded with zeros or cut to ``max_length``, and the
        length used. With no tweets the array is ``np.array([[]])``.
    """
    encoded_docs, max_length = get_encode_docs(df, vocab)
    if encoded_size is not None:
        max_length = encoded_size
    if not encoded_docs:
        return np.array([[]]), max_length
    # Allocate once and fill rows; growing the array with np.concatenate
    # re-copied every earlier row for each new tweet.
    padded_docs = np.zeros((len(encoded_docs), max_length))
    for row_index, each_encoded_doc in enumerate(encoded_docs):
        kept = each_encoded_doc[:max_length]
        padded_docs[row_index, :len(kept)] = kept
    return padded_docs, max_length

In [None]:
# Train one model per emotion on the training split, then score the dev split.
dir_name = '/Users/nbarnaba/PycharmProjects/Keras_Samples/src'
char_level = False
docs = []
labels = []
tweet_ids = []
emotions = []

embedded_vector_length = 300
emotion_names = ['sadness', 'joy', 'anger', 'fear']
embedding_name = 'wiki.en'
embeddings_index = get_embedding_index(os.path.join(dir_name,
                                                    '..',
                                                    'resources',
                                                    embedding_name,
                                                    'wiki.en.vec'))
                                                    # 'glove.6B.' + str(embedded_vector_length) + 'd.txt'))

for emot_id, emotion in enumerate(emotion_names):
    training_file_name = os.path.join(dir_name, '..','data','en_train','EI-reg-en_'+emotion+'_train.txt')

    df = pd.read_csv(training_file_name, header=None, delimiter=delimiter)
    tweet_ids_train, docs_train, emotions_train, label_train = [df[each].values for each in range(4)]

    print('Loaded %s word vectors.' % len(embeddings_index))
    vocab = build_vocab(df)
    print('Built vocabulary...')
    padded_docs_train, em_max_length = get_padded_docs(df, vocab)
    print('Padded docs for training is created...')
    vocab_size = len(vocab)
    embedding_matrix = build_embedding_matrix(vocab, embeddings_index)
    print('Loaded embedding matrix...')
    # define model with word embeddings
    _, embedded_vector_length = embedding_matrix.shape
    # Keep the Keras branch under its own name. Assigning it to ``word_model``
    # replaced the word vectors that PhraseVector.phrase_to_vec looks words up
    # in, so every lookup failed silently and the phrase vectors were all zeros.
    word_embedding_model = get_embedding_model(vocab_size, embedded_vector_length, embedding_matrix, em_max_length)
    # Added features
    current_feature = get_features(df, emotion)
    print('extra features loaded...')
    print('loaded word model...')
    # From here on these four names describe the feature matrix, not the word embeddings.
    vocab_size, embedded_vector_length, max_length, embedding_matrix = get_embedding_parameters(current_feature)

    phrase_vector_model = get_dense_model(vocab_size, embedded_vector_length, embedding_matrix, max_length)
    print('loaded phrase model...')
    model=get_rnn_model([word_embedding_model, phrase_vector_model])

    print(padded_docs_train.shape)
    print(current_feature.shape)

    model.fit([padded_docs_train, current_feature], label_train, verbose=1)

    print('model has been fit...')
    # dev set
    dev_file_name=os.path.join(dir_name, '..', 'data', 'en_dev', '2018-EI-reg-En-' + emotion + '-dev.txt')
    df = pd.read_csv(dev_file_name, header=None, delimiter=delimiter)
    tweet_ids, docs, emotions, labels = [df[each].values for each in range(4)]
    # Dev tweets are padded/truncated to the training sequence length.
    padded_docs_dev, _ = get_padded_docs(df, vocab, encoded_size = em_max_length)
    dev_features = get_features(df, emotion)

    predicted_list = model.predict([padded_docs_dev, dev_features])
    write_to_file(tweet_ids, emotions, docs, labels, emotion + '_' + embedding_name + '_dev_labels')
    predicted_list = [each[0] for each in predicted_list]
    write_to_file(tweet_ids, emotions, docs, predicted_list, emotion+'_'+embedding_name+'_dev')

    print('Mean Squared Error of Validation Set: '+str(mean_squared_error(labels, predicted_list)))

Loaded 2518843 word vectors.
Built vocabulary...
Padded docs for training is created...
Loaded embedding matrix...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_15 (Embedding)     (None, 33, 300)           1811100   
_________________________________________________________________
bidirectional_15 (Bidirectio (None, 600)               1442400   
_________________________________________________________________
dense_36 (Dense)             (None, 300)               180300    
Total params: 3,433,800
Trainable params: 1,622,700
Non-trainable params: 1,811,100
_________________________________________________________________
None
inside get_features...


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [205]:
# Inspect the dimensions of the current embedding_matrix.
embedding_matrix.shape

(5834, 300)