### TensorFlow + Glove Twitter

Embedding:

In [137]:
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
import pandas as pd

In [517]:
tweets = pd.read_csv('train.csv') 
tests = pd.read_csv('test.csv') 

In [518]:
tweets.drop_duplicates(subset = 'text', keep = False, inplace = True)
tweets.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7434 entries, 0 to 7612
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        7434 non-null   int64 
 1   keyword   7378 non-null   object
 2   location  4982 non-null   object
 3   text      7434 non-null   object
 4   target    7434 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 348.5+ KB


In [519]:
t = Tokenizer()
t.fit_on_texts(tweets['text'])
vocab_size = len(t.word_index) + 1
print(vocab_size)

22586


In [142]:
# integer encode the documents
encoded_docs = t.texts_to_sequences(tweets['text'])
enconded_test = t.texts_to_sequences(tests['text'])

In [143]:
from keras.preprocessing.sequence import pad_sequences

max_length = 31 # Maxima cantidad de palabras en los tweets
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
padded_tests = pad_sequences(enconded_test, maxlen=max_length, padding='post')
print(padded_docs)

[[ 116 4534   25 ...    0    0    0]
 [ 182   46  242 ...    0    0    0]
 [  40 1705 1572 ...    0    0    0]
 ...
 [ 106  225  453 ...    0    0    0]
 [ 121  837 1338 ...    0    0    0]
 [   4  201   53 ...    0    0    0]]


In [144]:
import numpy as np

embeddings_index = dict()
f = open('glove.twitter.27B.100d.txt') # Vectores entrenados de 100 dimensiones
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 1193514 word vectors.


In [145]:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: # Si la palabra no esta queda llena de 0s
        embedding_matrix[i] = embedding_vector

### CNN

In [662]:
tweets_features.loc[:,'length':]

Unnamed: 0,length,avg_word_length,amount_of_words,amount_of_unique_words,sentiment,stopwords_count,punctuation_count,mentions_count,hashtags_count,longest_word_length_without_stopwords,stopword_word_ratio,adjectives_count,nouns_count,verbs_count,adverbs_count
0,69,4.384615,13,13,0.2732,6,1,0,1,7,0.461538,0,6,1,0
1,38,4.571429,7,7,-0.3400,0,1,0,0,6,0.000000,0,6,0,0
2,133,5.090909,22,20,-0.2960,11,3,0,0,10,0.500000,1,7,7,0
3,65,7.125000,8,8,0.0000,1,2,0,1,10,0.125000,1,4,1,0
4,88,4.500000,16,15,0.0000,7,2,0,2,6,0.437500,0,6,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7429,136,6.210526,19,19,-0.6841,6,12,0,1,10,0.315789,0,13,3,0
7430,114,3.423077,26,25,-0.4939,16,1,0,0,8,0.615385,2,4,5,3
7431,121,5.100000,20,18,-0.7650,1,11,0,0,8,0.050000,0,14,0,0
7432,83,6.636364,11,11,-0.4939,2,5,0,0,8,0.181818,2,6,1,0


In [20]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_3 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6401      
Total params: 2,444,457
Trainable params: 185,857
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [21]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)

In [22]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=50,
          verbose=2,
          callbacks=callback)

Epoch 1/50
44/44 - 1s - loss: 0.5262 - accuracy: 0.7444 - val_loss: 0.4449 - val_accuracy: 0.7983
Epoch 2/50
44/44 - 1s - loss: 0.3909 - accuracy: 0.8335 - val_loss: 0.4308 - val_accuracy: 0.8133
Epoch 3/50
44/44 - 1s - loss: 0.3256 - accuracy: 0.8687 - val_loss: 0.4464 - val_accuracy: 0.8144
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x152fee190>

In [25]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_5 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6401      
Total params: 2,444,457
Trainable params: 185,857
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [26]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(padded_docs, tweets['target'],
          batch_size=128,
          epochs=5,
          verbose=2,
          callbacks=callback)

Epoch 1/5
59/59 - 1s - loss: 0.5096 - accuracy: 0.7612
Epoch 2/5
59/59 - 1s - loss: 0.3848 - accuracy: 0.8366
Epoch 3/5
59/59 - 1s - loss: 0.3255 - accuracy: 0.8680
Epoch 4/5
59/59 - 1s - loss: 0.2696 - accuracy: 0.8932
Epoch 5/5
59/59 - 1s - loss: 0.2180 - accuracy: 0.9257


<tensorflow.python.keras.callbacks.History at 0x155cb0950>

### Submit

In [29]:
test_result = model1.predict(padded_tests)

In [32]:
test_result

array([[0.4535236 ],
       [0.6374486 ],
       [0.51281166],
       ...,
       [0.7867126 ],
       [0.6981692 ],
       [0.24020772]], dtype=float32)

In [40]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,


In [49]:
tests

Unnamed: 0,id,keyword,location,text,target
0,0,,,Just happened a terrible car crash,0
1,2,,,"Heard about #earthquake is different cities, s...",1
2,3,,,"there is a forest fire at spot pond, geese are...",1
3,9,,,Apocalypse lighting. #Spokane #wildfires,1
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan,1
...,...,...,...,...,...
3258,10861,,,EARTHQUAKE SAFETY LOS ANGELES ÛÒ SAFETY FASTE...,1
3259,10865,,,Storm in RI worse than last hurricane. My city...,1
3260,10868,,,Green Line derailment in Chicago http://t.co/U...,1
3261,10874,,,MEG issues Hazardous Weather Outlook (HWO) htt...,1


In [51]:
submit_df = tests[['id', 'target']]

In [52]:
submit_df

Unnamed: 0,id,target
0,0,0
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [53]:
submit_df.to_csv('submit_prueba_36.csv', index=False)

### CNN

In [54]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(10, activation='sigmoid'))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_6 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                64010     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 11        
Total params: 2,502,077
Trainable params: 243,477
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [55]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=50,
          verbose=2,
          callbacks=callback)

Epoch 1/50
44/44 - 1s - loss: 0.5305 - accuracy: 0.7365 - val_loss: 0.4629 - val_accuracy: 0.8010
Epoch 2/50
44/44 - 1s - loss: 0.4246 - accuracy: 0.8240 - val_loss: 0.4222 - val_accuracy: 0.8144
Epoch 3/50
44/44 - 1s - loss: 0.3736 - accuracy: 0.8560 - val_loss: 0.4243 - val_accuracy: 0.8112
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x1536348d0>

### GRID SEARCH

In [76]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(Flatten())
    model1.add(Dense(fully_conected, activation='sigmoid'))
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [77]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[32, 128, 144, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [45,65,76,88],
                      fully_conected = [30, 60, 800], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=4, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   5.1s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.1s remaining:    0.0s


Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   3.7s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   5.2s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   3.9s
[CV] num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88, total=   4.5s
[CV] num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=144, kern_size=3, fully_

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:  1.5min finished


Epoch 2/15
Epoch 00002: early stopping
Best Accuracy : 0.8083
{'num_filters': 128, 'kern_size': 3, 'fully_conected': 60, 'epochs': 15, 'batch_size': 76}





In [79]:
model = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
model.add(e)
model.add(layers.Conv1D(128, 3, activation='relu'))
model.add(Flatten())
model.add(Dense(60, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [80]:
x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=76,
          epochs=15,
          verbose=2,
          callbacks=callback)

Epoch 1/15
74/74 - 1s - loss: 0.5077 - accuracy: 0.7516 - val_loss: 0.4315 - val_accuracy: 0.8112
Epoch 2/15
74/74 - 1s - loss: 0.3732 - accuracy: 0.8348 - val_loss: 0.4110 - val_accuracy: 0.8289
Epoch 3/15
74/74 - 1s - loss: 0.2807 - accuracy: 0.8874 - val_loss: 0.4254 - val_accuracy: 0.8219
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x1e9752a50>

In [93]:
model2 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
model2.add(e)
model2.add(layers.Conv1D(128, 3, activation='relu'))
model2.add(Flatten())
model2.add(Dense(60, activation='sigmoid'))
model2.add(Dense(1, activation='sigmoid'))

model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [94]:
model2.fit(padded_docs, tweets['target'],
          batch_size=76,
          epochs=2,
          verbose=2)

Epoch 1/2
98/98 - 2s - loss: 0.5198 - accuracy: 0.7441
Epoch 2/2
98/98 - 2s - loss: 0.3652 - accuracy: 0.8445


<tensorflow.python.keras.callbacks.History at 0x20498b590>

In [95]:
test_result = model1.predict(padded_tests)

In [96]:
test_result

array([[0.674353  ],
       [0.80772024],
       [0.6589092 ],
       ...,
       [0.8821541 ],
       [0.8534104 ],
       [0.3019307 ]], dtype=float32)

In [97]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,


In [98]:
tests['target'] = submit

In [99]:
submit_df = tests[['id', 'target']]

In [100]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [89]:
submit_df.to_csv('submit_prueba_37.csv', index=False)

### CNN

In [106]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(128, 3, activation='relu'))
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(Flatten())
    model1.add(Dense(fully_conected, activation='sigmoid'))
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [108]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[ 144, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      fully_conected = [60, 800], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=4, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76, total=   4.9s
[CV] num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.9s remaining:    0.0s


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76, total=   5.0s
[CV] num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76, total=   5.1s
[CV] num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=60, epochs=15, batch_size=76, total=   3.7s
[CV] num_filters=144, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=144, kern_size=7, fully_conected=60, epochs=15, batch_size=88, total=   5.4s
[CV] num_filters=144, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:  2.5min finished


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Best Accuracy : 0.8074
{'num_filters': 256, 'kern_size': 5, 'fully_conected': 800, 'epochs': 15, 'batch_size': 76}





In [None]:
para submitear

### CNN

In [109]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(layers.Conv1D(128, 3, activation='relu'))
    model1.add(Flatten())
    model1.add(Dense(fully_conected, activation='sigmoid'))
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [110]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[144, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      fully_conected = [60, 800], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=4, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88, total=   4.5s
[CV] num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.5s remaining:    0.0s


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88, total=   4.4s
[CV] num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88, total=   5.7s
[CV] num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=60, epochs=15, batch_size=88, total=   3.2s
[CV] num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88, total=   6.0s
[CV] num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:  1.9min finished


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Best Accuracy : 0.8052
{'num_filters': 144, 'kern_size': 3, 'fully_conected': 60, 'epochs': 15, 'batch_size': 88}





submit

In [114]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(Flatten())
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [115]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[128, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      fully_conected = [16, 60], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=4, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   5.4s
[CV] num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.4s remaining:    0.0s


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   4.5s
[CV] num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   2.7s
[CV] num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   2.7s
[CV] num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88, total=   4.1s
[CV] num_filters=256, kern_size=7, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:  1.3min finished


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Best Accuracy : 0.8031
{'num_filters': 256, 'kern_size': 3, 'fully_conected': 60, 'epochs': 15, 'batch_size': 88}





In [121]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers
from keras import activations

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(layers.Activation(activations.relu))
    model1.add(Flatten())
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [124]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[64, 128, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      fully_conected = [16, 60], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=10, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   4.2s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.2s remaining:    0.0s


Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   3.2s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   4.4s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   3.2s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   5.4s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   5.9s
[CV] num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   4.9s
[CV] num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   4.7s
[CV] num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76, total=   4.6s
[CV] num_filters=128, kern_size=3, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
E

Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76, total=   3.9s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76, total=   6.0s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76, total=   5.6s
[CV] num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=256, kern_size=5, fully_conected=16, epochs=15, batch_size=76, total=   3.9s
[CV] num_filters=128, kern_size=3, fully_conected=60, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  

[CV]  num_filters=64, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   3.6s
[CV] num_filters=64, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=64, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   4.7s
[CV] num_filters=64, kern_size=5, fully_conected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=64, kern_size=5, fully_conected=16, epochs=15, batch_size=88, total=   4.7s
Epoch 1/15


[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:  4.1min finished


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Best Accuracy : 0.8045
{'num_filters': 128, 'kern_size': 3, 'fully_conected': 16, 'epochs': 15, 'batch_size': 76}





In [128]:
model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
model1.add(e)
model1.add(layers.Conv1D(256, 5, activation='relu'))
model1.add(layers.Activation(activations.relu))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [126]:
x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=76,
          epochs=2,
          verbose=2,
          callbacks=callback)

Epoch 1/15
74/74 - 2s - loss: 0.5144 - accuracy: 0.7476 - val_loss: 0.4274 - val_accuracy: 0.8101
Epoch 2/15
74/74 - 2s - loss: 0.3513 - accuracy: 0.8502 - val_loss: 0.4149 - val_accuracy: 0.8219
Epoch 3/15
74/74 - 2s - loss: 0.2459 - accuracy: 0.9087 - val_loss: 0.4504 - val_accuracy: 0.8004
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x247682b10>

In [129]:
model1.fit(padded_docs, tweets['target'],
          batch_size=76,
          epochs=2,
          verbose=2)

Epoch 1/2
98/98 - 2s - loss: 0.4884 - accuracy: 0.7702
Epoch 2/2
98/98 - 2s - loss: 0.3404 - accuracy: 0.8569


<tensorflow.python.keras.callbacks.History at 0x21b88e050>

In [130]:
test_result = model1.predict(padded_tests)

In [131]:
test_result

array([[0.62312335],
       [0.81232417],
       [0.7813207 ],
       ...,
       [0.75635624],
       [0.81779766],
       [0.2327313 ]], dtype=float32)

In [132]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,


In [133]:
tests['target'] = submit

In [134]:
submit_df = tests[['id', 'target']]

In [135]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [136]:
submit_df.to_csv('submit_prueba_38.csv', index=False)

### CNN

In [148]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(layers.MaxPooling1D(pool_size=2, padding="valid"))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_531"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_531 (Embedding)    (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_696 (Conv1D)          (None, 25, 256)           179456    
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 12, 256)           0         
_________________________________________________________________
flatten_507 (Flatten)        (None, 3072)              0         
_________________________________________________________________
dense_871 (Dense)            (None, 1)                 3073      
Total params: 2,441,129
Trainable params: 182,529
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [149]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)

In [150]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=50,
          verbose=2,
          callbacks=callback)

Epoch 1/50
44/44 - 1s - loss: 0.5158 - accuracy: 0.7525 - val_loss: 0.4399 - val_accuracy: 0.8031
Epoch 2/50
44/44 - 1s - loss: 0.3831 - accuracy: 0.8339 - val_loss: 0.4263 - val_accuracy: 0.8166
Epoch 3/50
44/44 - 1s - loss: 0.3178 - accuracy: 0.8732 - val_loss: 0.4314 - val_accuracy: 0.8085
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x3092ba510>

In [151]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers
from keras import activations

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(num_filters, kern_size, pool_sizes):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(layers.MaxPooling1D(pool_size=pool_sizes, padding="valid"))
    model1.add(Flatten())
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [152]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[128, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      pool_sizes = [2, 10], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=10, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76, total=   4.6s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.6s remaining:    0.0s


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76, total=   5.7s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76, total=   4.3s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76, total=   4.3s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76, total=   5.5s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes

Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 00005: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76, total=   6.2s
[CV] pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76, total=   9.7s
[CV] pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76, total=   5.1s
[CV] pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76, total=   5.3s
[CV] pool_sizes=10, num_filters=128, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 00005

Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76, total=   4.9s
[CV] pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76, total=   5.0s
[CV] pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76, total=   5.1s
[CV] pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76, total=   6.0s
[CV] pool_sizes=2, num_filters=256, kern_size=3, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size

[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88, total=   4.7s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88, total=   2.7s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88, total=   2.7s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88, total=   4.7s
[CV] pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=7, epochs=15, batch_size=88, total=   4.7s
[CV] poo

[CV]  pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88, total=   4.8s
[CV] pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88, total=   2.8s
[CV] pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88, total=   3.7s
[CV] pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88, total=   4.7s
[CV] pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=256, kern_size=3, epochs=15, batch_size=88, total=   3.

[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:  4.0min finished


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
Best Accuracy : 0.8093
{'pool_sizes': 10, 'num_filters': 128, 'kern_size': 3, 'epochs': 15, 'batch_size': 76}





In [153]:
model = build_model(128, 3, 10)
model.summary()

Model: "sequential_583"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_583 (Embedding)    (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_748 (Conv1D)          (None, 29, 128)           38528     
_________________________________________________________________
max_pooling1d_52 (MaxPooling (None, 2, 128)            0         
_________________________________________________________________
flatten_559 (Flatten)        (None, 256)               0         
_________________________________________________________________
dense_923 (Dense)            (None, 1)                 257       
Total params: 2,297,385
Trainable params: 2,297,385
Non-trainable params: 0
_________________________________________________________________


In [156]:
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=15,
          verbose=2,
          callbacks=callback)

Epoch 1/15
44/44 - 1s - loss: 0.5500 - accuracy: 0.7157 - val_loss: 0.4626 - val_accuracy: 0.7881
Epoch 2/15
44/44 - 1s - loss: 0.3979 - accuracy: 0.8280 - val_loss: 0.4310 - val_accuracy: 0.8085
Epoch 3/15
44/44 - 1s - loss: 0.3286 - accuracy: 0.8725 - val_loss: 0.4232 - val_accuracy: 0.8144
Epoch 4/15
44/44 - 1s - loss: 0.2700 - accuracy: 0.8945 - val_loss: 0.4233 - val_accuracy: 0.8187
Epoch 00004: early stopping


<tensorflow.python.keras.callbacks.History at 0x14b281550>

In [166]:
model2 = build_model(128, 3, 10)
model2.summary()

Model: "sequential_585"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_585 (Embedding)    (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_750 (Conv1D)          (None, 29, 128)           38528     
_________________________________________________________________
max_pooling1d_54 (MaxPooling (None, 2, 128)            0         
_________________________________________________________________
flatten_561 (Flatten)        (None, 256)               0         
_________________________________________________________________
dense_925 (Dense)            (None, 1)                 257       
Total params: 2,297,385
Trainable params: 2,297,385
Non-trainable params: 0
_________________________________________________________________


In [167]:
model2.fit(padded_docs, tweets['target'],
          batch_size=76,
          epochs=3,
          verbose=2)

Epoch 1/3
98/98 - 1s - loss: 0.4860 - accuracy: 0.7689
Epoch 2/3
98/98 - 2s - loss: 0.3591 - accuracy: 0.8489
Epoch 3/3
98/98 - 1s - loss: 0.2790 - accuracy: 0.8901


<tensorflow.python.keras.callbacks.History at 0x265219810>

In [168]:
test_result = model2.predict(padded_tests)

In [169]:
test_result

array([[0.7977159 ],
       [0.78402555],
       [0.8646941 ],
       ...,
       [0.9667603 ],
       [0.9011095 ],
       [0.6470193 ]], dtype=float32)

In [170]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,


In [171]:
tests['target'] = submit

In [172]:
submit_df = tests[['id', 'target']]

In [173]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [174]:
submit_df.to_csv('submit_prueba_40.csv', index=False) #39 fue este con un epoch menos

### With features

In [418]:
tweets_features = pd.read_csv("train_features.csv")
test_features = pd.read_csv("test_features.csv")

In [419]:
tweets_features.head(1)

Unnamed: 0,id,text,target,text_without_stopwords,length,avg_word_length,amount_of_words,amount_of_unique_words,sentiment,stopwords_count,punctuation_count,mentions_count,hashtags_count,longest_word_length_without_stopwords,stopword_word_ratio,adjectives_count,nouns_count,verbs_count,adverbs_count
0,1,Our Deeds are the Reason of this #earthquake M...,1,Our Deeds Reason May ALLAH Forgive us,69,4.384615,13,13,0.2732,6,1,0,1,7,0.461538,0,6,1,0


In [420]:
test_features.head(1)

Unnamed: 0,id,text,text_without_stopwords,length,avg_word_length,amount_of_words,amount_of_unique_words,sentiment,stopwords_count,punctuation_count,mentions_count,hashtags_count,longest_word_length_without_stopwords,stopword_word_ratio,adjectives_count,nouns_count,verbs_count,adverbs_count
0,0,Just happened a terrible car crash,Just happened terrible car crash,34,4.833333,6,6,-0.7003,2,0,0,0,8,0.333333,1,2,1,1


In [421]:
tweets_features = tweets_features.drop(columns=['text', 'text_without_stopwords', 'target'])
test_features = test_features.drop(columns=['text', 'text_without_stopwords'])

In [534]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

features_train = scaler.fit_transform(tweets_features.iloc[:, 1:])
features_test = scaler.fit_transform(test_features.iloc[:, 1:])

### My tweets process

In [521]:
tweets.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [516]:
# Mocks real tokenizer used in glove

import re

def tokenize_input(input_text):
    to_tokens = input_text[:]
    token_specification = [
        ('url', r'https?:\/\/\S+\b|www\.(\w+\.)+\S*'),
        (' / ', r'/'),
        ('user', r'@\w+'),            
        ('smile', r'[8:=;][)d]+|[)d]+[\'`\-]?[8:=;]'),    
        ('lolface', r'[8:=;][\'`\-]?p'),      
        ('sadface', r'[8:=;][\'`\-]?\(|\)+[8:=;][\'`\-]?'),          
        ('neutralface', r'[8:=;][\'`\-]?[\/|l*]'),       
        ('heart', r'<3'),   
        ('number', r'[-+]?[.\d]*[\d]+[:,.\d]*')
    ]
    for replacement, regex in token_specification:
        to_tokens = re.sub(regex, replacement, to_tokens)
    return to_tokens

In [513]:
# Check
tokenize_input('https://regexr.com hola / :) <3 :p :(  8888 @justin')

url hola  /  smile heart lolface sadface  number !!!! wayyy user


In [544]:
tweets['text'] = tweets['text'].apply(tokenize_input)
tests['text'] = tests['text'].apply(tokenize_input)

### Train with features

In [536]:
t = Tokenizer()
t.fit_on_texts(tweets['text'])
vocab_size = len(t.word_index) + 1
print(vocab_size)

15325


In [537]:
t.word_index

{'url': 1,
 'the': 2,
 'user': 3,
 'a': 4,
 'in': 5,
 'to': 6,
 'number': 7,
 'of': 8,
 'and': 9,
 'i': 10,
 'is': 11,
 'for': 12,
 'on': 13,
 'you': 14,
 'my': 15,
 'that': 16,
 'it': 17,
 'with': 18,
 'at': 19,
 'by': 20,
 'this': 21,
 'from': 22,
 'be': 23,
 'are': 24,
 'was': 25,
 'have': 26,
 'like': 27,
 'amp': 28,
 'me': 29,
 'as': 30,
 'but': 31,
 'up': 32,
 'just': 33,
 'so': 34,
 'not': 35,
 'your': 36,
 'out': 37,
 'no': 38,
 'all': 39,
 'will': 40,
 'after': 41,
 'an': 42,
 'has': 43,
 'when': 44,
 'fire': 45,
 "i'm": 46,
 'get': 47,
 'now': 48,
 'we': 49,
 'new': 50,
 'if': 51,
 'more': 52,
 'via': 53,
 'about': 54,
 'or': 55,
 'what': 56,
 'news': 57,
 'they': 58,
 'one': 59,
 'how': 60,
 'people': 61,
 'he': 62,
 "it's": 63,
 "don't": 64,
 'been': 65,
 'who': 66,
 'over': 67,
 'into': 68,
 'do': 69,
 'video': 70,
 'can': 71,
 'emergency': 72,
 'there': 73,
 'disaster': 74,
 "'": 75,
 'police': 76,
 'than': 77,
 'u': 78,
 'her': 79,
 'his': 80,
 'would': 81,
 'still': 82,

In [545]:
# integer encode the documents
encoded_docs = t.texts_to_sequences(tweets['text'])
enconded_test = t.texts_to_sequences(tests['text'])

In [546]:
from keras.preprocessing.sequence import pad_sequences

max_length = 36 # Maxima cantidad de palabras en los tweets tokenizados
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
padded_tests = pad_sequences(enconded_test, maxlen=max_length, padding='post')
print(padded_docs)

[[ 111 4340   24 ...    0    0    0]
 [ 176   45  231 ...    0    0    0]
 [  39 1656 1526 ...    0    0    0]
 ...
 [ 101  211  427 ...    0    0    0]
 [ 115  804 1303 ...    0    0    0]
 [   2  191   52 ...    0    0    0]]


In [547]:
import numpy as np

embeddings_index = dict()
f = open('glove.twitter.27B.100d.txt') # Vectores entrenados de 100 dimensiones
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 1193514 word vectors.


In [548]:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: # Si la palabra no esta queda llena de 0s
        embedding_matrix[i] = embedding_vector

### CNN 

In [556]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers
from keras import activations

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(num_filters, kern_size, pool_sizes, fully_connected):
    model = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=36, trainable=True)
    model.add(e)
    model.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model.add(layers.Activation(activations.relu))
    model.add(layers.MaxPooling1D(pool_size=pool_sizes, padding="valid"))
    model.add(Flatten())
    model.add(Dense(fully_connected, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [557]:
x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)

In [558]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[128, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [76,88],
                      fully_connected = [16, 20],
                      pool_sizes = [2, 10], 
                      epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=10, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76, total=   3.4s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.4s remaining:    0.0s


Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76, total=   3.3s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76, total=   3.4s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76, total=   3.5s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs=15, batch_size=76, total=   4.7s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=16, epochs

Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88, total=   3.3s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88, total=   3.3s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88, total=   4.6s
[CV] pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=2, num_filters=128, kern_size=7, fully_connected=20, epochs=15, batch_size=88, total=   5.8s
[CV] pool_sizes=10, num_filters=128, kern_

Epoch 4/15
Epoch 00004: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=5, fully_connected=16, epochs=15, batch_size=88, total=   4.7s
[CV] pool_sizes=10, num_filters=128, kern_size=5, fully_connected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=5, fully_connected=16, epochs=15, batch_size=88, total=   3.9s
[CV] pool_sizes=10, num_filters=128, kern_size=5, fully_connected=16, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=10, num_filters=128, kern_size=5, fully_connected=16, epochs=15, batch_size=88, total=   4.0s
[CV] pool_sizes=2, num_filters=256, kern_size=5, fully_connected=20, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=5, fully_connected=20, epochs=15, batch_size=88, total=   5.6s
[CV] pool_sizes=2, num_filters=256, kern_size=5,

Epoch 3/15
Epoch 00003: early stopping
[CV]  pool_sizes=2, num_filters=256, kern_size=5, fully_connected=20, epochs=15, batch_size=88, total=   5.5s
Epoch 1/15


[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:  3.8min finished


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
Best Accuracy : 0.8095
{'pool_sizes': 10, 'num_filters': 128, 'kern_size': 5, 'fully_connected': 16, 'epochs': 15, 'batch_size': 88}





In [563]:
model = build_model(128, 5, 10, 16)
model.summary()

Model: "sequential_1258"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1258 (Embedding)   (None, 36, 100)           1532500   
_________________________________________________________________
conv1d_1632 (Conv1D)         (None, 32, 128)           64128     
_________________________________________________________________
activation_865 (Activation)  (None, 32, 128)           0         
_________________________________________________________________
max_pooling1d_780 (MaxPoolin (None, 3, 128)            0         
_________________________________________________________________
flatten_1162 (Flatten)       (None, 384)               0         
_________________________________________________________________
dense_2063 (Dense)           (None, 16)                6160      
_________________________________________________________________
dense_2064 (Dense)           (None, 1)             

In [564]:
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=88,
          epochs=15,
          verbose=2,
          callbacks=callback)

Epoch 1/15
64/64 - 1s - loss: 0.5112 - accuracy: 0.7512 - val_loss: 0.4428 - val_accuracy: 0.7945
Epoch 2/15
64/64 - 1s - loss: 0.3603 - accuracy: 0.8488 - val_loss: 0.4188 - val_accuracy: 0.8090
Epoch 3/15
64/64 - 1s - loss: 0.2597 - accuracy: 0.9024 - val_loss: 0.4332 - val_accuracy: 0.8004
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x3933f98d0>

In [567]:
model_s = build_model(128, 5, 10, 16)
model_s.summary()

Model: "sequential_1260"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1260 (Embedding)   (None, 36, 100)           1532500   
_________________________________________________________________
conv1d_1634 (Conv1D)         (None, 32, 128)           64128     
_________________________________________________________________
activation_867 (Activation)  (None, 32, 128)           0         
_________________________________________________________________
max_pooling1d_782 (MaxPoolin (None, 3, 128)            0         
_________________________________________________________________
flatten_1164 (Flatten)       (None, 384)               0         
_________________________________________________________________
dense_2067 (Dense)           (None, 16)                6160      
_________________________________________________________________
dense_2068 (Dense)           (None, 1)             

In [568]:
model_s.fit(padded_docs, tweets['target'],
          batch_size=88,
          epochs=2,
          verbose=2)

Epoch 1/2
85/85 - 1s - loss: 0.4859 - accuracy: 0.7735
Epoch 2/2
85/85 - 1s - loss: 0.3435 - accuracy: 0.8555


<tensorflow.python.keras.callbacks.History at 0x4f0213650>

In [569]:
test_result = model_s.predict(padded_tests)

In [570]:
test_result

array([[0.8464004 ],
       [0.8057761 ],
       [0.78480256],
       ...,
       [0.9500956 ],
       [0.90281564],
       [0.63919055]], dtype=float32)

In [571]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,


In [572]:
tests['target'] = submit

In [573]:
submit_df = tests[['id', 'target']]

In [574]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [577]:
submit_df.to_csv('submit_prueba_41.csv', index=False) #39 fue este con un epoch menos

### CNN Submit 0.8204

In [673]:
tweets_features.loc[:, 'length':]

Unnamed: 0,length,avg_word_length,amount_of_words,amount_of_unique_words,sentiment,stopwords_count,punctuation_count,mentions_count,hashtags_count,longest_word_length_without_stopwords,stopword_word_ratio,adjectives_count,nouns_count,verbs_count,adverbs_count
0,69,4.384615,13,13,0.2732,6,1,0,1,7,0.461538,0,6,1,0
1,38,4.571429,7,7,-0.3400,0,1,0,0,6,0.000000,0,6,0,0
2,133,5.090909,22,20,-0.2960,11,3,0,0,10,0.500000,1,7,7,0
3,65,7.125000,8,8,0.0000,1,2,0,1,10,0.125000,1,4,1,0
4,88,4.500000,16,15,0.0000,7,2,0,2,6,0.437500,0,6,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7429,136,6.210526,19,19,-0.6841,6,12,0,1,10,0.315789,0,13,3,0
7430,114,3.423077,26,25,-0.4939,16,1,0,0,8,0.615385,2,4,5,3
7431,121,5.100000,20,18,-0.7650,1,11,0,0,8,0.050000,0,14,0,0
7432,83,6.636364,11,11,-0.4939,2,5,0,0,8,0.181818,2,6,1,0


In [830]:
from keras import layers, Input, Model
from keras.layers import Dropout, Flatten, Concatenate, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Lambda, Activation
from keras import layers, Input, Model
from keras.optimizers import Adam

callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model():
    model = Sequential()
    x_train_input = Input(shape=(36,), name = 'x_train_input')
    x_train_features_input = Input(shape = (15, ), name = 'x_features_train') 
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=36, trainable=True)
    emb = e(x_train_input)
    
    conv_out1 = Conv1D(256, 2, activation='relu')(emb)
    
    conv_out2 = Conv1D(111, 2, activation='relu')(conv_out1)
    activation = Activation('relu')(conv_out2)
    max_pool2 = GlobalMaxPooling1D()(activation)

    conc = Concatenate()([max_pool2, x_train_features_input])

    dense2 = Dense(100, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01))(conc)
    dense3 = Dense(1, activation='sigmoid')(dense2)
    
    model = Model(inputs = [x_train_input , x_train_features_input], outputs = dense3)
    
    optimizer = Adam(learning_rate=0.001) #default
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [831]:
model = build_model()
model.summary()

Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
x_train_input (InputLayer)      [(None, 36)]         0                                            
__________________________________________________________________________________________________
embedding_1699 (Embedding)      (None, 36, 100)      1532500     x_train_input[0][0]              
__________________________________________________________________________________________________
conv1d_2079 (Conv1D)            (None, 35, 256)      51456       embedding_1699[0][0]             
__________________________________________________________________________________________________
conv1d_2080 (Conv1D)            (None, 34, 111)      56943       conv1d_2079[0][0]                
___________________________________________________________________________________________

In [832]:
model.fit([padded_docs,tweets_features.loc[:, 'length':]], tweets['target'],
          batch_size=88,
          epochs=3,
          verbose=2, callbacks = [callback])

Epoch 1/3




85/85 - 2s - loss: 1.3256 - accuracy: 0.7037
Epoch 2/3




85/85 - 2s - loss: 0.7101 - accuracy: 0.8301
Epoch 3/3




85/85 - 2s - loss: 0.5264 - accuracy: 0.8520


<tensorflow.python.keras.callbacks.History at 0x4d18c5c10>

In [833]:
test_result = model.predict([padded_tests, test_features.loc[:, 'length':]])

In [834]:
test_result

array([[0.6675244],
       [0.9170276],
       [0.7969888],
       ...,
       [0.9986186],
       [0.8948508],
       [0.6564589]], dtype=float32)

In [835]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,


In [836]:
tests['target'] = submit

In [837]:
submit_df = tests[['id', 'target']]

In [838]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,0
3259,10865,1
3260,10868,1
3261,10874,1


In [840]:
submit_df.to_csv('submit_prueba_42.csv', index=False)

### CNN  Submit 0.8256

In [926]:
from keras import layers, Input, Model
from keras.layers import Dropout, Flatten, Concatenate, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Lambda, Activation
from keras import layers, Input, Model
from keras.optimizers import Adam

callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model():
    model = Sequential()
    x_train_input = Input(shape=(36,), name = 'x_train_input')
    x_train_features_input = Input(shape = (15, ), name = 'x_features_train') 
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=36, trainable=True)
    emb = e(x_train_input)
    
    conv_out1 = Conv1D(256, 2, activation='relu')(emb)
    
    activation_1 = Activation('relu')(conv_out1)
    
    conv_out2 = Conv1D(111, 2, activation='relu')(activation_1)
    
    activation = Activation('relu')(conv_out2)
    
    max_pool2 = GlobalMaxPooling1D()(activation)

    conc = Concatenate()([max_pool2, x_train_features_input])

    dense2 = Dense(100, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01))(conc)
    dense3 = Dense(1, activation='sigmoid')(dense2)
    
    model = Model(inputs = [x_train_input , x_train_features_input], outputs = dense3)
    
    optimizer = Adam(learning_rate=0.001) #default
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [927]:
model = build_model()
model.summary()

Model: "model_23"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
x_train_input (InputLayer)      [(None, 36)]         0                                            
__________________________________________________________________________________________________
embedding_1710 (Embedding)      (None, 36, 100)      1532500     x_train_input[0][0]              
__________________________________________________________________________________________________
conv1d_2101 (Conv1D)            (None, 35, 256)      51456       embedding_1710[0][0]             
__________________________________________________________________________________________________
activation_1309 (Activation)    (None, 35, 256)      0           conv1d_2101[0][0]                
___________________________________________________________________________________________

In [928]:
model.fit([padded_docs,tweets_features.loc[:, 'length':]], tweets['target'],
          batch_size=88,
          epochs=3,
          verbose=2, callbacks = [callback])

Epoch 1/3




85/85 - 2s - loss: 1.5573 - accuracy: 0.6536
Epoch 2/3




85/85 - 2s - loss: 0.7745 - accuracy: 0.8245
Epoch 3/3




85/85 - 2s - loss: 0.5746 - accuracy: 0.8528


<tensorflow.python.keras.callbacks.History at 0x548723750>

In [929]:
test_result = model.predict([padded_tests, test_features.loc[:, 'length':]])

In [930]:
test_result

array([[0.72363526],
       [0.95123327],
       [0.86855066],
       ...,
       [0.99703455],
       [0.9047524 ],
       [0.6253783 ]], dtype=float32)

In [931]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,


In [932]:
tests['target'] = submit

In [933]:
submit_df = tests[['id', 'target']]

In [934]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,0
3259,10865,1
3260,10868,1
3261,10874,1


In [936]:
submit_df.to_csv('submit_prueba_43.csv', index=False)

### CNN

In [1314]:
from keras import layers, Input, Model
from keras.layers import Dropout, Flatten, Concatenate, Embedding, Conv1D, GlobalMaxPooling1D, Dense, Lambda, Activation
from keras import layers, Input, Model
from keras.optimizers import Adam

callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model():
    model = Sequential()
    x_train_input = Input(shape=(36,), name = 'x_train_input')
    x_train_features_input = Input(shape = (15, ), name = 'x_features_train') 
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=36, trainable=True)
    emb = e(x_train_input)
    
    conv_out1 = Conv1D(256, 2, activation='relu')(emb)
    
    activation_1 = Activation('relu')(conv_out1)
    
    conv_out1 = Conv1D(111, 2, activation='relu')(activation_1)
    
    activation_2 = Activation('relu')(conv_out1)
    
    conv_out2 = Conv1D(111, 2, activation='relu')(activation_2)
    
    activation = Activation('relu')(conv_out2)
    
    max_pool2 = GlobalMaxPooling1D()(activation)

    conc = Concatenate()([max_pool2, x_train_features_input])

    dense2 = Dense(90, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01))(conc)
    dense3 = Dense(1, activation='sigmoid')(dense2)
    
    model = Model(inputs = [x_train_input , x_train_features_input], outputs = dense3)
    
    optimizer = Adam(learning_rate=0.001) #default
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [1315]:
model = build_model()
model.summary()

Model: "model_65"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
x_train_input (InputLayer)      [(None, 36)]         0                                            
__________________________________________________________________________________________________
embedding_1753 (Embedding)      (None, 36, 100)      1532500     x_train_input[0][0]              
__________________________________________________________________________________________________
conv1d_2228 (Conv1D)            (None, 35, 256)      51456       embedding_1753[0][0]             
__________________________________________________________________________________________________
activation_1428 (Activation)    (None, 35, 256)      0           conv1d_2228[0][0]                
___________________________________________________________________________________________

In [1316]:
model.fit([padded_docs,tweets_features.loc[:, 'length':]], tweets['target'],
          batch_size=88,
          epochs=3,
          verbose=2, callbacks = [callback])

Epoch 1/3




85/85 - 3s - loss: 2.0475 - accuracy: 0.6219
Epoch 2/3




85/85 - 3s - loss: 0.8349 - accuracy: 0.7674
Epoch 3/3




85/85 - 3s - loss: 0.6080 - accuracy: 0.8391


<tensorflow.python.keras.callbacks.History at 0x57d6ac810>

In [1317]:
test_result = model.predict([padded_tests, test_features.loc[:, 'length':]])

In [1318]:
test_result

array([[0.7608732 ],
       [0.8834977 ],
       [0.75991297],
       ...,
       [0.9748985 ],
       [0.94214296],
       [0.63062084]], dtype=float32)

In [1319]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,


In [1320]:
tests['target'] = submit

In [1321]:
submit_df = tests[['id', 'target']]

In [1322]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,0
3259,10865,1
3260,10868,1
3261,10874,1
