In [1]:
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
import pandas as pd

In [2]:
tweets = pd.read_csv('train.csv') 
tests = pd.read_csv('test.csv') 

In [3]:
tweets.drop_duplicates(subset = 'text', keep = False, inplace = True)
tweets.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7434 entries, 0 to 7612
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        7434 non-null   int64 
 1   keyword   7378 non-null   object
 2   location  4982 non-null   object
 3   text      7434 non-null   object
 4   target    7434 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 348.5+ KB


In [4]:
t = Tokenizer()
t.fit_on_texts(tweets['text'])
vocab_size = len(t.word_index) + 1
print(vocab_size)

22586


In [5]:
t.word_index

{'t': 1,
 'co': 2,
 'http': 3,
 'the': 4,
 'a': 5,
 'in': 6,
 'to': 7,
 'of': 8,
 'and': 9,
 'i': 10,
 'is': 11,
 'for': 12,
 'on': 13,
 'you': 14,
 'my': 15,
 'that': 16,
 'it': 17,
 'with': 18,
 'at': 19,
 'by': 20,
 'this': 21,
 'https': 22,
 'from': 23,
 'be': 24,
 'are': 25,
 'was': 26,
 'have': 27,
 'like': 28,
 'amp': 29,
 'me': 30,
 'as': 31,
 'up': 32,
 'but': 33,
 'just': 34,
 'so': 35,
 'not': 36,
 'your': 37,
 'out': 38,
 'no': 39,
 'all': 40,
 'will': 41,
 'after': 42,
 'an': 43,
 'has': 44,
 'when': 45,
 'fire': 46,
 "i'm": 47,
 'get': 48,
 'now': 49,
 'we': 50,
 'new': 51,
 'if': 52,
 'more': 53,
 '2': 54,
 'via': 55,
 'about': 56,
 'or': 57,
 'news': 58,
 'what': 59,
 'they': 60,
 'one': 61,
 'how': 62,
 'people': 63,
 'he': 64,
 "it's": 65,
 "don't": 66,
 'been': 67,
 'who': 68,
 'over': 69,
 'into': 70,
 'do': 71,
 'video': 72,
 'can': 73,
 'emergency': 74,
 "'": 75,
 'there': 76,
 'disaster': 77,
 'police': 78,
 'than': 79,
 '3': 80,
 'u': 81,
 'her': 82,
 'his': 83,

In [27]:
# integer encode the documents
encoded_docs = t.texts_to_sequences(tweets['text'])
enconded_test = t.texts_to_sequences(tests['text'])

In [28]:
from keras.preprocessing.sequence import pad_sequences

max_length = 31 # Maxima cantidad de palabras en los tweets
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
padded_tests = pad_sequences(enconded_test, maxlen=max_length, padding='post')
print(padded_docs)

[[ 116 4534   25 ...    0    0    0]
 [ 182   46  242 ...    0    0    0]
 [  40 1705 1572 ...    0    0    0]
 ...
 [ 106  225  453 ...    0    0    0]
 [ 121  837 1338 ...    0    0    0]
 [   4  201   53 ...    0    0    0]]


In [8]:
import numpy as np

embeddings_index = dict()
f = open('glove.twitter.27B.100d.txt') # Vectores entrenados de 100 dimensiones
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 1193514 word vectors.


In [9]:
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None: # Si la palabra no esta queda llena de 0s
        embedding_matrix[i] = embedding_vector

In [20]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_3 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6401      
Total params: 2,444,457
Trainable params: 185,857
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [21]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)

In [22]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=50,
          verbose=2,
          callbacks=callback)

Epoch 1/50
44/44 - 1s - loss: 0.5262 - accuracy: 0.7444 - val_loss: 0.4449 - val_accuracy: 0.7983
Epoch 2/50
44/44 - 1s - loss: 0.3909 - accuracy: 0.8335 - val_loss: 0.4308 - val_accuracy: 0.8133
Epoch 3/50
44/44 - 1s - loss: 0.3256 - accuracy: 0.8687 - val_loss: 0.4464 - val_accuracy: 0.8144
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x152fee190>

In [25]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_5 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6401      
Total params: 2,444,457
Trainable params: 185,857
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [26]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(padded_docs, tweets['target'],
          batch_size=128,
          epochs=5,
          verbose=2,
          callbacks=callback)

Epoch 1/5
59/59 - 1s - loss: 0.5096 - accuracy: 0.7612
Epoch 2/5
59/59 - 1s - loss: 0.3848 - accuracy: 0.8366
Epoch 3/5
59/59 - 1s - loss: 0.3255 - accuracy: 0.8680
Epoch 4/5
59/59 - 1s - loss: 0.2696 - accuracy: 0.8932
Epoch 5/5
59/59 - 1s - loss: 0.2180 - accuracy: 0.9257


<tensorflow.python.keras.callbacks.History at 0x155cb0950>

In [29]:
test_result = model1.predict(padded_tests)

In [32]:
test_result

array([[0.4535236 ],
       [0.6374486 ],
       [0.51281166],
       ...,
       [0.7867126 ],
       [0.6981692 ],
       [0.24020772]], dtype=float32)

In [40]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,


In [49]:
tests

Unnamed: 0,id,keyword,location,text,target
0,0,,,Just happened a terrible car crash,0
1,2,,,"Heard about #earthquake is different cities, s...",1
2,3,,,"there is a forest fire at spot pond, geese are...",1
3,9,,,Apocalypse lighting. #Spokane #wildfires,1
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan,1
...,...,...,...,...,...
3258,10861,,,EARTHQUAKE SAFETY LOS ANGELES ÛÒ SAFETY FASTE...,1
3259,10865,,,Storm in RI worse than last hurricane. My city...,1
3260,10868,,,Green Line derailment in Chicago http://t.co/U...,1
3261,10874,,,MEG issues Hazardous Weather Outlook (HWO) htt...,1


In [51]:
submit_df = tests[['id', 'target']]

In [52]:
submit_df

Unnamed: 0,id,target
0,0,0
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [53]:
submit_df.to_csv('submit_prueba_36.csv', index=False)

In [54]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=False)
model1.add(e)
model1.add(layers.Conv1D(256, 7, activation='relu'))
model1.add(Flatten())
model1.add(Dense(10, activation='sigmoid'))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(model1.summary())

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 31, 100)           2258600   
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 25, 256)           179456    
_________________________________________________________________
flatten_6 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                64010     
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 11        
Total params: 2,502,077
Trainable params: 243,477
Non-trainable params: 2,258,600
_________________________________________________________________
None


In [55]:
from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model1.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=128,
          epochs=50,
          verbose=2,
          callbacks=callback)

Epoch 1/50
44/44 - 1s - loss: 0.5305 - accuracy: 0.7365 - val_loss: 0.4629 - val_accuracy: 0.8010
Epoch 2/50
44/44 - 1s - loss: 0.4246 - accuracy: 0.8240 - val_loss: 0.4222 - val_accuracy: 0.8144
Epoch 3/50
44/44 - 1s - loss: 0.3736 - accuracy: 0.8560 - val_loss: 0.4243 - val_accuracy: 0.8112
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x1536348d0>

In [76]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers

from keras.callbacks import EarlyStopping
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

def build_model(fully_conected, num_filters, kern_size):
    model1 = Sequential()
    e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
    model1.add(e)
    model1.add(layers.Conv1D(num_filters, kern_size, activation='relu'))
    model1.add(Flatten())
    model1.add(Dense(fully_conected, activation='sigmoid'))
    model1.add(Dense(1, activation='sigmoid'))

    model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model1

In [77]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

param_grid = dict(num_filters=[32, 128, 144, 256],
                      kern_size=[3, 5, 7],
                      batch_size = [45,65,76,88],
                      fully_conected = [30, 60, 800], epochs = [15])

model = KerasClassifier(build_fn=build_model, epochs=15, validation_split=0.1,verbose=1)

grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                              cv=4, verbose=2, n_iter=5, n_jobs=1,scoring = 'accuracy')

grid_result = grid.fit(x_train, y_train, callbacks=[callback])


test_accuracy = grid.score(x_test, y_test)

# Save and evaluate results
s = ('Best Accuracy : {:.4f}\n{}\n\n\n')
output_string = s.format(
            grid_result.best_score_,
            grid_result.best_params_)
            
print(output_string)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   5.1s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.1s remaining:    0.0s


Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   3.7s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   5.2s
[CV] num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=128, kern_size=7, fully_conected=800, epochs=15, batch_size=88, total=   3.9s
[CV] num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88, total=   4.5s
[CV] num_filters=144, kern_size=3, fully_conected=800, epochs=15, batch_size=88 
Epoch 1/15
Epoch 2/15
Epoch 00002: early stopping
[CV]  num_filters=144, kern_size=3, fully_

[CV]  num_filters=128, kern_size=3, fully_conected=30, epochs=15, batch_size=76, total=   6.7s
[CV] num_filters=128, kern_size=3, fully_conected=30, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=30, epochs=15, batch_size=76, total=   5.6s
[CV] num_filters=128, kern_size=3, fully_conected=30, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=30, epochs=15, batch_size=76, total=   4.1s
[CV] num_filters=128, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping
[CV]  num_filters=128, kern_size=3, fully_conected=60, epochs=15, batch_size=76, total=   4.1s
[CV] num_filters=128, kern_size=3, fully_conected=60, epochs=15, batch_size=76 
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping
[CV]  num_filters=128, kern_size=3

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed:  1.5min finished


Epoch 2/15
Epoch 00002: early stopping
Best Accuracy : 0.8083
{'num_filters': 128, 'kern_size': 3, 'fully_conected': 60, 'epochs': 15, 'batch_size': 76}





In [79]:
model = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
model.add(e)
model.add(layers.Conv1D(128, 3, activation='relu'))
model.add(Flatten())
model.add(Dense(60, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [80]:
x_train, x_test, y_train, y_test = \
train_test_split(padded_docs, tweets['target'], test_size = 0.25, random_state = 123)
callback = EarlyStopping(monitor = 'val_loss', patience = 1, verbose=1)
callbacks = [callback]

model.fit(x_train, y_train,
          validation_data=(x_test, y_test),
          batch_size=76,
          epochs=15,
          verbose=2,
          callbacks=callback)

Epoch 1/15
74/74 - 1s - loss: 0.5077 - accuracy: 0.7516 - val_loss: 0.4315 - val_accuracy: 0.8112
Epoch 2/15
74/74 - 1s - loss: 0.3732 - accuracy: 0.8348 - val_loss: 0.4110 - val_accuracy: 0.8289
Epoch 3/15
74/74 - 1s - loss: 0.2807 - accuracy: 0.8874 - val_loss: 0.4254 - val_accuracy: 0.8219
Epoch 00003: early stopping


<tensorflow.python.keras.callbacks.History at 0x1e9752a50>

In [81]:
model1 = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=31, trainable=True)
model1.add(e)
model1.add(layers.Conv1D(128, 3, activation='relu'))
model1.add(Flatten())
model1.add(Dense(60, activation='sigmoid'))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [82]:
model1.fit(padded_docs, tweets['target'],
          batch_size=76,
          epochs=3,
          verbose=2)

Epoch 1/3
98/98 - 2s - loss: 0.4888 - accuracy: 0.7657
Epoch 2/3
98/98 - 2s - loss: 0.3660 - accuracy: 0.8446
Epoch 3/3
98/98 - 2s - loss: 0.2684 - accuracy: 0.8931


<tensorflow.python.keras.callbacks.History at 0x1ffb6a650>

In [83]:
test_result = model1.predict(padded_tests)

In [84]:
test_result

array([[0.8230183 ],
       [0.87129927],
       [0.86996144],
       ...,
       [0.8257675 ],
       [0.9024044 ],
       [0.27452403]], dtype=float32)

In [85]:
submit = []

for i in test_result:
    if i >= 0.5 :
        submit.append(1)
    else:
        submit.append(0)

submit

[1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,


In [86]:
tests['target'] = submit

In [87]:
submit_df = tests[['id', 'target']]

In [88]:
submit_df

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [89]:
submit_df.to_csv('submit_prueba_37.csv', index=False)