In [1]:
import pandas as pd
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Embedding, Input, Dropout, Activation, GlobalMaxPool1D
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
dataset = 'C://Users//H//Desktop//Spring18//-AdvancedDatabases//Project//Project_Work//final_binary_dataset.csv'
data = pd.read_csv(dataset, encoding='utf-8-sig')

In [4]:
data.head()

Unnamed: 0,Tweet,For guns?,Location,For guns,Against guns,Makes no sense
0,@Mary_rnntt2 @lauren_hoggs You're blinded by y...,0.0,"Mountain Lakes, NJ",0,1,0
1,"Hey #MNLEG, don’t be idiotic. gun laws, not pa...",0.0,"Minneapolis, MN",0,1,0
2,"Gun owners, women, kids, all opposed to the ab...",0.0,"Houston, TX",0,1,0
3,@PhilipRucker Is he taking guns away from ment...,0.0,"Broomall, PA",0,1,0
4,Blissfully unaware that it’s too late: Califor...,1.0,"Teaneck, NJ",1,0,0


In [5]:
training = data.iloc[:200]
testing = data.iloc[200:]

In [6]:
labels = ['For guns', 'Against guns']
features_train = training[labels].values
features_test = testing[labels].values
tweet_train = training['Tweet']
tweet_test = testing['Tweet']

In [8]:
max_words = 20000
chunk_size = 200

def convert_embedding(text):
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(list(text))
    tokenized = tokenizer.texts_to_sequences(text)
    embedding = pad_sequences(tokenized, maxlen=chunk_size)   # Every Tweet is represented by vector of length 200
    return embedding

In [9]:
x_train = convert_embedding(tweet_train)
x_test = convert_embedding(tweet_test)

In [11]:
print("Building LSTM")
model = Sequential()
model.add(Embedding(max_words, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Building LSTM


In [12]:
print("Training LSTM")
model.fit(x_train, features_train, batch_size=32, epochs=20, validation_split=0.1)

Training LSTM
Train on 180 samples, validate on 20 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2099bdc6fd0>

In [15]:
inp = Input(shape=(chunk_size, ))
embed_size = 128
x1 = Embedding(max_words, embed_size)(inp)
x1 = LSTM(60, return_sequences=True, name='lstm_layer')(x1)
x1 = GlobalMaxPool1D()(x1)
x1 = Dropout(0.1)(x1)
x1 = Dense(50, activation='relu')(x1)
x1 = Dropout(0.1)(x1)
x1 = Dense(2, activation='sigmoid')(x1)
model = Model(inputs=inp, outputs=x1)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, features_train, epochs=5, batch_size=32, validation_data=(x_test, features_test))

Train on 200 samples, validate on 49 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x209ae67e860>

In [14]:
prediction = model.predict(x_test, batch_size=1024)
prediction

array([[0.36695078, 0.6234031 ],
       [0.36710042, 0.6242135 ],
       [0.37757   , 0.61846733],
       [0.37110582, 0.61395586],
       [0.36575592, 0.6261747 ],
       [0.36369658, 0.62905794],
       [0.35492986, 0.63483834],
       [0.35922006, 0.6315029 ],
       [0.3679899 , 0.6238169 ],
       [0.37044165, 0.6229391 ],
       [0.3611755 , 0.6291321 ],
       [0.37025034, 0.62524337],
       [0.3687732 , 0.6268966 ],
       [0.36466876, 0.6218073 ],
       [0.36980283, 0.6124285 ],
       [0.35718268, 0.6338981 ],
       [0.36181366, 0.62929714],
       [0.36944437, 0.62217516],
       [0.36119252, 0.62435883],
       [0.36447203, 0.6185644 ],
       [0.3700261 , 0.619133  ],
       [0.3584271 , 0.62629855],
       [0.36620885, 0.6185269 ],
       [0.35778505, 0.63377136],
       [0.36542153, 0.61585975],
       [0.3609792 , 0.62917817],
       [0.36823344, 0.6222783 ],
       [0.36551356, 0.6254545 ],
       [0.3619303 , 0.6260158 ],
       [0.36307952, 0.6269832 ],
       [0.