In [1]:
import pandas as pd
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Embedding, Input, Dropout, Activation, GlobalMaxPool1D
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
dataset = 'C://Users//H//Desktop//Spring18//-AdvancedDatabases//Project//Project_Work//final_dataset.csv'
data = pd.read_csv(dataset, encoding='utf-8-sig')
data.head()

Unnamed: 0,Tweet,For guns?,Location,For guns,Against guns,Makes no sense
0,@Mary_rnntt2 @lauren_hoggs You're blinded by y...,0.0,"Mountain Lakes, NJ",0,1,0
1,"Hey #MNLEG, don’t be idiotic. gun laws, not pa...",0.0,"Minneapolis, MN",0,1,0
2,"Gun owners, women, kids, all opposed to the ab...",0.0,"Houston, TX",0,1,0
3,@PhilipRucker Is he taking guns away from ment...,0.0,"Broomall, PA",0,1,0
4,Blissfully unaware that it’s too late: Califor...,1.0,"Teaneck, NJ",1,0,0


In [3]:
training = data.iloc[:340]
testing = data.iloc[340:]

In [4]:
labels = ['For guns', 'Against guns', 'Makes no sense']
features_train = training[labels].values
features_test = testing[labels].values
tweet_train = training['Tweet']
tweet_test = testing['Tweet']

In [5]:
max_words = 20000
chunk_size = 200

In [6]:
# Converting Tweets to Embeddings
def convert_embedding(text):
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(list(text))
    tokenized = tokenizer.texts_to_sequences(text)
    embedding = pad_sequences(tokenized, maxlen=chunk_size)   # Every Tweet is represented by vector of length 200
    return embedding

In [7]:
x_train = convert_embedding(tweet_train)
x_test = convert_embedding(tweet_test)

In [8]:
print("Building LSTM")
model = Sequential()
model.add(Embedding(max_words, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Building LSTM


In [9]:
print("Training LSTM")
model.fit(x_train, features_train, batch_size=32, epochs=20, validation_split=0.1)

Training LSTM
Train on 306 samples, validate on 34 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1fcd08c4470>

In [10]:
inp = Input(shape=(chunk_size, ))
embed_size = 128
x1 = Embedding(max_words, embed_size)(inp)
x1 = LSTM(60, return_sequences=True, name='lstm_layer')(x1)
x1 = GlobalMaxPool1D()(x1)
# x1 = Dropout(0.1)(x1)
x1 = Dense(50, activation='relu')(x1)
x1 = Dropout(0.1)(x1)
x1 = Dense(3, activation='sigmoid')(x1)
model = Model(inputs=inp, outputs=x1)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, features_train, epochs=5, batch_size=32, validation_data=(x_test, features_test))

Train on 340 samples, validate on 81 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1fcd40715c0>

In [11]:
prediction = model.predict(x_test, batch_size=1024)
prediction

array([[0.20159674, 0.42922023, 0.35911864],
       [0.19861096, 0.43909538, 0.3607603 ],
       [0.19732977, 0.42792493, 0.36550754],
       [0.20611168, 0.41040257, 0.38239464],
       [0.19534165, 0.45775244, 0.33277142],
       [0.19765311, 0.42484635, 0.37343448],
       [0.19530053, 0.4510795 , 0.3379714 ],
       [0.19821247, 0.43094257, 0.36551094],
       [0.19345771, 0.4346011 , 0.3625552 ],
       [0.20018955, 0.41410583, 0.37949583],
       [0.2012406 , 0.44914424, 0.34514642],
       [0.19469297, 0.39410302, 0.40365493],
       [0.19605087, 0.4040732 , 0.39480454],
       [0.19705611, 0.43424875, 0.3615165 ],
       [0.19365747, 0.41176164, 0.3858616 ],
       [0.20289554, 0.4187912 , 0.3789243 ],
       [0.19568655, 0.43475372, 0.35781354],
       [0.20257042, 0.43099728, 0.36225465],
       [0.19977431, 0.4414995 , 0.34871578],
       [0.19158438, 0.4239373 , 0.3744459 ],
       [0.20160803, 0.38509858, 0.40916687],
       [0.19441216, 0.45291114, 0.34294093],
       [0.

In [17]:
pd.options.display.max_colwidth = 1000
testing['Tweet']

340                                                                                                                                                                                        @RealJamesWoods So car fatalities exceed gun fatalities by a factor of 3 to 1. We should therefore get rid of cars before we get rid of guns! 
341     @NMPGVnow @SUPGVNetwork @EndNRA @CSGV @DamonForNM @SFPSdistrict Seems self destructive if rejecting money for the air rifle team, which teaches that traditional gun safety and marksmanship is important whereas idiotic pronouncements from wayne lapierre and dana loesch should be ignored. @NRA @DrCrifasi @DanielWWebster1 
342                                                                                                                                           @michaelianblack This proves that a hand gun didn’t mow down many. shot 3 that survived yes a tragedy but not a massacre if she had an AR-15 many would have died like the other shootings 
343       