In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Input, Dense, Embedding, Flatten, Dropout, LSTM
from keras.models import Model, Sequential
from keras.layers.convolutional import Conv1D
from keras.layers.pooling import MaxPooling1D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score, f1_score
import seaborn as sn
from time import time
import os
import encoders
import models
%matplotlib inline

In [15]:
# Dataset: tweets labelled with a party, plus a table of Twitter handles.
categories = ['Democrat', 'Republican']
tweetsdf = pd.read_csv('democratvsrepublicantweets/ExtractedTweets.csv')
handlesdf = pd.read_csv('democratvsrepublicantweets/TwitterHandles.csv')

# Targets: 0.0 where Party is 'Democrat', 1.0 for every other row.
parties = tweetsdf['Party']
y = np.asarray(parties != 'Democrat', dtype=float)

# Inputs: upper-case every tweet so the character set is case-insensitive.
raw_tweets = tweetsdf['Tweet']
normalized_tweets = raw_tweets.str.upper()
max_tweet_len = normalized_tweets.str.len().max()

# NOTE(review): `encoders` is a project module not shown here. Presumably
# create_encoder_decoder builds char<->int lookups from the concatenated
# corpus and encode_strings maps each tweet to a sequence of max_tweet_len
# integers -- confirm against encoders.py.
char_to_int, int_to_char = encoders.create_encoder_decoder(normalized_tweets.str.cat())
X = encoders.encode_strings(normalized_tweets, char_to_int, max_tweet_len)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [25]:
def model6(vocabulary_size, input_length, embedding_dim=32, lstm_units=2,
           learning_rate=0.0003):
    """Build and compile an Embedding -> LSTM -> 2-way softmax classifier.

    Args:
        vocabulary_size: Number of distinct integer tokens the Embedding
            layer must accept (its ``input_dim``).
        input_length: Length of every input sequence.
        embedding_dim: Width of each embedding vector. Defaults to 32.
        lstm_units: Number of units in the LSTM layer. Defaults to 2.
        learning_rate: Step size passed to the Adam optimizer. Defaults
            to 0.0003.

    Returns:
        A ``(weights_filename, model)`` tuple. The filename is always
        ``'model6-weights.hdf5'``, so weights saved under one set of layer
        sizes will not load into a model built with different
        ``embedding_dim`` / ``lstm_units`` / ``vocabulary_size`` values.
    """
    input_layer = Input(shape=(input_length,))
    x = Embedding(output_dim=embedding_dim,
                  input_dim=vocabulary_size,
                  input_length=input_length)(input_layer)
    x = LSTM(lstm_units)(x)
    # Two softmax outputs: targets must be one-hot rows of width 2.
    x = Dense(2, activation='softmax')(x)
    model = Model(input_layer, x)
    # Keeps the `lr` keyword the original call used.
    optimizer = Adam(lr=learning_rate)
    # categorical_crossentropy is the loss that matches a softmax head with
    # one-hot targets. For a two-class softmax it is mathematically the same
    # value the previous binary_crossentropy produced, so existing loss
    # curves and checkpoints stay comparable.
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    return 'model6-weights.hdf5', model

In [26]:
# Build the model. The vocabulary size is len(char_to_int) + 1.
# NOTE(review): the extra slot is presumably reserved for a padding/unknown
# index produced by the encoders module -- confirm there.
filepath, model = model6(len(char_to_int) + 1, max_tweet_len)

# Resume from an earlier checkpoint when one exists. os.path.isfile checks the
# path directly, so it ignores a directory of the same name and still works if
# filepath ever includes a directory component (a membership test against
# os.listdir() only sees bare names in the current directory).
if os.path.isfile(filepath):
    model.load_weights(filepath)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 166)               0         
_________________________________________________________________
embedding_8 (Embedding)      (None, 166, 32)           16832     
_________________________________________________________________
lstm_7 (LSTM)                (None, 2)                 280       
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 6         
Total params: 17,118
Trainable params: 17,118
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Callbacks: write weights to `filepath` each time the training loss reaches a
# new minimum, and log to a TensorBoard directory named after the current time.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
callbacks_list = [checkpoint, tensorboard]

# The model ends in a 2-unit softmax, so the 0/1 labels are expanded to
# one-hot rows before fitting.
y_train_onehot = np_utils.to_categorical(y_train)

model.fit(
    x=X_train,
    y=y_train_onehot,
    batch_size=1000,
    epochs=100,
    callbacks=callbacks_list,
)

Epoch 1/100

Epoch 00001: loss improved from inf to 0.69361, saving model to model6-weights.hdf5
Epoch 2/100

Epoch 00002: loss improved from 0.69361 to 0.69272, saving model to model6-weights.hdf5
Epoch 3/100

Epoch 00003: loss did not improve from 0.69272
Epoch 4/100

Epoch 00004: loss improved from 0.69272 to 0.69272, saving model to model6-weights.hdf5
Epoch 5/100

Epoch 00005: loss improved from 0.69272 to 0.69272, saving model to model6-weights.hdf5
Epoch 6/100

Epoch 00006: loss improved from 0.69272 to 0.69270, saving model to model6-weights.hdf5
Epoch 7/100

Epoch 00007: loss improved from 0.69270 to 0.69266, saving model to model6-weights.hdf5
Epoch 8/100

Epoch 00008: loss improved from 0.69266 to 0.69262, saving model to model6-weights.hdf5
Epoch 9/100

Epoch 00009: loss improved from 0.69262 to 0.69201, saving model to model6-weights.hdf5
Epoch 10/100

Epoch 00010: loss improved from 0.69201 to 0.69033, saving model to model6-weights.hdf5
Epoch 11/100

Epoch 00011: loss im


Epoch 00048: loss improved from 0.67582 to 0.67573, saving model to model6-weights.hdf5
Epoch 49/100

Epoch 00049: loss improved from 0.67573 to 0.67558, saving model to model6-weights.hdf5
Epoch 50/100

Epoch 00050: loss improved from 0.67558 to 0.67459, saving model to model6-weights.hdf5
Epoch 51/100

Epoch 00051: loss improved from 0.67459 to 0.67437, saving model to model6-weights.hdf5
Epoch 52/100

Epoch 00052: loss improved from 0.67437 to 0.67377, saving model to model6-weights.hdf5
Epoch 53/100

Epoch 00053: loss did not improve from 0.67377
Epoch 54/100

Epoch 00054: loss improved from 0.67377 to 0.67318, saving model to model6-weights.hdf5
Epoch 55/100

Epoch 00055: loss improved from 0.67318 to 0.67284, saving model to model6-weights.hdf5
Epoch 56/100

Epoch 00056: loss improved from 0.67284 to 0.67267, saving model to model6-weights.hdf5
Epoch 57/100

Epoch 00057: loss improved from 0.67267 to 0.67241, saving model to model6-weights.hdf5
Epoch 58/100

Epoch 00058: loss di


Epoch 00096: loss improved from 0.65355 to 0.65292, saving model to model6-weights.hdf5
Epoch 97/100

Epoch 00097: loss improved from 0.65292 to 0.65284, saving model to model6-weights.hdf5
Epoch 98/100

Epoch 00098: loss improved from 0.65284 to 0.65214, saving model to model6-weights.hdf5
Epoch 99/100

Epoch 00099: loss did not improve from 0.65214
Epoch 100/100

Epoch 00100: loss improved from 0.65214 to 0.65179, saving model to model6-weights.hdf5


<keras.callbacks.History at 0x14aa5eeb8>