# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import re

### Load Dataset and check Dataset

In [7]:
# Read the tweet sentiment CSV from the working directory into a DataFrame,
# then preview the first rows to check that it loaded as expected.
df = pd.read_csv('Sentiment.csv')
df.head()

Unnamed: 0,id,candidate,candidate_confidence,relevant_yn,relevant_yn_confidence,sentiment,sentiment_confidence,subject_matter,subject_matter_confidence,candidate_gold,...,relevant_yn_gold,retweet_count,sentiment_gold,subject_matter_gold,text,tweet_coord,tweet_created,tweet_id,tweet_location,user_timezone
0,1,No candidate mentioned,1.0,yes,1.0,Neutral,0.6578,None of the above,1.0,,...,,5,,,RT @NancyLeeGrahn: How did everyone feel about...,,2015-08-07 09:54:46 -0700,629697200650592256,,Quito
1,2,Scott Walker,1.0,yes,1.0,Positive,0.6333,None of the above,1.0,,...,,26,,,RT @ScottWalker: Didn't catch the full #GOPdeb...,,2015-08-07 09:54:46 -0700,629697199560069120,,
2,3,No candidate mentioned,1.0,yes,1.0,Neutral,0.6629,None of the above,0.6629,,...,,27,,,RT @TJMShow: No mention of Tamir Rice and the ...,,2015-08-07 09:54:46 -0700,629697199312482304,,
3,4,No candidate mentioned,1.0,yes,1.0,Positive,1.0,None of the above,0.7039,,...,,138,,,RT @RobGeorge: That Carly Fiorina is trending ...,,2015-08-07 09:54:45 -0700,629697197118861312,Texas,Central Time (US & Canada)
4,5,Donald Trump,1.0,yes,1.0,Positive,0.7045,None of the above,1.0,,...,,156,,,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,,2015-08-07 09:54:45 -0700,629697196967903232,,Arizona


In [3]:
# Display the raw tweet text column (pandas truncates the long Series for display).
df['text']

0        RT @NancyLeeGrahn: How did everyone feel about...
1        RT @ScottWalker: Didn't catch the full #GOPdeb...
2        RT @TJMShow: No mention of Tamir Rice and the ...
3        RT @RobGeorge: That Carly Fiorina is trending ...
4        RT @DanScavino: #GOPDebate w/ @realDonaldTrump...
                               ...                        
13866    RT @cappy_yarbrough: Love to see men who will ...
13867    RT @georgehenryw: Who thought Huckabee exceede...
13868    RT @Lrihendry: #TedCruz As President, I will a...
13869    RT @JRehling: #GOPDebate Donald Trump says tha...
13870    RT @Lrihendry: #TedCruz headed into the Presid...
Name: text, Length: 13871, dtype: object

In [4]:
# List the distinct sentiment labels present in the dataset.
df["sentiment"].unique()

array(['Neutral', 'Positive', 'Negative'], dtype=object)

In [5]:
# Show every column name available in the loaded frame.
df.columns

Index(['id', 'candidate', 'candidate_confidence', 'relevant_yn',
       'relevant_yn_confidence', 'sentiment', 'sentiment_confidence',
       'subject_matter', 'subject_matter_confidence', 'candidate_gold', 'name',
       'relevant_yn_gold', 'retweet_count', 'sentiment_gold',
       'subject_matter_gold', 'text', 'tweet_coord', 'tweet_created',
       'tweet_id', 'tweet_location', 'user_timezone'],
      dtype='object')

In [14]:
# Number of tweets currently held in `df` (a one-element shape tuple).
df['text'].shape

(10729,)

In [13]:
# Preview the first few tweets. A bare `.iloc` only evaluates to the indexer
# object and shows no data, so use `.head()` to display the leading rows.
df["text"].head()

1    RT @ScottWalker: Didn't catch the full #GOPdeb...
3    RT @RobGeorge: That Carly Fiorina is trending ...
4    RT @DanScavino: #GOPDebate w/ @realDonaldTrump...
5    RT @GregAbbott_TX: @TedCruz: "On my first day ...
6    RT @warriorwoman91: I liked her and was happy ...
Name: text, dtype: object

In [8]:
# Keep only the two columns the model needs: the tweet text (input)
# and its sentiment label (target).
df = df.loc[:, ["text", "sentiment"]]
df.head()

Unnamed: 0,text,sentiment
0,RT @NancyLeeGrahn: How did everyone feel about...,Neutral
1,RT @ScottWalker: Didn't catch the full #GOPdeb...,Positive
2,RT @TJMShow: No mention of Tamir Rice and the ...,Neutral
3,RT @RobGeorge: That Carly Fiorina is trending ...,Positive
4,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,Positive


In [8]:
# Count how many tweets carry each sentiment label.
df['sentiment'].value_counts()

Negative    8493
Neutral     3142
Positive    2236
Name: sentiment, dtype: int64

In [9]:
# Discard the 'Neutral' tweets so the task becomes binary classification,
# then confirm which labels remain.
is_not_neutral = df['sentiment'] != 'Neutral'
df = df.loc[is_not_neutral]
df['sentiment'].unique()

array(['Positive', 'Negative'], dtype=object)

# Q1. Print the number of positive and negative comments

In [25]:
# Answer to Q1: number of tweets per sentiment label in the current frame.
df['sentiment'].value_counts()

Negative    8493
Positive    2236
Name: sentiment, dtype: int64

In [19]:
# Upper bound on the vocabulary size used when encoding tweets.
max_features = 2000
tweet_texts = df['text'].values

# Build the word index from the tweets, splitting on spaces.
tokenizer = keras.preprocessing.text.Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(tweet_texts)

# Encode each tweet as a sequence of word indices and pad them all to a
# common length so they form a single 2-D array.
X = keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(tweet_texts)
)

X.shape

(10729, 29)

In [17]:
# Inspect one encoded tweet (row 77) to sanity-check the padded integer sequence.
X[77]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    3, 1554,   55,   56,  559,   18,  372,  594,   55,   35,
        559,   23,  376,    2,   13,   10,   11])

### splitting the data

In [15]:
# One-hot encode the labels. The column order is pinned explicitly so that
# index 0 is always 'Negative' and index 1 is always 'Positive'; the
# prediction cells interpret the model's argmax using exactly that mapping.
# Casting to float32 keeps the label dtype stable across pandas versions
# (newer releases return boolean dummies by default).
y = pd.get_dummies(df['sentiment'])[['Negative', 'Positive']].values.astype('float32')

# Number of hold-out rows reserved for validation during training.
validation_size = 1500

# Hold out 20% of the data; the fixed seed makes the split reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Guard against carving the entire hold-out set into validation, which
# would leave nothing to evaluate on.
assert validation_size < len(test_x), "validation_size leaves no rows for the test set"

# The first `validation_size` hold-out rows become the validation set,
# the remainder is the final test set.
X_valid, y_valid = test_x[:validation_size], test_y[:validation_size]
test_x, test_y = test_x[validation_size:], test_y[validation_size:]
train_x.shape, X_valid.shape, test_x.shape

((8583, 30), (1500, 30), (646, 30))

# Q2. Building the LSTM model


In [16]:
# Size of each word-embedding vector and number of LSTM units.
embed_dim = 128
lstm_out = 196

# Assemble the network layer by layer:
# embedding -> spatial dropout -> LSTM -> 2-way softmax classifier.
model = keras.models.Sequential()
model.add(keras.layers.Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(keras.layers.SpatialDropout1D(0.3))
model.add(keras.layers.LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(keras.layers.Dense(2, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 30, 128)           256000    
                                                                 
 spatial_dropout1d (SpatialD  (None, 30, 128)          0         
 ropout1D)                                                       
                                                                 
 lstm (LSTM)                 (None, 196)               254800    
                                                                 
 dense (Dense)               (None, 2)                 394       
                                                                 
Total params: 511,194
Trainable params: 511,194
Non-trainable params: 0
_________________________________________________________________


### fitting the model

In [18]:
# Mini-batch size, reused later when evaluating the model.
batch_size = 32

# Train for 10 epochs, reporting validation metrics after each one
# (verbose=2 prints a single summary line per epoch).
model.fit(
    x=train_x,
    y=train_y,
    batch_size=batch_size,
    epochs=10,
    verbose=2,
    validation_data=(X_valid, y_valid),
)

Epoch 1/10
269/269 - 45s - loss: 0.1792 - accuracy: 0.9283 - val_loss: 0.4673 - val_accuracy: 0.8440 - 45s/epoch - 166ms/step
Epoch 2/10
269/269 - 55s - loss: 0.1627 - accuracy: 0.9357 - val_loss: 0.5410 - val_accuracy: 0.8367 - 55s/epoch - 206ms/step
Epoch 3/10
269/269 - 64s - loss: 0.1444 - accuracy: 0.9450 - val_loss: 0.4933 - val_accuracy: 0.8340 - 64s/epoch - 239ms/step
Epoch 4/10
269/269 - 65s - loss: 0.1390 - accuracy: 0.9434 - val_loss: 0.5763 - val_accuracy: 0.8340 - 65s/epoch - 242ms/step
Epoch 5/10
269/269 - 71s - loss: 0.1279 - accuracy: 0.9487 - val_loss: 0.6035 - val_accuracy: 0.8300 - 71s/epoch - 266ms/step
Epoch 6/10
269/269 - 65s - loss: 0.1219 - accuracy: 0.9512 - val_loss: 0.6759 - val_accuracy: 0.8360 - 65s/epoch - 240ms/step
Epoch 7/10
269/269 - 65s - loss: 0.1166 - accuracy: 0.9527 - val_loss: 0.6020 - val_accuracy: 0.8327 - 65s/epoch - 240ms/step
Epoch 8/10
269/269 - 51s - loss: 0.1066 - accuracy: 0.9561 - val_loss: 0.6394 - val_accuracy: 0.8333 - 51s/epoch - 191

<keras.callbacks.History at 0x276e9043850>

### evaluate the model

In [19]:
# Evaluate on the held-out test rows; the model was compiled with a single
# metric, so evaluate() yields the loss followed by the accuracy.
test_metrics = model.evaluate(x=test_x, y=test_y, batch_size=batch_size, verbose=2)
score, accuracy = test_metrics

print("score : %.2f" % score)
print("accuracy : %.2f" % accuracy)

21/21 - 0s - loss: 0.7052 - accuracy: 0.8282 - 404ms/epoch - 19ms/step
score : 0.71
accuracy : 0.83


# Q3. Checking whether a tweet is positive or negative

### test a predicted tweet

#### Testcase 1 : 'He is a great leader.'

In [39]:
# Encode the sample tweet with the tokenizer that was fitted on the training text.
twt = ['He is a great leader.']
twt = tokenizer.texts_to_sequences(twt)

# Pad to the same length as the training matrix (X.shape[1]), which is also
# the input length the model was built with. A hard-coded length can silently
# drift from the real one whenever the data or tokenizer changes.
twt = keras.preprocessing.sequence.pad_sequences(
    twt, maxlen=X.shape[1], dtype='int32', value=0
)
print(twt)

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0  29   6   8 148 335]]


In [40]:
# Run the model on the single encoded tweet and keep the first (only) row of
# the prediction: the two softmax class scores.
sentiment = model.predict(twt , batch_size = None , verbose = 2)[0]

1/1 - 0s - 63ms/epoch - 63ms/step


In [41]:
# Translate the index of the highest class score into its label and print it.
class_labels = {0: "negative", 1: "positive"}
predicted_label = class_labels.get(int(np.argmax(sentiment)))
if predicted_label is not None:
    print(predicted_label)

positive


#### Testcase 2 : 'He is a terrible leader'

In [35]:
# Encode the sample tweet with the tokenizer that was fitted on the training text.
twt = ['He is a terrible leader']
twt = tokenizer.texts_to_sequences(twt)

# Pad to the same length as the training matrix (X.shape[1]), which is also
# the input length the model was built with. A hard-coded length can silently
# drift from the real one whenever the data or tokenizer changes.
twt = keras.preprocessing.sequence.pad_sequences(
    twt, maxlen=X.shape[1], dtype='int32', value=0
)
print(twt)

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0  29   6   8 994 335]]


In [36]:
# Run the model on the single encoded tweet and keep the first (only) row of
# the prediction: the two softmax class scores.
sentiment = model.predict(twt , batch_size = None , verbose = 2)[0]

1/1 - 0s - 140ms/epoch - 140ms/step


In [37]:
# Translate the index of the highest class score into its label and print it.
class_labels = {0: "negative", 1: "positive"}
predicted_label = class_labels.get(int(np.argmax(sentiment)))
if predicted_label is not None:
    print(predicted_label)

negative
