# Import Libraries

In [1]:
# Import libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import nltk                                  
from nltk.corpus import twitter_samples

# Twitter Samples using nltk

In [2]:
all_positive_tweets = twitter_samples.strings('positive_tweets.json')
all_negative_tweets = twitter_samples.strings('negative_tweets.json')

In [3]:
print('Total Positive Tweets', len(all_positive_tweets))
print('Total Negative Tweets', len(all_negative_tweets))

Total Positive Tweets 5000
Total Negative Tweets 5000


# Shuffle Tweet List

In [4]:
np.random.shuffle(all_positive_tweets)
np.random.shuffle(all_negative_tweets)

# Train Test Split

In [5]:
# Get asssign 80% of tweets to training set and 20% to test set
X_train_sent = all_positive_tweets[:4000] + all_negative_tweets[:4000]
X_test_sent = all_positive_tweets[4000:] + all_negative_tweets[4000:]

# Get label for train-test data. 1 indicates positive and 0 indicates negative
y_train = np.append(np.ones(4000), np.zeros(4000))
y_test = np.append(np.ones(1000), np.zeros(1000))

# Text Tokenize and Vectorize

In [6]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train_sent)

X_train = tokenizer.texts_to_sequences(X_train_sent)
X_test = tokenizer.texts_to_sequences(X_test_sent)

In [7]:
vocab_size = len(tokenizer.word_index) + 1

In [8]:
from keras.preprocessing.sequence import pad_sequences
X_train = pad_sequences(X_train, padding='post', maxlen=1000)
X_test = pad_sequences(X_test, padding='post', maxlen=1000)

# Model Building using RNN

In [9]:
model = keras.Sequential([
    layers.Embedding(
        input_dim=vocab_size,
        output_dim=128,
        input_length=1000),
    layers.Dropout(0.2),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

2022-01-13 09:55:43.026702: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1000, 128)         2336000   
                                                                 
 dropout (Dropout)           (None, 1000, 128)         0         
                                                                 
 bidirectional (Bidirectiona  (None, 128)              98816     
 l)                                                              
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 32)                4128      
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                        

In [11]:
history = model.fit(X_train, y_train,
                    epochs=10,
                    verbose=2,
                    validation_data=(X_test, y_test),
                    batch_size=32)

Epoch 1/10
250/250 - 133s - loss: 0.5383 - accuracy: 0.7168 - val_loss: 0.4482 - val_accuracy: 0.7895 - 133s/epoch - 532ms/step
Epoch 2/10
250/250 - 965s - loss: 0.3583 - accuracy: 0.8468 - val_loss: 0.4531 - val_accuracy: 0.8015 - 965s/epoch - 4s/step
Epoch 3/10
250/250 - 680s - loss: 0.2730 - accuracy: 0.8880 - val_loss: 0.4747 - val_accuracy: 0.7960 - 680s/epoch - 3s/step
Epoch 4/10
250/250 - 156s - loss: 0.2133 - accuracy: 0.9145 - val_loss: 0.5417 - val_accuracy: 0.7900 - 156s/epoch - 626ms/step
Epoch 5/10
250/250 - 154s - loss: 0.1670 - accuracy: 0.9341 - val_loss: 0.5872 - val_accuracy: 0.7850 - 154s/epoch - 615ms/step
Epoch 6/10
250/250 - 175s - loss: 0.1386 - accuracy: 0.9444 - val_loss: 0.6955 - val_accuracy: 0.7725 - 175s/epoch - 699ms/step
Epoch 7/10
250/250 - 148s - loss: 0.1178 - accuracy: 0.9515 - val_loss: 0.9286 - val_accuracy: 0.7745 - 148s/epoch - 591ms/step
Epoch 8/10
250/250 - 133s - loss: 0.0982 - accuracy: 0.9595 - val_loss: 0.9347 - val_accuracy: 0.7725 - 133s/e

# Prediction on Test set

In [12]:
y_pred = np.round(model.predict(X_test))

In [13]:
y_pred

array([[1.],
       [1.],
       [1.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)

# Model Performance

In [14]:
from sklearn.metrics import precision_recall_fscore_support as score

In [15]:
scores = score(y_test, y_pred, pos_label=0, average='binary')

print('Precision:', scores[0])
print('Recall:', scores[1])
print('F1-Score:', scores[2])

Precision: 0.7560270009643202
Recall: 0.784
F1-Score: 0.7697594501718215


# Sentiment Analysis on new tweets

In [16]:
# Find Sentiments of new tweets
def new_tweets(text):
    text = tokenizer.texts_to_sequences([text])
    text = pad_sequences(text, padding='post', maxlen=1000)
    output = np.round(model.predict(text))
    if output[0] == 1:
        print('Sentiment - This is a positive tweet!!!')
    else:
        print('Sentiment - This is a negative tweet!!!')

In [17]:
# Check tweets
print('To quit type: -1')

while True:
    text = input('Enter or copy a tweet: ')
    if text == '-1':
        break
    new_tweets(text)
    print()
    
print('Done, Thank you!')

To quit type: -1
Enter or copy a tweet: I really like that tweet. 
Sentiment - This is a positive tweet!!!

Enter or copy a tweet: I have to say I am very disappointed on you!
Sentiment - This is a negative tweet!!!

Enter or copy a tweet: Everyday is a new challenge but must say life is good. 
Sentiment - This is a positive tweet!!!

Enter or copy a tweet: -1
Done, Thank you!
