In [1]:
#Import required Libraries
import pandas as pd
import numpy as np

import re

import nltk 
#nltk.download('stopwords')
#nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer


In [2]:
# Import 1.6 Millions rows dataset from csv file

cols = ["sentiment", "ids", "date", "flag", "user", "tweet"]
encoding = 'latin'
dataset = pd.read_csv('training.1600000.processed.noemoticon.csv',encoding=encoding,names=cols)


In [3]:
dataset.head(1600000)

Unnamed: 0,sentiment,ids,date,flag,user,tweet
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
...,...,...,...,...,...,...
1599995,4,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...
1599996,4,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...
1599997,4,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...
1599998,4,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...


In [4]:
# Let's substitute 0 to 0 and 4 to 1
dataset['sentiment'].replace({0:0,4:1},inplace = True)

In [5]:
# Clean the dataset

pattern = '@\S+|https?:\S+|http?:\S|[^A-Za-z]+|com|net'

stop_words = stopwords.words('english')
lemma = WordNetLemmatizer()

def preprocess(text):
    text = re.sub(pattern, ' ', str(text).lower()).strip()
    tokens = []
    for token in text.split():
        if token not in stop_words:
            tokens.append(lemma.lemmatize(token))
    return ' '.join(tokens)

In [6]:
dataset.tweet = dataset.tweet.apply(lambda x: preprocess(x))



In [7]:
dataset.head(1600000)

Unnamed: 0,sentiment,ids,date,flag,user,tweet
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,awww bummer shoulda got david carr third day
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,upset update facebook texting might cry result...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,dived many time ball managed save rest go bound
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,whole body feel itchy like fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,behaving mad see
...,...,...,...,...,...,...
1599995,1,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,woke school best feeling ever
1599996,1,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,thewdb cool hear old walt interview
1599997,1,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,ready mojo makeover ask detail
1599998,1,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,happy th birthday boo alll time tupac amaru sh...


In [8]:
# Split the dataset into Training and Test sets

from sklearn.model_selection import train_test_split

test_size = 0.1

dataset_train,dataset_test = train_test_split(dataset,test_size = test_size)

In [9]:
dataset_train_clean = dataset_train[['sentiment','tweet']]
dataset_test_clean = dataset_test[['sentiment','tweet']]

In [10]:
dataset_train_clean.head(1600000)

Unnamed: 0,sentiment,tweet
564746,0,already crappy day n barely started yet
1453999,1,going sunday market birkelunden first time ever
814882,1,goodmorning every rejoice today new day ur tro...
1005780,1,haha talking soup noodle lunch
235693,0,want get good grade something
...,...,...
742853,0,fell asleep really late woke really early good
1391730,1,dont know buy yet like sims
310813,0,make heat look like could change friday
233550,0,arthur smith last night silent disco awesome


In [11]:
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_length = 20
trunc_type = 'post'
padd_type = 'post'


tokens = Tokenizer()
tokens.fit_on_texts(dataset_train_clean['tweet'])

training_seq = tokens.texts_to_sequences(dataset_train_clean['tweet'])
X_train = pad_sequences(training_seq,maxlen = max_length,padding=padd_type,truncating=trunc_type)

testing_seq =  tokens.texts_to_sequences(dataset_test_clean['tweet'])
X_test = pad_sequences(testing_seq,maxlen = max_length,padding=padd_type,truncating=trunc_type)


In [12]:
#Declaring target labels 
y_train = dataset_train_clean['sentiment']
y_test = dataset_test_clean['sentiment']

In [13]:
#Converting everything to numpy arrays

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [14]:
import gensim.models.keyedvectors as word2vec

word2vec_dict = word2vec.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',binary = True)


embeddings_index = dict()
for word in word2vec_dict.vocab:
    embeddings_index[word] = word2vec_dict.word_vec(word)


In [15]:
vocab_size = len(tokens.word_index) + 1

embed_size = 300
embedding_matrix = np.zeros((vocab_size,embed_size))

for word,tok in tokens.word_index.items():
    if word in embeddings_index.keys():
        embedding_vector = embeddings_index[word]
        embedding_matrix[tok] =  embedding_vector


In [16]:
from tensorflow.keras.layers import Embedding,Dense,LSTM,Bidirectional
from tensorflow.keras.layers import BatchNormalization,Dropout

embedding_layer = Embedding(vocab_size,embed_size,weights = [embedding_matrix],input_length = max_length,trainable = False)

In [17]:
# Create a model

from tensorflow.keras import Sequential

model = Sequential()
model.add(embedding_layer)
model.add(Bidirectional(LSTM(64,return_sequences = True)))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.4))
model.add(Dense(64,activation = 'relu'))
model.add(Dense(32,activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(1,activation = 'sigmoid'))

model.compile(loss = 'binary_crossentropy',optimizer = 'adam',metrics = ['accuracy'])

In [18]:
# Training a model 
 
batch_size = 8192
num_epochs = 5

brain = model.fit(X_train,y_train,batch_size = batch_size,epochs = num_epochs,verbose = 1)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Evaluate the model

score = model.evaluate(X_test, y_test, batch_size=batch_size)
print("")
print(f"Accuracy of model is : {round(score[1]*100,2)}%")
print(f"Loss of model is :{round(score[0]*100,2)}%")


Accuracy of model is : 78.58%
Loss of model is :45.3%


In [20]:
# Ask a model to make prediction

def decode_sentiment(score):
    label = None
    if score <= 0.45:
        label = 'Negative'
    elif score >= 0.55:
        label = 'Positive'
    else:
        label = 'Neutral'
    
    return label



In [21]:
def predict(text):
    x_test = pad_sequences(tokens.texts_to_sequences([text]), maxlen=max_length,padding=padd_type,truncating=trunc_type)
    score = model.predict([x_test])[0]
    
    label = decode_sentiment(score)

    return {"label": label, "score": float(score)}

In [None]:
name = input('What is your name ?: ').upper()

print(f" Hello, {name}!, To Get Started Type 'Help or H'")

command = ""

while command != "quit" or command != "q":
    
    def goodbye():
        print(f"Goodbye, {name}!")
        
        
    def sentiment():
        message = input("What is your tweet ? ")
        dict = predict(message)
        score = round(float(dict['score']) * 100,2)
        sentiment = dict['label'].upper()
        print(f" This tweet is {sentiment} and Score of {score}%")
        print()
    
    
    
    command = input(": ").lower()

    if command == "help" or command == "h":
        print("""
            ----------------------------
                   Menu
            ----------------------------
            Tweet or T - To write Tweet
            Help  or H - To Display Menu
            Quit  or Q - To Exit Program
            ----------------------------
        """)

    elif command == "tweet" or command == "t":
        sentiment()
        
        
        while True:
            command01 = input("Do you Want to Enter Another Tweet? (Y/N): ").lower()
            print("")
            
            if command01 == "y":
                sentiment()
                
            elif command01 == "n":
                goodbye()
                break
            
            else:
                break
        
    elif command == "quit" or command == "q":
        goodbye()
        break

    else:
        print("Please, type 'Help' for proper input.!")



What is your name ?: MAYAGI NESTORY JOSEPHATY
 Hello, MAYAGI NESTORY JOSEPHATY!, To Get Started Type 'Help or H'
: Help

            ----------------------------
                   Menu
            ----------------------------
            Tweet or T - To write Tweet
            Help  or H - To Display Menu
            Quit  or Q - To Exit Program
            ----------------------------
        
: Tweet
What is your tweet ? This item is too expensive, I can't afford to buy it.
 This tweet is NEGATIVE and Score of 11.25%

Do you Want to Enter Another Tweet? (Y/N): Y

What is your tweet ? I like iphone 13 pro max, one day i will buy it.
 This tweet is POSITIVE and Score of 57.04%

Do you Want to Enter Another Tweet? (Y/N): Y

What is your tweet ? I always buy milkshake from this restaurant, it is too delicious.
 This tweet is POSITIVE and Score of 91.98%

Do you Want to Enter Another Tweet? (Y/N): N

Goodbye, MAYAGI NESTORY JOSEPHATY!
