In [1]:
from time import time
import random
import pandas as pd
import numpy as np

In [2]:


df_raw=pd.read_csv('../Dataset/training_tweets.csv',encoding = "ISO-8859-1", header=None)

 # As the data has no column titles, we will add our own
df_raw.columns = ["label", "time", "date", "query", "username", "text"]

df = df_raw[['label', 'text']]
df_pos = df[df['label'] == 4]
df_neg = df[df['label'] == 0]
print(len(df_pos), len(df_neg))

df_pos = df_pos.iloc[:int(len(df_pos)/3)]
df_neg = df_neg.iloc[:int(len(df_neg)/3)]
print(len(df_pos), len(df_neg))

# Concatenating both positive and negative groups and storing them back into a single dataframe
df = pd.concat([df_pos, df_neg])
len(df)

800000 800000
266666 266666


533332

In [2]:
from nltk.tokenize import TweetTokenizer

start_time = time()


# The reduce_len parameter will allow a maximum of 3 consecutive repeating characters, while trimming the rest
# For example, it will tranform the word: 'Helloooooooooo' to: 'Hellooo'
tk = TweetTokenizer(reduce_len=True)

In [4]:
data = []

# Separating our features (text) and our labels into two lists to smoothen our work
X = df['text'].tolist()
Y = df['label'].tolist()

In [6]:

# Building our data list, that is a list of tuples, where each tuple is a pair of the tokenized text
# and its corresponding label
for x, y in zip(X, Y):
    if y == 4:
        data.append((tk.tokenize(x), 1))
    else:
        data.append((tk.tokenize(x), 0))
        
# Printing the CPU time and the first 5 elements of our 'data' list
print('CPU Time:', time() - start_time)
data[:5]



CPU Time: 62.40374684333801


[(['I', 'LOVE', '@Health4UandPets', 'u', 'guys', 'r', 'the', 'best', '!', '!'],
  1),
 (['im',
   'meeting',
   'up',
   'with',
   'one',
   'of',
   'my',
   'besties',
   'tonight',
   '!',
   'Cant',
   'wait',
   '!',
   '!',
   '-',
   'GIRL',
   'TALK',
   '!',
   '!'],
  1),
 (['@DaRealSunisaKim',
   'Thanks',
   'for',
   'the',
   'Twitter',
   'add',
   ',',
   'Sunisa',
   '!',
   'I',
   'got',
   'to',
   'meet',
   'you',
   'once',
   'at',
   'a',
   'HIN',
   'show',
   'here',
   'in',
   'the',
   'DC',
   'area',
   'and',
   'you',
   'were',
   'a',
   'sweetheart',
   '.'],
  1),
 (['Being',
   'sick',
   'can',
   'be',
   'really',
   'cheap',
   'when',
   'it',
   'hurts',
   'too',
   'much',
   'to',
   'eat',
   'real',
   'food',
   'Plus',
   ',',
   'your',
   'friends',
   'make',
   'you',
   'soup'],
  1),
 (['@LovesBrooklyn2', 'he', 'has', 'that', 'effect', 'on', 'everyone'], 1)]

In [8]:
from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

import re, string

In [3]:


# Stopwords are frequently-used words (such as “the”, “a”, “an”, “in”) that do not hold any meaning useful to extract sentiment.
# If it's your first time ever using nltk, you can download nltk's stopwords using: nltk.download('stopwords')
from nltk.corpus import stopwords
STOP_WORDS = stopwords.words('english')

# Defining a handy function in order to load a given glove file

def read_glove_vecs(glove_file):
    with open(glove_file, 'r', encoding="utf8") as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
        
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')

from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM, Bidirectional
from tensorflow.keras.layers import Embedding
from sklearn.model_selection import train_test_split

# Defining a function that will initialize and populate our embedding layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index, max_len):
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map["unk"].shape[0] #50
    
    emb_matrix = np.zeros((vocab_len, emb_dim))
    
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]
        
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False, input_shape=(max_len,))
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    
    return embedding_layer



In [4]:
# Further data cleaning
# A custom function defined in order to fine-tune the cleaning of the input text.
# This function is being "upgraded" such that it performs a more thourough cleaning of the data
# in order to better fit our words embedding layer
def cleaned(token):
    if token == 'u':
        return 'you'
    if token == 'r':
        return 'are'
    if token == 'some1':
        return 'someone'
    if token == 'yrs':
        return 'years'
    if token == 'hrs':
        return 'hours'
    if token == 'mins':
        return 'minutes'
    if token == 'secs':
        return 'seconds'
    if token == 'pls' or token == 'plz':
        return 'please'
    if token == '2morow' or token == '2moro' or token=='tmrw' or token=='tomorow':
        return 'tomorrow'
    if token == '2day':
        return 'today'
    if token == '4got' or token == '4gotten':
        return 'forget'
    if token in ['hahah', 'hahaha', 'hahahaha', 'hehehe', 'hahahah','hahahahaha']:
        return 'haha'
    if token == "mother's":
        return "mother"
    if token == "mom's":
        return "mom"
    if token == "dad's":
        return "dad"
    if token == 'bday' or token == 'b-day':
        return 'birthday'
    if token in ["i'm", "don't", "can't", "couldn't", "aren't", "wouldn't", "isn't", "didn't", "hadn't","doesn't", "won't", "haven't", "wasn't", "hasn't", "shouldn't", "ain't","weren't", "should've", "would've","could've" ,"here's","where's"]:
        return token.replace("'", "")
    if token in ['lmao', 'lolz', 'rofl']:
        return 'lol'
    if token == '<3':
        return 'love'
    if token == 'thanx' or token == 'thnx':
        return 'thanks'
    if token == 'goood':
        return 'good'
    if token in ['amp', 'quot', 'lt', 'gt', '½25', '..', '. .', '. . .']:
        return ''     
    if token == 'awsome' or token=='awsm':
        return 'awesome'
    if token in ["g'night","gn","gooodnight"]:
        return 'goodnight'
    if token == '#fb':
        return 'fb'
    if token in ['proly','prolly']:
        return 'probably'
    if token in ['omfg','omgg']:
        return 'omg'

    return token


# This function will be our all-in-one noise removal function
def remove_noise(tweet_tokens):

    cleaned_tokens = []

    for token in tweet_tokens:
        # Eliminating the token if it is a link
        token = re.sub(r'''(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))''', " ", token)
        # Eliminating the token if it is a mention
        token = re.sub("(@[A-Za-z0-9_]+)","", token)
        
        cleaned_token = cleaned(token.lower())
        
        if cleaned_token == "idk":
            cleaned_tokens.append('i')
            cleaned_tokens.append('dont')
            cleaned_tokens.append('know')
            continue
        if cleaned_token == "i'll":
            cleaned_tokens.append('i')
            cleaned_tokens.append('will')
            continue
        if cleaned_token == "you'll":
            cleaned_tokens.append('you')
            cleaned_tokens.append('will')
            continue
        if cleaned_token == "we'll":
            cleaned_tokens.append('we')
            cleaned_tokens.append('will')
            continue
        if cleaned_token == "it'll":
            cleaned_tokens.append('it')
            cleaned_tokens.append('will')
            continue
        #added
        if cleaned_token == "they'll" or cleaned_token== "they'l":
            cleaned_tokens.append('they')
            cleaned_tokens.append('will')
            continue
        if cleaned_token == "he'll" or cleaned_token== "he'l":
            cleaned_tokens.append('he')
            cleaned_tokens.append('will')
            continue
        if cleaned_token == "she'll" or cleaned_token== "she'l":
            cleaned_tokens.append('she')
            cleaned_tokens.append('will')
            continue
        
        if cleaned_token == "it's":
            cleaned_tokens.append('it')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "i've":
            cleaned_tokens.append('i')
            cleaned_tokens.append('have')
            continue
        if cleaned_token == "you've":
            cleaned_tokens.append('you')
            cleaned_tokens.append('have')
            continue
        if cleaned_token == "we've":
            cleaned_tokens.append('we')
            cleaned_tokens.append('have')
            continue
        if cleaned_token == "they've":
            cleaned_tokens.append('they')
            cleaned_tokens.append('have')
            continue
        if cleaned_token == "you're":
            cleaned_tokens.append('you')
            cleaned_tokens.append('are')
            continue
        if cleaned_token == "we're":
            cleaned_tokens.append('we')
            cleaned_tokens.append('are')
            continue
        if cleaned_token == "they're":
            cleaned_tokens.append('they')
            cleaned_tokens.append('are')
            continue
        if cleaned_token == "let's":
            cleaned_tokens.append('let')
            cleaned_tokens.append('us')
            continue
        if cleaned_token == "she's":
            cleaned_tokens.append('she')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "he's":
            cleaned_tokens.append('he')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "that's":
            cleaned_tokens.append('that')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "i'd":
            cleaned_tokens.append('i')
            cleaned_tokens.append('would')
            continue
        if cleaned_token == "you'd":
            cleaned_tokens.append('you')
            cleaned_tokens.append('would')
            continue
        if cleaned_token == "there's":
            cleaned_tokens.append('there')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "what's":
            cleaned_tokens.append('what')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "how's":
            cleaned_tokens.append('how')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "who's":
            cleaned_tokens.append('who')
            cleaned_tokens.append('is')
            continue
        if cleaned_token == "y'all" or cleaned_token == "ya'll":
            cleaned_tokens.append('you')
            cleaned_tokens.append('all')
            continue
        #added by me
        if cleaned_token == "hadnt":
            cleaned_tokens.append('had')
            cleaned_tokens.append('not')
            continue
        if cleaned_token == "shouldnt":
            cleaned_tokens.append('should')
            cleaned_tokens.append('not')
            continue
        if cleaned_token == "werent":
            cleaned_tokens.append('were')
            cleaned_tokens.append('not')
            continue
        if cleaned_token == "shouldve":
            cleaned_tokens.append('should')
            cleaned_tokens.append('have')
            continue
        if cleaned_token == "wouldve":
            cleaned_tokens.append('would')
            cleaned_tokens.append('have')
            continue
        if cleaned_token=="tbh":
            cleaned_tokens.append('to')
            cleaned_tokens.append('be')
            cleaned_tokens.append('honest')
            continue
        if cleaned_token == "couldve":
            cleaned_tokens.append('could')
            cleaned_tokens.append('have')
            continue
       
        if cleaned_token.strip() and cleaned_token not in string.punctuation:
            cleaned_tokens.append(cleaned_token)
            
    return cleaned_tokens

# Prevewing the remove_noise() output
# print(remove_noise(data[0][0]))


In [8]:
start_time = time()

unks = []
UNKS = []

def cleared(word):
    res = ""
    prev = None
    for char in word:
        if char == prev: continue
        prev = char
        res += char
    return res

def sentence_to_indices(sentence_words, word_to_index, max_len, i):
    global X, Y
    sentence_indices = []
    for j, w in enumerate(sentence_words):
        try:
            index = word_to_index[w]
        except:
            UNKS.append(w)
            w = cleared(w)
            try:
                index = word_to_index[w]
            except:
                index = word_to_index['unk']
                unks.append(w)
        X[i, j] = index



In [25]:


cleaned_tokens_list = []

# Removing noise from all the data, using the newly defined function
for tokens, label in data:
    x = remove_noise(tokens)
    if x:
        cleaned_tokens_list.append((x, label))

print('Removed Noise, CPU Time:', time() - start_time)
start_time = time()

list_len = [len(i) for i, j in cleaned_tokens_list]
max_len = max(list_len)
print('max_len:', max_len)


X = np.zeros((len(cleaned_tokens_list), max_len))
Y = np.zeros((len(cleaned_tokens_list), ))


for i, tk_lb in enumerate(cleaned_tokens_list):
    tokens, label = tk_lb
    sentence_to_indices(tokens, word_to_index, max_len, i)
    Y[i] = label
    
print('Data Prepared for model, CPU Time:', time() - start_time)


print(X[:5])
print(Y[:5])



Removed Noise, CPU Time: 50.908814668655396
max_len: 162
Data Prepared for model, CPU Time: 3.566823720932007
[[185457. 226278. 394475. 169754.  58997. 357266.  74390.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
       0.      0.      0.      0.      0.      0.      0.      0.      0.
  

In [26]:
unk = word_to_index['unk']

n_unk_words = 0

for x in X:
    for y in x:
        if y == unk:
            n_unk_words += 1

print(n_unk_words)

from collections import Counter
Counter(unks).most_common(50)

156220


[('#folowfriday', 1500),
 (':/', 928),
 ('(:', 604),
 ('. .', 511),
 ('tweps', 469),
 (":'(", 446),
 (';-)', 396),
 ('->', 365),
 ('awh', 364),
 ("today's", 363),
 ('iï', 359),
 ('2morow', 350),
 ('d:', 348),
 ('#asot40', 333),
 ('urgh', 271),
 ('ahaha', 266),
 ('<-', 258),
 ('sux', 250),
 ('8:', 236),
 ('yey', 234),
 ('retwet', 232),
 ('bleh', 219),
 ('probs', 214),
 ("friend's", 199),
 ('twiterverse', 194),
 ("everyone's", 192),
 ('damnit', 191),
 ('woho', 185),
 ('=/', 180),
 (':-d', 176),
 ('itï', 175),
 ('=]', 168),
 ('#delongeday', 167),
 ('bestie', 164),
 ('lï', 160),
 ('twiterland', 159),
 ('twiterbery', 151),
 ('everyones', 151),
 ('hayfever', 151),
 ('cï', 148),
 (':\\', 147),
 ("it'd", 146),
 ('mï', 143),
 ('):', 138),
 ("that'l", 138),
 ('ahah', 137),
 ("night's", 135),
 ('nï', 133),
 ('folowfriday', 132),
 ('xox', 131)]

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0, stratify=Y)
print("splitted")

print(len(X_train))
print(len(X_test))


splitted
426665
106667


In [21]:
word_to_index['i']

185457

In [25]:
import tensorflow
tensorflow.keras.backend.clear_session()
# gpu_devices = tensorflow.config.experimental.list_physical_devices("GPU")
# for device in gpu_devices:
#     tensorflow.config.experimental.set_memory_growth(device, True)

model_clean_data = Sequential()

model_clean_data.add(pretrained_embedding_layer(word_to_vec_map, word_to_index, max_len))
model_clean_data.add(Bidirectional(LSTM(units=128, return_sequences=True)))
model_clean_data.add(Bidirectional(LSTM(units=128, return_sequences=False)))
model_clean_data.add(Dense(units=1, activation='sigmoid'))

model_clean_data.summary()

model_clean_data.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#500 works
model_clean_data.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs = 5, batch_size = 512, shuffle=True)
model_clean_data.save("BiLSTM.h5")

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 309, 50)           20000050  
_________________________________________________________________
bidirectional (Bidirectional (None, 309, 256)          183296    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               394240    
_________________________________________________________________
dense (Dense)                (None, 1)                 257       
Total params: 20,577,843
Trainable params: 577,793
Non-trainable params: 20,000,050
_________________________________________________________________
Epoch 1/5
 13/624 [..............................] - ETA: 6:06:21 - loss: 0.6832 - accuracy: 0.5557

KeyboardInterrupt: 

In [None]:
history = model_clean_data.history

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

import matplotlib.pyplot as plt
plt.plot(epochs, acc, 'bo', label = 'Training Accuracy')
plt.plot(epochs, val_acc, 'r', label = 'Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
y_arrow = max(val_acc)
x_arrow = val_acc.index(y_arrow) + 1
plt.annotate(str(y_arrow)[:6],
             (x_arrow, y_arrow),
             xytext=(x_arrow + 5, y_arrow + .02),
             arrowprops=dict(facecolor='orange', shrink=0.05))
plt.xticks(epochs)

plt.figure()
plt.plot(epochs, loss, 'bo', label = 'Training Loss')
plt.plot(epochs, val_loss, 'r', label = 'Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.xticks(epochs)
plt.show()

model_clean_data.save("BiLSTM_4.h5")



In [5]:
import tensorflow
built_model=tensorflow.keras.models.load_model('BiLSTM_tune_1_rerun.h5')

In [18]:
def sentence_to_indices(sentence_words, max_len):
    X = np.zeros((max_len))
    sentence_indices = []
    for j, w in enumerate(sentence_words):
        try:
            index = word_to_index[w]
        except:
            w = cleared(w)
            try:
                index = word_to_index[w]
            except:
                index = word_to_index['unk']
        X[j] = index
    return X

def predict_sentiment(custom_tweet):
    # Convert the tweet such that it can be fed to the model
    x_input = sentence_to_indices(remove_noise(tk.tokenize(custom_tweet)), 162) #max_len=162 for our model final
    print(len(x_input))
    
    # Retrun the model's prediction
    return round(built_model.predict(np.array([x_input])).item(),3)




In [11]:
print(predict_sentiment("I'm not happy you're here"))

print(predict_sentiment("I'm glad you're here!"))

print(predict_sentiment("I'm sad you're here!"))


<class 'numpy.ndarray'>
0.011
<class 'numpy.ndarray'>
0.99
<class 'numpy.ndarray'>
0.006


In [19]:
print(predict_sentiment(" I have been through a lot of things which have affected me"))

162
0.123


In [9]:
print(predict_sentiment(" I am feeling a little alone lately"))

[185457.  52943. 146352.  43010. 223830.  52315. 217901.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.   

In [15]:
print(predict_sentiment(" I wish I could open up about it and talk to someone"))

[185457. 388583. 185457. 110156. 270501. 373317.  44608. 193716.  54718.
 352214. 360915. 337267.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.   

In [16]:
print(predict_sentiment(" There is a lot going on at work but I guess I can handle it by myself"))

[357640. 192973.  43010. 225985. 163745. 269798.  62065. 389836.  87775.
 185457. 168566. 185457.  90548. 172590. 193716.  88126. 254554.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.   

In [17]:
print(predict_sentiment(" There is a lot going on at work which I am not able to cope with"))

[357640. 192973.  43010. 225985. 163745. 269798.  62065. 389836. 386474.
 185457.  52943. 264550.  44493. 360915. 108724. 388711.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.   

In [47]:
print(predict_sentiment(" I'm quite happy with my family , they support me when I am down"))

0.745


In [46]:
print(predict_sentiment(" I am so glad to have such supportive friends"))

0.997


In [43]:
print(predict_sentiment(" I received a promotion yesterday!"))

0.893


In [21]:
print(predict_sentiment("my dog tommy died"))

0.0


In [22]:
print(predict_sentiment("the team i support lost the tournament"))

0.192


In [24]:
print(predict_sentiment("Oh come on it is not a bad thing"))

0.548


In [23]:
print(predict_sentiment("my mental health is not that bad"))

0.8


In [25]:
print(predict_sentiment("I can’t go nowhere tonight"))

0.487


In [26]:
print(predict_sentiment("The feeling he experienced is not insignificant"))

0.284


In [48]:
print(predict_sentiment("with this act, it will be his first and probably, the last movie."))

0.829


In [50]:
print(predict_sentiment("I am not that unhappy"))

0.412


In [17]:

l1=["i'm", "don't", "can't", "couldn't", "aren't", "wouldn't", "isn't", "didn't",
                 "doesn't", "won't", "haven't", "wasn't", "hasn't", "ain't", "would've"]

for i in l1:
   
    i=i.replace("'", "")
    print(i)
    print(word_to_index[i])

im
187631
dont
127708
cant
91041
couldnt
110159
arent
59057
wouldnt
390144
isnt
193408
didnt
123557
doesnt
126852
wont
389498
havent
174666
wasnt
383633
hasnt
174194
aint
49032
wouldve


KeyError: 'wouldve'

In [None]:
#hadnt, shouldnt, werent, shouldve, wouldve

'#folowfriday'

In [12]:
dict1={'social_anx':
           ["oh no", "that's bad"],
       'social_pos':
           ['damn','sad','horrible']
      }

In [21]:
import random
print(random.choice(dict1['depression']))

horrible
