# File 07: Applying Triple Thresholding on User Timeline Tweets


### Input Files :
- model-sa/model-gpu.yaml
- model-sa/model-weights-gpu.h5
- model-sa/tokenizer.pickle
- db/05-shortlisted-tweets.csv
- db/05-shortlisted-usernames.csv

### Output File:
- db/07-timeline-tweets-with-thresholding.csv

### Steps:
1. loading required libraries
1. loading model with weights
1. loading the tokenizer
1. loading timeline tweets
1. creating "X" array 
1. using the model to pred sentiment of each tweet
1. applying triple thresholding to avoid amibuity
1. update the main dataframe
1. get usernames from shortlisted-username dataframe
1. make list of sentiment and thresholded-sentiment for all the users
1. filter out users with less than 100 tweets
1. creating final user dataframe
1. saving final dataframe

In [88]:
# loading required libraries
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from keras.models import model_from_yaml
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [71]:
# loading model with weights
def load_model(model, weight) :
    with open(model, 'r') as file:
        yaml_model = file.read()
    
    model = tf.keras.models.model_from_yaml(yaml_model)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.load_weights(weight)
    
    return model

model = load_model('model-sa/model-gpu.yaml', 'model-sa/model-weights-gpu.h5')
model.summary()

  config = yaml.load(yaml_string)


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 48, 128)           512000    
_________________________________________________________________
spatial_dropout1d_4 (Spatial (None, 48, 128)           0         
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (None, 48, 196)           255584    
_________________________________________________________________
spatial_dropout1d_5 (Spatial (None, 48, 196)           0         
_________________________________________________________________
cu_dnnlstm_3 (CuDNNLSTM)     (None, 196)               308896    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 394       
Total params: 1,076,874
Trainable params: 1,076,874
Non-trainable params: 0
____________________________________________

In [132]:
# loading the tokenizer
with open('model-sa/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [133]:
# loading timeline tweets
df = pd.read_csv("db/05-shortlisted-tweets.csv")

In [134]:
# creating "X" array 
X = tokenizer.texts_to_sequences(df['TWEET'].values)
X = pad_sequences(X, 48)

In [135]:
# using the model to pred sentiment of each tweet
output = model.predict(X)
C = model.predict_classes(X)

In [142]:
# applying triple thresholding to avoid amibuity
triple = []
Y = []
for value in output :
    Y.append(value[1])
    if value[1] < 0.25 :
        triple.append(0)
    elif value[1] < 0.5 :
        triple.append(1)
    elif value[1] < 0.75 :
        triple.append(2)
    elif value[1] <= 1 :
        triple.append(3)

In [152]:
# update the main dataframe
df = pd.DataFrame(
    list(zip( df.USER.values.tolist(), df.TWEET.values.tolist(), Y, C, triple )),
    columns = [ 'USER', 'TWEET', 'PREDICTION', 'SENTIMENT',  'TRIPLE_THRESH' ]
)
df.head()

Unnamed: 0,USER,TWEET,PREDICTION,SENTIMENT,TRIPLE_THRESH
0,TLeC,Do like happy endings Our Story About Finding ...,0.770715,1,3
1,TLeC,Sharing story helping Sally find new home Stor...,0.746607,1,2
2,TLeC,Words image boost caregivers life Inspiring Qu...,0.935224,1,3
3,TLeC,Our continuing saga reading pleasure Story Abo...,0.788391,1,3
4,TLeC,To leave leave New Blog Post What If Your Agin...,0.620331,1,2


In [185]:
# get usernames from shortlisted-username dataframe
user = pd.read_csv("db/05-shortlisted-usernames.csv")
user = user.USER.values.tolist()

In [186]:
# make list of sentiment and thresholded-sentiment for all the users
index = []
count = []
for name in tqdm(user) :
    array = list(df.loc[df['USER'] == name].index)
    index.append(array)
    count.append(len(array))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5370/5370 [03:03<00:00, 29.25it/s]


In [187]:
# filter out users with less than 100 tweets
temp = pd.DataFrame(list(zip(user, count)), columns=['USER', 'COUNT'])
temp = temp.drop(list(temp.loc[temp['COUNT'] < 100].index))

In [194]:
# creating final user dataframe
triple = []
sentiment = []
usernames = temp.USER.values.tolist()
for name in tqdm(usernames) :
    frame = df.loc[df['USER'] == name]
    sentiment.append(frame.SENTIMENT.values.tolist())
    triple.append(frame.TRIPLE_THRESH.values.tolist())

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4912/4912 [02:47<00:00, 29.30it/s]


In [202]:
# saving final dataframe
final = pd.DataFrame(
    list(zip(temp.USER.values.tolist(), sentiment, triple)),
    columns = ['USER', 'SENTIMENT', 'TRIPLE_THRESH']
)
final.to_csv('db/07-timeline-tweets-with-thresholding.csv', index=False)