# File 07: Applying Thresholding on User Timeline Tweets


### Input Files :
- sentiment-analysis-model/model-gpu.yaml
- sentiment-analysis-model/weights-gpu.h5
- sentiment-analysis-model/tokenizer.pickle
- db/05-shortlisted-tweets.csv
- db/05-shortlisted-usernames.csv

### Output File:
- db/07-timeline-tweets-with-thresholding.csv

### Steps:
1. loading required libraries
1. loading model with weights
1. loading the tokenizer
1. loading timeline tweets
1. creating "X" array 
1. using the model to predict the sentiment of each tweet
1. applying triple thresholding to avoid ambiguity
1. saving final dataframe

In [88]:
# loading required libraries
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from keras.models import model_from_yaml
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. Every detected GPU is configured, and the loop is simply skipped when
# no GPU is visible (indexing [0] would raise IndexError in that case).
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [71]:
# loading model with weights
def load_model(model, weight):
    """Build a compiled Keras model from a YAML architecture file plus weights.

    Note: this definition rebinds the name ``load_model`` that was imported
    from ``tensorflow.keras.models`` at the top of the notebook.

    Args:
        model: path of the YAML file describing the model architecture.
        weight: path of the weights file handed to ``load_weights``.

    Returns:
        The compiled model with the stored weights loaded.
    """
    with open(model, 'r') as yaml_file:
        architecture = yaml_file.read()

    network = tf.keras.models.model_from_yaml(architecture)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    network.load_weights(weight)

    return network

# Locations of the saved architecture (YAML) and weights (HDF5) files.
MODEL_YAML_PATH = 'sentiment-analysis-model/model-gpu.yaml'
MODEL_WEIGHTS_PATH = 'sentiment-analysis-model/weights-gpu.h5'

model = load_model(MODEL_YAML_PATH, MODEL_WEIGHTS_PATH)
# Print the layer-by-layer overview of the restored network.
model.summary()

  config = yaml.load(yaml_string)


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 48, 128)           512000    
_________________________________________________________________
spatial_dropout1d_4 (Spatial (None, 48, 128)           0         
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (None, 48, 196)           255584    
_________________________________________________________________
spatial_dropout1d_5 (Spatial (None, 48, 196)           0         
_________________________________________________________________
cu_dnnlstm_3 (CuDNNLSTM)     (None, 196)               308896    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 394       
Total params: 1,076,874
Trainable params: 1,076,874
Non-trainable params: 0
____________________________________________

In [132]:
# Restore the tokenizer object stored next to the model files.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
with open('sentiment-analysis-model/tokenizer.pickle', 'rb') as tokenizer_file:
    tokenizer_bytes = tokenizer_file.read()
tokenizer = pickle.loads(tokenizer_bytes)

In [133]:
# Read the tweets to score into a DataFrame; later cells use its USER, TWEET
# and ORIGINAL columns.
TWEETS_CSV_PATH = "db/05-shortlisted-tweets.csv"
df = pd.read_csv(TWEETS_CSV_PATH)

In [134]:
# creating "X" array
# Missing tweets are replaced with an empty string before the cast: a bare
# astype(str) turns NaN into the literal text "nan", which would then be
# tokenized and scored as if it were a real tweet.
df['TWEET'] = df['TWEET'].fillna('').astype(str)

# Sequence length fed to the network; the model summary shows an input of
# (None, 48, ...) for the embedding layer.
MAX_SEQUENCE_LENGTH = 48

X = tokenizer.texts_to_sequences(df['TWEET'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)

In [135]:
# using the model to predict the sentiment of each tweet
# Inference runs once; both outputs below are derived from the same scores.
pred = model.predict(X)

# Index of the highest-scoring output column for each tweet. This replaces
# Sequential.predict_classes, which is deprecated/removed in newer TensorFlow
# releases and would run a second full prediction pass over X.
polarity = np.argmax(pred, axis=-1)

# Score of output column 1 for every tweet, kept as a list of scalars because
# the later cells iterate over it and zip it into the final DataFrame.
prediction = list(pred[:, 1])

In [142]:
# applying thresholding
# Three cut-offs split the score range into four buckets:
#   0: score < 0.25
#   1: 0.25 <= score < 0.5
#   2: 0.5  <= score < 0.75
#   3: score >= 0.75
# np.digitize assigns exactly one bucket to every score, so `thresh` always has
# the same length as `prediction`. The previous if/elif chain appended nothing
# for a score outside [0, 1] (or NaN), which would silently shift and truncate
# rows when the columns are zipped together later.
THRESHOLD_CUTOFFS = [0.25, 0.5, 0.75]
thresh = np.digitize(prediction, THRESHOLD_CUTOFFS).tolist()

In [None]:
# Assemble one output row per tweet and write the result to CSV.
OUTPUT_COLUMNS = ['USER', 'TWEET', 'ORIGINAL', 'PREDICTION', 'SENTIMENT', 'THRESHOLD']

# Row-wise tuples: the three input columns followed by the raw score, the
# predicted class and the threshold bucket.
output_rows = list(zip(
    df['USER'].values.tolist(),
    df['TWEET'].values.tolist(),
    df['ORIGINAL'].values.tolist(),
    prediction,
    polarity,
    thresh,
))

final = pd.DataFrame(output_rows, columns=OUTPUT_COLUMNS)
final.to_csv('db/07-timeline-tweets-with-thresholding.csv', index=False)