# 1 - Import Libraries

In [None]:
# Import Lib
import pickle
import json
import pandas as pd
import numpy as np
import tensorflow as tf

import nltk # Preprocess
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
# Download the NLTK resources needed by the text preprocessing below.
# NOTE(review): recent NLTK releases may additionally require 'punkt_tab'
# for word_tokenize — confirm against the installed NLTK version.
nltk.download('stopwords') # stop-word lists read via stopwords.words('english')
nltk.download('punkt') # tokenizer data, presumably what word_tokenize relies on
nltk.download('wordnet') # WordNet data, presumably what WordNetLemmatizer relies on
nltk.download('omw-1.4') # Open Multilingual Wordnet data; presumably a companion to 'wordnet' — confirm

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

# 2 - Load Files

In [None]:
# Load Models
# Restore the saved Keras model stored at the relative path 'bilstm'.
# NOTE(review): raw strings are passed straight to predict() further down, so
# the saved model presumably contains its own text-vectorization layer — confirm.
loaded_model = tf.keras.models.load_model('bilstm')

# 3 - Model Inferencing

In [None]:
# Sample tweets to run through the model
twt = [
    'i want to sleep',
    'do you forgive me?',
    'i have not sleep for days',
    'PLEASE HELP ME LOVE',
]

# Wrap the raw texts in a one-column DataFrame named 'content'
tweet = pd.DataFrame({'content': twt})
tweet.head()

Unnamed: 0,content
0,i want to sleep
1,do you forgive me?
2,i have not sleep for days
3,PLEASE HELP ME LOVE


## 3.1 Preprocess

In [None]:
# Define the lemmatizer and the collection of English stop words
lemmetizer = WordNetLemmatizer()
# Kept as a set rather than a list: init_process tests every token for
# membership, which is a hash lookup on a set but a linear scan on a list.
stpwrd = set(stopwords.words('english'))

In [None]:
# Function for Text Preprocessing
def init_process(text):
  """Clean one raw tweet and return it as a space-separated string.

  Steps, in order: strip @mentions, #hashtags, escaped newlines, links and
  every character that is not a letter, whitespace or apostrophe; drop the
  retweet marker "RT"; tokenize; remove English stop words; lemmatize each
  remaining token.

  Args:
    text: The raw tweet text (str).

  Returns:
    The cleaned tokens joined by single spaces. An empty string is returned
    when no token survives the filters.

  Note:
    Reads the module-level `stpwrd` (stop words) and `lemmetizer`.
    NOTE(review): the saved model was presumably trained on text cleaned by
    an equivalent function — confirm the training pipeline applies the same
    steps, otherwise the model sees differently-shaped input at inference.
  """
  # Remove Mentions
  text = re.sub(r"@[A-Za-z0-9_]+", " ", text)

  # Remove Hashtags
  text = re.sub(r"#[A-Za-z0-9_]+", " ", text)

  # Remove escaped newlines, i.e. a literal backslash followed by "n".
  # Real newline characters are whitespace and disappear during tokenizing.
  text = re.sub(r"\\n", " ", text)

  # Remove Whitespace
  text = text.strip()

  # Remove Link (the dot after "www" is escaped so that ordinary words
  # starting with "www" are not mistaken for links)
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www\.\S+", " ", text)

  # Remove everything that is not a letter, whitespace or an apostrophe
  text = re.sub(r"[^A-Za-z\s']", " ", text)

  # Remove RT only as a standalone word, in any letter case, so that words
  # merely containing "rt" (e.g. "heart", "hurt") are left intact
  text = re.sub(r"\brt\b", " ", text, flags=re.IGNORECASE)

  # Tokenizing
  tokens = word_tokenize(text)

  # Remove Stopwords; the stop-word list is lowercase, so compare in lowercase
  # to also catch "ME", "Have", ... while keeping the token's original case
  kept = [word for word in tokens if word.lower() not in stpwrd]

  # Lemmatizing token by token: the lemmatizer works on single words, so
  # passing it the whole joined sentence would leave the text unchanged
  return ' '.join(lemmetizer.lemmatize(word) for word in kept)

In [None]:
# Clean every tweet with init_process and keep the result in a new column
tweet['processed_content'] = tweet['content'].apply(init_process)

# Remove words of 1 to 3 characters, together with any non-word characters
# that directly precede them
shortword = re.compile(r'\W*\b\w{1,3}\b')


def _strip_short_words(text):
  # Delete every match of the short-word pattern from a single text
  return shortword.sub('', text)


tweet['processed_content'] = tweet['processed_content'].apply(_strip_short_words)
tweet.head()

Unnamed: 0,content,processed_content
0,i want to sleep,want sleep
1,do you forgive me?,forgive
2,i have not sleep for days,sleep days
3,PLEASE HELP ME LOVE,PLEASE HELP LOVE


## 3.2 Predict

In [None]:
# Predict
# Feed the cleaned text column to the loaded model
model_inputs = tweet['processed_content']
emotion_prediction = loaded_model.predict(model_inputs)

# For every row, take the index of the largest value along axis 1
predict_results = np.argmax(emotion_prediction, axis=1)
predict_results



array([3, 3, 3, 3])

In [None]:
# Show Results
# Store the argmax result computed above as a new column next to each tweet.
# NOTE(review): the values are integer class indices; the mapping from index
# to emotion label is not defined in this notebook — look it up in the
# training code before presenting these numbers as emotions.
tweet['predicted_emotion'] = predict_results
tweet

Unnamed: 0,content,processed_content,predicted_emotion
0,i want to sleep,want sleep,3
1,do you forgive me?,forgive,3
2,i have not sleep for days,sleep days,3
3,PLEASE HELP ME LOVE,PLEASE HELP LOVE,3
