#### <font color='green'>Emotion Sentiment Analysis with an NLP Model and Industrialization</font>

###### Import necessary libraries

###### Install any missing libraries with `pip install` before running the imports

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import pickle
import contractions
import string
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, GRU, Flatten,Dropout

[nltk_data] Downloading package stopwords to C:\Users\Olanrewaju
[nltk_data]     Adegoke\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Olanrewaju
[nltk_data]     Adegoke\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


###### Define the paths to the folders containing the data, the preprocessing and model artifacts, and the notebooks

In [2]:
# Show the kernel's current working directory; the relative folder paths used
# by this notebook are resolved against it.
os.getcwd()

'C:\\Users\\Olanrewaju Adegoke\\Desktop\\TechTern\\nlp_projects\\nlp_project_sentiment_analysis\\notebooks'

In [3]:
# Project folders, anchored as absolute paths at the moment this cell runs.
# The original relative strings ('../models', ...) were re-resolved against the
# *current* working directory every time they were used, so any os.chdir call
# in between silently changed what they pointed to. Absolute paths stay valid
# no matter where later cells move the working directory.
data_path = os.path.abspath('../data')
model_path = os.path.abspath('../models')
note_path = os.path.abspath('../notebooks')

###### Functions for cleaning the text

In [4]:
def convert_to_lower(features):
    """Return ``features`` with every character lower-cased."""
    return features.lower()

def remove_contractions(features):
    """Return ``features`` after passing it through ``contractions.fix``."""
    return contractions.fix(features)

def remove_punctuation(features):
    """Return ``features`` with every ``string.punctuation`` character deleted."""
    # A translation table whose third argument lists the characters to drop.
    drop_punctuation = str.maketrans('', '', string.punctuation)
    return features.translate(drop_punctuation)

def remove_number(features):
    """Return ``features`` with all digit characters removed."""
    kept_chars = []
    for symbol in features:
        if symbol.isdigit():
            continue
        kept_chars.append(symbol)
    return ''.join(kept_chars)

def remove_symbols(features):
    """Strip every non-alphabetic character from each word of ``features``.

    The text is split on whitespace, each word keeps only the characters for
    which ``str.isalpha`` is true, and the words are re-joined with single
    spaces. A word made only of symbols becomes an empty string, so it leaves
    an extra space in the result (unchanged from the original behaviour).

    Empty or whitespace-only input returns ``''``. The original version raised
    ``UnboundLocalError`` in that case because its result variable was only
    assigned inside the loop body.
    """
    cleaned_words = [
        ''.join(char for char in word if char.isalpha())
        for word in features.split()
    ]
    # Join once, after all words are cleaned, instead of rebuilding the whole
    # string on every iteration.
    return ' '.join(cleaned_words)

def remove_stopwords(features):
    """Return ``features`` without NLTK's English stopwords.

    The text is split on whitespace, tokens found in
    ``stopwords.words('english')`` are dropped, and the remaining tokens are
    re-joined with single spaces. Matching is exact, so the text should already
    be in the same case as the stopword list entries.
    """
    # A set gives constant-time membership checks; the original tested every
    # token against the full list.
    stop_words = set(stopwords.words('english'))
    return ' '.join(word for word in features.split() if word not in stop_words)

def stemming(features):
    """Return ``features`` with each whitespace-separated token Porter-stemmed."""
    porter = PorterStemmer()
    stems = []
    for token in features.split():
        stems.append(porter.stem(token))
    return ' '.join(stems)

def lemmatization(features):
    """Return ``features`` with each whitespace-separated token lemmatized by WordNet."""
    wordnet = WordNetLemmatizer()
    lemmas = []
    for token in features.split():
        lemmas.append(wordnet.lemmatize(token))
    return ' '.join(lemmas)

###### Preprocessing function that automates the cleaning tasks

In [5]:
def text_preprocessor(features, mode=True):
    '''
    Clean a piece of text by running it through the cleaning helpers in order:
    1. convert to lower case
    2. fix contractions (``contractions.fix``)
    3. remove punctuation
    4. remove digits
    5. remove any remaining non-alphabetic characters
    6. remove English stopwords
    7. reduce words to a root form: stemming when mode == True,
       lemmatization otherwise.
    Returns the cleaned text.

    ***Developed by Olanrewaju Adegoke***
    '''
    # The last step depends on `mode`; everything before it is fixed.
    cleaning_steps = (
        convert_to_lower,
        remove_contractions,
        remove_punctuation,
        remove_number,
        remove_symbols,
        remove_stopwords,
        stemming if mode == True else lemmatization,
    )
    cleaned = features
    for step in cleaning_steps:
        cleaned = step(cleaned)
    return cleaned

###### Functions that load the model and preprocessing artifacts

In [6]:
def load_feature_tokenizer_artifact():
    """Load the pickled feature-tokenizer / label-encoder artifact.

    Reads ``feature_tokenizer_and_label_encoder.pkl`` from the folder named by
    the module-level ``model_path`` and returns the unpickled object.

    The file is opened through a joined path rather than by calling
    ``os.chdir``, so loading no longer changes the process working directory
    as a side effect.
    """
    artifact_file = os.path.join(model_path, 'feature_tokenizer_and_label_encoder.pkl')
    # SECURITY: pickle.load can execute arbitrary code; only load artifacts
    # produced by a trusted training run.
    with open(artifact_file, 'rb') as file:
        return pickle.load(file)

In [7]:
def load_tokenizer_and_model_artifact():
    """Load the pickled artifact holding the trained Keras models.

    Reads ``keras_models_with_tokenization.pkl`` from the folder named by the
    module-level ``model_path`` and returns the unpickled object.

    The file is opened through a joined path rather than by calling
    ``os.chdir``, so loading no longer changes the process working directory
    as a side effect.
    """
    artifact_file = os.path.join(model_path, 'keras_models_with_tokenization.pkl')
    # SECURITY: pickle.load can execute arbitrary code; only load artifacts
    # produced by a trusted training run.
    with open(artifact_file, 'rb') as file:
        return pickle.load(file)

###### Load the artifacts and select the model and tokenizer

In [8]:
# Remember the current working directory and always return to it, even if a
# load fails. The original changed into the models folder first and only
# changed back on success, so an error left the kernel in the wrong folder
# for every later relative path.
_starting_dir = os.getcwd()
try:
    tokenizer_model = load_tokenizer_and_model_artifact()
    token_label = load_feature_tokenizer_artifact()
finally:
    os.chdir(_starting_dir)

# Select the model and the fitted tokenizer used by the inference pipeline.
best_model_tokenized = tokenizer_model['RNN(LSTM_IMPROVED)']
tokenizer = token_label['feature_tokenizer']

  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))


###### The prediction and inference pipeline

In [9]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Length every tokenized input is padded/truncated to before prediction.
# NOTE(review): presumably the sequence length the model was trained with - confirm.
maxlen_pad = 19

def model_industrialization_tokenizer(text, padding=True):
    """Predict the emotion expressed in ``text``.

    The text is cleaned with ``text_preprocessor`` (stemming mode), converted
    to an integer sequence with the module-level ``tokenizer``, padded to
    ``maxlen_pad`` and scored with ``best_model_tokenized``. The label with the
    highest score is returned.

    Parameters
    ----------
    text : str
        Raw input text.
    padding : bool, default True
        When truthy the sequence is padded at the end (``'post'``), otherwise
        at the start (``'pre'``).

    Returns
    -------
    pandas.DataFrame
        A one-row frame with a single ``'emotions'`` column holding the
        predicted label.
    """
    # NOTE(review): the index order must match the label encoding used when
    # the model was trained - confirm against the saved label encoder.
    labels = ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']

    cleaned_text = text_preprocessor(features=text, mode=True)

    # The tokenizer works on a list of texts, so wrap the single cleaned string.
    test_feat_seq = tokenizer.texts_to_sequences([cleaned_text])

    padding_side = 'post' if padding else 'pre'
    padded_test_seq = pad_sequences(test_feat_seq, maxlen=maxlen_pad, padding=padding_side)

    predictions = best_model_tokenized.predict(padded_test_seq)

    # Only one text is scored, so take the highest-scoring class of row 0.
    predicted_class = int(np.argmax(predictions, axis=1)[0])
    predicted_label = labels[predicted_class]

    return pd.DataFrame({'emotions': [predicted_label]}, columns=['emotions'])

###### Install the Gradio GUI library if it is not already installed

###### Launch the gradio interface for industrializing the model

In [10]:
import gradio as gr

# Sample sentences passed to gr.Interface as `examples`; each inner list holds
# the value for the single text input.
examples=[
        ["I'm so happy about my new job!"],
        ["I'm feeling really sad today."],
        ["This makes me so angry!"],
        ["I'm terrified of what might happen."],
        ["I love spending time with my family."],
        ["Wow, I didn't expect that at all!"]]


# One free-text input box for the user's sentence.
inputs=[
        gr.Textbox(placeholder="What is on your mind...") 
    ]

# One-column table used to show the DataFrame returned by the prediction function.
outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(1, "fixed"), label='Thoughts and Emotions', headers=['Emotion'])]

# Text shown at the top of the app.
title='Thoughts and Emotions NLP model using Tokenization with TensorFlow by Olanrewaju Adegoke'
description='Express what is on your mind and I will predict your present state of emotions.'

# Wire the prediction function to the input and output components.
interface = gr.Interface(fn = model_industrialization_tokenizer, inputs = inputs, outputs = outputs, title=title, description=description, examples=examples)

# share=True requests a temporary public URL in addition to the local one.
# NOTE(review): this makes the app reachable from outside this machine - confirm
# that is intended before running.
interface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d67bf1b77668688166.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


