In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
pd.set_option("display.max_columns", None)
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from lib_file import lib_path
import re
import contractions
from langdetect import detect
from tensorflow.keras.models import load_model
from transformers import pipeline
from IPython.display import clear_output
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
import pickle

# Load the pickled tokenizer (presumably the one fitted at training time — confirm).
# SECURITY: unpickling runs arbitrary code from the file; only load trusted artifacts.
with open("models/tokens.pkl", "rb") as token_file:
    tokenizer = pickle.load(token_file)

# Hugging Face sentiment pipeline. No model/revision is named here, so the
# library's default checkpoint is used — NOTE(review): consider pinning one.
register = pipeline("sentiment-analysis")
clear_output()  # wipe whatever the pipeline construction printed into the cell

# Keras model restored without its training configuration (inference only).
model = load_model("models/LongShortTermMemory_model.h5", compile=False)

# Index -> label mapping; NOTE(review): not referenced by any visible cell.
class_labels = {0: 'NEGATIVE', 1: 'POSITIVE'}

In [4]:
def remove_emoticons(text):
    """Delete a fixed set of ASCII emoticons from ``text``.

    Emoticons are removed longest-first. Several entries contain a shorter
    entry as a substring (``O:)`` and ``3:)`` contain ``:)``, ``>:(`` contains
    ``:(``, ``>:O`` contains ``:O``); removing the short form first would
    leave a stray ``O``, ``3`` or ``>`` behind.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every occurrence of the listed emoticons removed.
        Surrounding whitespace is left untouched.
    """
    emoticons_to_remove = [":)", ":(", ":D", ";D", ":*", ":'(", ":/", "O:)", ":P", ":O", "&)", "^_^", ">:O", ":3", ">:(", "8|", "O.o", "-_-", "3:)", "<3", ":V", ":|]", "(^^^)", '<(")']
    # sorted() is stable, so emoticons of equal length keep their list order.
    for emoticon in sorted(emoticons_to_remove, key=len, reverse=True):
        text = text.replace(emoticon, '')
    return text

In [5]:
def text_cleaning(text):
    """Normalise a raw sentence and keep it only if it is detected as English.

    Steps, in order: remove ASCII emoticons, remove characters in the listed
    Unicode emoji/symbol ranges, remove URLs, remove @mentions and #hashtags,
    expand contractions, and delete every character that is not an ASCII
    letter, digit or whitespace.

    Args:
        text: Raw input string.

    Returns:
        The cleaned text in lower case when ``langdetect`` reports ``'en'``;
        otherwise the literal string ``'empty'`` (also returned when language
        detection raises, e.g. because nothing is left to analyse).
    """
    text = remove_emoticons(text)
    emoji_pattern = re.compile("["
                            u"\U0001F600-\U0001F64F"  # Emoticons
                            u"\U0001F300-\U0001F5FF"  # Miscellaneous symbols & pictographs
                            u"\U0001F680-\U0001F6FF"  # Transport & map symbols
                            u"\U0001F700-\U0001F77F"  # Alchemical symbols
                            u"\U0001F780-\U0001F7FF"  # Geometric shapes extended
                            u"\U0001F800-\U0001F8FF"  # Supplemental arrows-C
                            u"\U0001F900-\U0001F9FF"  # Supplemental symbols & pictographs
                            u"\U0001FA00-\U0001FA6F"  # Chess symbols
                            u"\U0001FA70-\U0001FAFF"  # Symbols & pictographs extended-A
                            u"\U0001F004-\U0001F0CF"  # Mahjong tiles through playing cards
                            u"\U0001F170-\U0001F251"  # Enclosed alphanumerics / ideographs (incl. flag letters)
                            u"\U00020000-\U0002F73F"  # Supplementary-plane CJK ideographs
                            u"\U000E0000-\U000E007F"  # Tags
                            "]+", flags=re.UNICODE)
    text = emoji_pattern.sub('', text)
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    text = re.sub(r'(@\w+|#\w+)', '', text)
    text = contractions.fix(text)
    # Punctuation is deleted, not replaced by a space, so "a,b" becomes "ab".
    # NOTE(review): presumably this must mirror the preprocessing used when the
    # tokenizer/model were trained — confirm before changing it.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # NOTE(review): langdetect documents its results as non-deterministic unless
    # DetectorFactory.seed is set; consider seeding it once in the setup cell.
    try:
        lang = detect(text)
    except Exception:
        # Best effort: any detection failure is treated as "not usable".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate so the cell can still be interrupted.
        return 'empty'

    if lang == 'en':
        return text.lower()
    return 'empty'

In [15]:
def text_prediction(text):
    """Clean ``text``, show its tokenised/padded form, and return a sentiment label.

    The sentence is passed through ``text_cleaning``, converted to integer
    sequences with the global ``tokenizer`` and post-padded/truncated to 50
    tokens. The cleaned text, the sequences and the padded array are printed.

    NOTE(review): the LSTM ``model`` is run on the padded input, but its output
    is never used. The label that is returned comes solely from ``register``
    (the Hugging Face pipeline) applied to the cleaned text. Confirm which of
    the two models is meant to drive the result.

    Args:
        text: Raw sentence to classify.

    Returns:
        The ``'label'`` field of the first pipeline result.
    """
    cleaned = text_cleaning(text)

    # Single-row frame, kept so the tokenizer receives the same array as before.
    frame = pd.DataFrame({'cleaned_text': [cleaned]})
    token_ids = tokenizer.texts_to_sequences(frame['cleaned_text'].values)
    padded = pad_sequences(token_ids, maxlen=50, padding="post", truncating="post")

    lstm_output = model.predict(padded)  # computed but not used (see NOTE above)
    pipeline_result = register(cleaned)
    label = pipeline_result[0]['label']

    print(f"Cleaned Text: {cleaned}", "\n")
    print(f"Vectorized Text: {token_ids}", "\n")
    print(f"Padded Text: {padded}", "\n")
    return label

---

In [7]:
# Read the sentence to classify from the user. The value is consumed by the
# text_prediction(text) call in the next cell, so a full re-run of the
# notebook stops here until a sentence is typed in.
text = input("Enter your sentence to perform depression prediction:")

Enter your sentence to perform depression prediction:The day is very worst, everything happening against me


In [16]:
# Clean and classify the sentence stored in `text`, then report the label that
# text_prediction returned (it also prints the intermediate representations).
predicted_sentiment = text_prediction(text)
print(f"Predicted Sentiment is [- {predicted_sentiment} -]")

Cleaned Text: the day is very worst everything happening against me 

Vectorized Text: [[3, 35, 4, 106, 751, 316, 1723, 2128, 18]] 

Padded Text: [[   3   35    4  106  751  316 1723 2128   18    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0]] 

Predicted Sentiment is [- NEGATIVE -]
