In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
from tensorflow.keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk import wordpunct_tokenize
from nltk import FreqDist
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Download the 'punkt' and 'stopwords' NLTK packages; preprocess() in this
# notebook calls word_tokenize and stopwords.words('english').
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\arsal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\arsal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
from keras.models import load_model, model_from_json

In [4]:
# Rebuild the network from its JSON description, then attach the saved weights.
# The context manager closes the JSON file; a bare open(...).read() leaves the
# handle open until the garbage collector gets to it.
with open("models/json/best_model.json", "r") as model_json_file:
    model = model_from_json(model_json_file.read())
model.load_weights('models/json/best_model_weights.h5')

In [5]:
# Load the complete saved model from a single .h5 file. This rebinds `model`,
# replacing whatever was previously assigned to that name.
model = load_model('models/best/best_model.h5')

In [6]:
import pickle

# Restore the `tokenizer` object that was pickled to data/tokenizer.pkl.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only load artifacts produced by a trusted pipeline.
tokenizer_path = 'data/tokenizer.pkl'
with open(tokenizer_path, mode='rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [7]:
# Chat abbreviation -> expansion.
# Every key is upper-case, so a lookup must upper-case the token first.
# Changes from the previous revision: the duplicated "B4N" entry was removed
# (same value, no behavior change) and the "ILU" expansion no longer repeats
# the abbreviation itself ("ILU: I Love You" -> "I Love You").
chat_words = {
    "AFAIK": "As Far As I Know",
    "AFK": "Away From Keyboard",
    "ASAP": "As Soon As Possible",
    "ATK": "At The Keyboard",
    "ATM": "At The Moment",
    "A3": "Anytime, Anywhere, Anyplace",
    "BAK": "Back At Keyboard",
    "BBL": "Be Back Later",
    "BBS": "Be Back Soon",
    "BFN": "Bye For Now",
    "B4N": "Bye For Now",
    "BRB": "Be Right Back",
    "BRT": "Be Right There",
    "BTW": "By The Way",
    "B4": "Before",
    "CU": "See You",
    "CUL8R": "See You Later",
    "CYA": "See You",
    "FAQ": "Frequently Asked Questions",
    "FC": "Fingers Crossed",
    "FWIW": "For What It's Worth",
    "FYI": "For Your Information",
    "GAL": "Get A Life",
    "GG": "Good Game",
    "GN": "Good Night",
    "GMTA": "Great Minds Think Alike",
    "GR8": "Great!",
    "G9": "Genius",
    "IC": "I See",
    "ICQ": "I Seek you (also a chat program)",
    "ILU": "I Love You",
    "IMHO": "In My Honest/Humble Opinion",
    "IMO": "In My Opinion",
    "IOW": "In Other Words",
    "IRL": "In Real Life",
    "KISS": "Keep It Simple, Stupid",
    "LDR": "Long Distance Relationship",
    "LMAO": "Laugh My A.. Off",
    "LOL": "Laughing Out Loud",
    "LTNS": "Long Time No See",
    "L8R": "Later",
    "MTE": "My Thoughts Exactly",
    "M8": "Mate",
    "NRN": "No Reply Necessary",
    "OIC": "Oh I See",
    "PITA": "Pain In The A..",
    "PRT": "Party",
    "PRW": "Parents Are Watching",
    "QPSA?": "Que Pasa?",
    "ROFL": "Rolling On The Floor Laughing",
    "ROFLOL": "Rolling On The Floor Laughing Out Loud",
    "ROTFLMAO": "Rolling On The Floor Laughing My A.. Off",
    "SK8": "Skate",
    "STATS": "Your sex and age",
    "ASL": "Age, Sex, Location",
    "THX": "Thank You",
    "TTFN": "Ta-Ta For Now!",
    "TTYL": "Talk To You Later",
    "U": "You",
    "U2": "You Too",
    "U4E": "Yours For Ever",
    "WB": "Welcome Back",
    "WTF": "What The F...",
    "WTG": "Way To Go!",
    "WUF": "Where Are You From?",
    "W8": "Wait...",
    "7K": "Sick:-D Laugher",
    "TFW": "That feeling when",
    "MFW": "My face when",
    "MRW": "My reaction when",
    "IFYP": "I feel your pain",
    "TNTL": "Trying not to laugh",
    "JK": "Just kidding",
    "IDC": "I don't care",
    "ILY": "I love you",
    "IMU": "I miss you",
    "ADIH": "Another day in hell",
    "ZZZ": "Sleeping, bored, tired",
    "WYWH": "Wish you were here",
    "TIME": "Tears in my eyes",
    "BAE": "Before anyone else",
    "FIMH": "Forever in my heart",
    "BSAAW": "Big smile and a wink",
    "BWL": "Bursting with laughter",
    "BFF": "Best friends forever",
    "CSL": "Can't stop laughing"
}

In [8]:
def replace_chat_words(text, mapping=None):
    """Expand chat abbreviations found in ``text``.

    The text is split on whitespace and each token is looked up
    case-insensitively: the token is upper-cased before the lookup because the
    abbreviation table is keyed by upper-case strings. The previous version
    lower-cased the token instead, so no token could ever match an upper-case
    key and the function returned its input unchanged (apart from whitespace
    normalisation).

    NOTE(review): this changes what preprocess() feeds the model. If the model
    was trained with the no-op version, confirm the training pipeline is
    updated to match.

    Args:
        text: Input string.
        mapping: Optional ``{ABBREVIATION: expansion}`` dict with upper-case
            keys. Defaults to the module-level ``chat_words`` table.

    Returns:
        The tokens joined by single spaces, with matched abbreviations
        replaced by their expansions.
    """
    if mapping is None:
        mapping = chat_words
    return ' '.join(mapping.get(word.upper(), word) for word in text.split())

def preprocess(text):
    """Normalise raw text into the space-separated stemmed form fed to the tokenizer.

    Steps, in order:
      1. Expand chat abbreviations via ``replace_chat_words``.
      2. Drop every character that is not an ASCII letter or whitespace
         (this removes digits and punctuation as well), then lower-case.
      3. Remove English stop words.
      4. Remove any ``http...`` run (URL residue left after step 2).
      5. Tokenize with ``word_tokenize`` and Porter-stem each token.

    The earlier version also ran separate "remove numbers", "collapse
    whitespace" and "remove punctuation" passes after step 3. Those could not
    match anything: after steps 2-3 the text holds only ``a-z`` and single
    spaces. They were removed, and the output is unchanged.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, stemmed text as a single space-joined string.
    """
    text = replace_chat_words(text)

    # Keep ASCII letters and whitespace only; digits and punctuation go here.
    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()

    # A set gives O(1) membership tests; the list form was scanned per word.
    # NOTE(review): apostrophes are already stripped at this point, so a
    # contraction such as "don't" arrives as "dont" and may not match the
    # apostrophised entries of the stop list. Left as is to keep the output
    # identical to what the model was presumably trained on; confirm.
    stop = set(stopwords.words('english'))
    text = ' '.join(word for word in text.split() if word not in stop)

    # Strip URL residue, e.g. "http://a.b" has become "httpab" by now.
    text = re.sub(r'http\S+', '', text)

    # Stem every token and re-join with single spaces.
    stemmer = PorterStemmer()
    return ' '.join(stemmer.stem(word) for word in word_tokenize(text))

In [9]:
# Sample sentence for ad-hoc experiments. It is bound to `sample_text` rather
# than `str` so the built-in `str` type is not shadowed for the rest of the
# kernel session (any later `str(x)` call would otherwise fail).
sample_text = 'I feel like I am very previledged to be a part of this society and want to contribute more to it.'

In [10]:
# Emotion label -> integer class index.
# NOTE(review): 'enthusiasm' and 'sadness' both map to 9, so the inverse table
# below keeps only 'sadness' for index 9 and 'enthusiasm' can never be
# returned. The intended encoding is not visible here; confirm it against the
# label encoding used at training time.
label_mapping = dict(
    hate=0, neutral=1, anger=2, love=3, worry=4, relief=5, happiness=6,
    fun=7, empty=8, enthusiasm=9, sadness=9, surprise=10,
)

# Inverse lookup: class index -> emotion label (the later label wins on a clash).
emotion_dict = dict(zip(label_mapping.values(), label_mapping.keys()))

def predict(text, maxlen=79):
    """Predict the emotion label for a single piece of text.

    The text is cleaned with ``preprocess``, converted to an integer sequence
    by the module-level ``tokenizer``, post-padded to ``maxlen`` and passed to
    the module-level ``model``. The class with the highest score is mapped
    back to a label through ``emotion_dict``.

    Args:
        text: Raw input string.
        maxlen: Length the token sequence is padded to. Defaults to 79, the
            value that was previously hard-coded (presumably the sequence
            length the model was trained with; confirm).

    Returns:
        The predicted emotion label (a key of ``label_mapping``).

    Raises:
        KeyError: If the winning class index has no entry in ``emotion_dict``.
    """
    cleaned = preprocess(text)

    # Tokenize and pad the single sample.
    sequences = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(sequences, maxlen=maxlen, padding='post')

    probabilities = model.predict(padded)

    # One sample was submitted, so take the argmax over its row explicitly
    # instead of over the flattened batch.
    # NOTE(review): assumes model.predict returns one row of class scores per
    # input; confirm against the model's output shape.
    class_index = int(np.argmax(probabilities[0]))

    if class_index not in emotion_dict:
        raise KeyError(
            f"Model predicted class index {class_index}, "
            "which has no entry in emotion_dict"
        )
    return emotion_dict[class_index]

In [11]:
# Sample passage used to exercise the classifier. Adjacent string literals are
# concatenated by Python, so the value is one single-line string.
custom_text = (
    "In the bustling city streets, she walked with a steady pace, "
    "her lovely gaze fixed on the ground ahead. "
    "Amidst the chaos, she remained untouched, a silent observer in the crowd. "
    "Her expression masked any hint of emotion, a veil of neutrality cloaking "
    "her thoughts and feelings from prying eyes "
    "and thats when I realized I am in love with her."
)

In [12]:
# Run the full preprocess -> tokenize -> pad -> model pipeline on the sample passage.
predicted_sentiment = predict(custom_text)

In [14]:
# Report the predicted label in title case.
print(f"Predicted sentiment: {predicted_sentiment.title()}")

Predicted sentiment: Love


In [27]:
import pickle

# Re-load the pickled `tokenizer` from data/tokenizer.pkl (the same file an
# earlier cell of this notebook reads).
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# only load artifacts produced by a trusted pipeline.
tokenizer_path = 'data/tokenizer.pkl'
with open(tokenizer_path, mode='rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

In [25]:
# Clean and stem the sample passage so the intermediate steps can be inspected one by one.
text = preprocess(custom_text)

In [58]:
# Convert the cleaned text into a list of integer token-id sequences
# (one sequence per input string; no padding is applied at this step).
X_sequences = tokenizer.texts_to_sequences([text])

In [59]:
# Display the token ids produced for the sample passage.
X_sequences

[[867, 68, 18, 1105, 740, 1, 867, 503, 902]]

In [69]:
# Longest token sequence in this batch. It is informational only: the padding
# call below does not use it.
maxlen = max(map(len, X_sequences))

# Post-pad to a fixed length of 79, the same value predict() uses.
# NOTE(review): 79 is presumably the training-time sequence length; confirm.
X_padded = pad_sequences(X_sequences, padding='post', maxlen=79)

In [70]:
# Display the observed sequence length (not the padded length).
maxlen

9

In [71]:
# Emotion label -> integer class index (same table an earlier cell defines).
# NOTE(review): 'enthusiasm' and 'sadness' both map to 9, so the inverse table
# below keeps only 'sadness' for index 9 and 'enthusiasm' can never be
# returned. Confirm the intended encoding against the training-time labels.
label_mapping = dict(
    hate=0, neutral=1, anger=2, love=3, worry=4, relief=5, happiness=6,
    fun=7, empty=8, enthusiasm=9, sadness=9, surprise=10,
)

# Inverse lookup: class index -> emotion label (the later label wins on a clash).
emotion_dict = dict(zip(label_mapping.values(), label_mapping.keys()))

In [72]:
# Score the padded sample with the loaded model.
predicted_probabilities = model.predict(X_padded)

In [73]:
# Map the highest-scoring class index back to its label and display it in title case.
# NOTE(review): the output recorded for this cell ('Hate') differs from the
# 'Love' recorded for predict(custom_text) on the same passage. The execution
# counts are out of order, so this cell probably ran against different kernel
# state; re-run from a fresh kernel to confirm.
emotion_dict[np.argmax(predicted_probabilities)].title()

'Hate'

In [26]:
def get_text_info(text):
    """Summarise ``text`` by its most frequent tokens and its word count.

    Args:
        text: Input string.

    Returns:
        A ``(common_words, num_words)`` tuple. ``common_words`` is the list of
        up to ten ``(token, count)`` pairs from ``FreqDist.most_common`` over
        the ``wordpunct_tokenize`` tokens; ``num_words`` is the number of
        whitespace-separated words in ``text``.
    """
    frequency = FreqDist(wordpunct_tokenize(text))
    top_ten = frequency.most_common(10)
    word_total = len(text.split())
    return top_ten, word_total

In [27]:
# Most frequent (token, count) pairs and the whitespace word count of the cleaned text.
common, num = get_text_info(text)

In [28]:
# Display the (token, count) pairs.
common

[('love', 2),
 ('bustl', 1),
 ('citi', 1),
 ('street', 1),
 ('walk', 1),
 ('steadi', 1),
 ('pace', 1),
 ('gaze', 1),
 ('fix', 1),
 ('ground', 1)]

In [29]:
# Display the word count of the cleaned text.
num

32

In [30]:
# Rank the (token, frequency) pairs held in `common`, highest frequency first.
# The previous loop unpacked into a variable called `num`, which silently
# overwrote the word count stored in `num` by an earlier cell. dict(common)
# builds the same token -> frequency table without any loop variable.
count = sorted(dict(common).items(), key=lambda item: item[1], reverse=True)

# Keep just the tokens of the three highest-ranked pairs.
top_three = [token for token, _ in count[:3]]

top_three

['love', 'bustl', 'citi']

In [34]:
def get_top(text, top_n=5):
    """Return the most frequent tokens of ``text`` and its word count.

    ``FreqDist.most_common`` already yields pairs ordered by descending count,
    so the earlier dict rebuild and second sort were redundant. For the default
    ``top_n=5`` the result is the same as the previous
    "top 10 -> re-sort -> first 5".

    Args:
        text: Input string.
        top_n: How many of the most frequent tokens to return. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        A ``(top, num_words)`` tuple. ``top`` is a list of up to ``top_n``
        tokens (from ``wordpunct_tokenize``), most frequent first;
        ``num_words`` is the number of whitespace-separated words in ``text``.
    """
    words = wordpunct_tokenize(text)
    top = [word for word, _ in FreqDist(words).most_common(top_n)]
    num_words = len(text.split())
    return top, num_words

In [35]:
# Top tokens and whitespace word count of the cleaned sample passage.
common_words, num_words = get_top(text)

In [37]:
# Display the top tokens.
common_words

['love', 'bustl', 'citi', 'street', 'walk']

In [38]:
# Display the word count.
num_words

32

['love', 'bustl', 'citi']