In [10]:
import re
import string
import joblib
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

In [11]:
class ReduceMeanLayer(tf.keras.layers.Layer):
    """Custom Keras layer that averages its input tensor over axis 1.

    The class is handed to ``load_model`` through ``custom_objects`` so the
    saved model can be rebuilt.
    NOTE(review): axis 1 is presumably the time-step axis of the LSTM
    output -- confirm against the training notebook.
    """

    def call(self, inputs):
        """Return the mean of ``inputs`` taken along axis 1."""
        averaged = tf.reduce_mean(inputs, axis=1)
        return averaged

In [12]:
# Relative file names of the saved artifacts; they are only defined here and
# are read by the loading cell.
model_save_path = 'best_lstm_model.keras'      # Keras model file
tokenizer_save_path = 'tokenizer.joblib'       # joblib-serialized tokenizer

In [13]:
# Restore the tokenizer. joblib.load unpickles the file, so only load
# tokenizer files that come from a trusted source.
tokenizer = joblib.load(tokenizer_save_path)

# Restore the model; the custom layer class is passed explicitly so Keras can
# resolve the 'ReduceMeanLayer' name stored in the saved file.
model = load_model(
    model_save_path,
    custom_objects={'ReduceMeanLayer': ReduceMeanLayer},
)

In [14]:
# Function to preprocess the input text
def preprocess_text(text, tokenizer, max_length=100):
    """Clean a raw text and turn it into a padded integer sequence.

    Cleaning steps, in order: lowercase; remove URLs, @mentions and
    #hashtags; remove standalone numbers; remove ASCII punctuation; drop
    English stopwords; lemmatize the remaining words.

    Args:
        text: Raw input string.
        tokenizer: Object exposing ``texts_to_sequences`` (here, the
            tokenizer loaded with joblib).
        max_length: Value forwarded to ``pad_sequences`` as ``maxlen``.
            NOTE(review): presumably has to equal the sequence length used
            at training time -- confirm.

    Returns:
        The output of ``pad_sequences`` for a batch containing this one text.
    """
    text = text.lower()
    # A single pass strips links, mentions and hashtags. A second '@\w+'
    # pass is unnecessary: nothing removed here can expose a new mention.
    text = re.sub(r'http\S+|www\S+|https\S+|@\w+|#\w+', '', text)
    text = re.sub(r'\b\d+\b', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Loop-invariant work is done once per call instead of once per word:
    # the stopword list is read a single time and held in a set for O(1)
    # membership tests, and one lemmatizer instance is reused.
    stop_words = frozenset(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    text = ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )

    seq = tokenizer.texts_to_sequences([text])
    padded_seq = pad_sequences(seq, maxlen=max_length)

    return padded_seq

In [15]:
#function to make predictions
def predict_text(text):
    """Classify one text as 'Hate Speech' or 'Non Hate Speech'.

    The text is run through ``preprocess_text`` with the notebook-level
    ``tokenizer`` and then scored by the notebook-level ``model``.

    Args:
        text: Raw input string.

    Returns:
        'Non Hate Speech' when the highest-scoring class index is 1,
        otherwise 'Hate Speech'.
        NOTE(review): any index other than 1 maps to 'Hate Speech'; this
        presumably assumes a two-class model -- confirm.
    """
    model_input = preprocess_text(text, tokenizer)
    class_scores = model.predict(model_input)
    # The batch holds a single row, so take the arg-max of row 0.
    winning_class = np.argmax(class_scores, axis=1)[0]
    if winning_class == 1:
        return 'Non Hate Speech'
    return 'Hate Speech'

In [18]:
# Prompt for five texts; echo each one with its 1-based position and print
# the label returned by predict_text.
for i in range(5):
    input_text = input("Enter the text to predict: ")  # input() already returns str
    print(f"Input text {i + 1}: {input_text}")
    prediction = predict_text(input_text)
    print(f"The input text is classified as: {prediction}\n")

Input text 1: he moviwatche is literraly shit, dont watch this fckkngg%^%^% movie
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
The input text is classified as: Hate Speech

Input text 2: i dont like that black nigga
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
The input text is classified as: Hate Speech

Input text 3: movie for me was below average , although some might like it
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
The input text is classified as: Non Hate Speech

Input text 4: nice , i love it
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
The input text is classified as: Non Hate Speech

Input text 5: this movie is very gooooood
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
The input text is classified as: Non Hate Speech

