In [19]:
import re
import string
import joblib
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

In [20]:
class ReduceMeanLayer(tf.keras.layers.Layer):
    """Custom Keras layer that averages its input tensor along axis 1."""

    def call(self, inputs):
        """Return the mean of ``inputs`` computed over axis 1.

        NOTE(review): axis 1 is presumably the time-step axis of a
        (batch, steps, features) tensor -- confirm against the model definition.
        """
        averaged = tf.reduce_mean(inputs, axis=1)
        return averaged

In [21]:
# Locations of the serialized artifacts used for inference, given as paths
# relative to the notebook's working directory.
model_save_path = 'best_lstm_model.keras'    # saved Keras model
tokenizer_save_path = 'tokenizer.joblib'     # joblib dump of the tokenizer

In [22]:
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load a tokenizer file that comes from a trusted source.
tokenizer = joblib.load(tokenizer_save_path)

# Map the name 'ReduceMeanLayer' to the class defined in this notebook so
# Keras can resolve it while deserializing the saved model.
model = load_model(
    model_save_path,
    custom_objects=dict(ReduceMeanLayer=ReduceMeanLayer),
)

In [23]:
def preprocess_text(text, tokenizer, max_length=100):
    """Clean a raw string and convert it into a padded token-id sequence.

    Cleaning steps, in order: lowercase; strip URLs (``http...``/``www...``),
    ``@mentions`` and ``#hashtags``; drop standalone numbers; remove ASCII
    punctuation; drop English stop words; lemmatize the remaining
    whitespace-separated words.

    NOTE(review): presumably this mirrors the preprocessing used when the
    tokenizer and model were fitted, so the result of every step is kept
    exactly as before -- confirm against the training code before changing
    what any step produces.

    Args:
        text: Raw input string.
        tokenizer: Object exposing ``texts_to_sequences(list_of_str)``.
        max_length: Value passed to ``pad_sequences`` as ``maxlen``.

    Returns:
        The output of ``pad_sequences`` for the single cleaned text.
    """
    text = text.lower()
    # A single pass removes URLs, @mentions and #hashtags. A second
    # substitution for '@\w+' alone would have nothing left to match.
    text = re.sub(r'http\S+|www\S+|https\S+|@\w+|#\w+', '', text)
    text = re.sub(r'\b\d+\b', '', text)
    # Delete every character listed in string.punctuation.
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Built once per call instead of once per word: stopwords.words() hands
    # back a list, so a set gives constant-time membership checks, and one
    # lemmatizer instance can be reused for all words.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    text = ' '.join(
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    )

    seq = tokenizer.texts_to_sequences([text])
    padded_seq = pad_sequences(seq, maxlen=max_length)

    return padded_seq

In [30]:
def predict_text(text):
    """Classify ``text`` and return a human-readable label.

    The text is run through ``preprocess_text`` with the module-level
    ``tokenizer`` and then scored by the module-level ``model``. The index of
    the highest score in the first (only) row decides the label.

    NOTE(review): index 1 is treated as the non-hate class and every other
    index as hate speech -- confirm this matches the label encoding used
    during training.

    Returns:
        ``'Non Hate Speech'`` when the top-scoring index is 1, otherwise
        ``'Hate Speech'``.
    """
    model_input = preprocess_text(text, tokenizer)
    class_scores = model.predict(model_input)
    top_class = np.argmax(class_scores, axis=1)[0]
    if top_class == 1:
        return 'Non Hate Speech'
    return 'Hate Speech'

In [32]:
# Interactive demo: read four texts from the user and classify each one.
for i in range(4):
    # input() already returns a str, so no conversion is needed.
    input_text = input("Enter the text to predict : ")
    print(input_text)  # echo the entry so it shows up in the cell output
    prediction = predict_text(input_text)
    print(f"The input text is classified as: {prediction}")

this movie is the worst, anyone who likes the movie is an asshole
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
The input text is classified as: Hate Speech
Just watched the latest release, and it was amazing! Can’t wait to see it again
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
The input text is classified as: Non Hate Speech
personally i dont like the movie, but it might be okay for others
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
The input text is classified as: Non Hate Speech
shit movie, shit actors, omg i cant believe i watched this #$$%$^^ movie from the theatre
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
The input text is classified as: Hate Speech
