In [1]:
from tensorflow.keras.models import load_model
import pickle
import numpy as np

# Load the saved models
cnn_model = load_model("cnn_model_sh.keras")
lstm_model = load_model("lstm_model_sh.keras")
rnn_model = load_model("rnn_model_sh.keras")
meta_model = load_model("meta_model_sh.keras")

# Load the tokenizer
with open("tokenizer_sh.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Load the label encoder
with open("label_encoder_sh.pkl", "rb") as f:
    label_encoder = pickle.load(f)

print("Models, Tokenizer, and Label Encoder loaded successfully!")

Models, Tokenizer, and Label Encoder loaded successfully!


In [10]:
import re
import pandas as pd
import spacy
import json
nlp = spacy.blank("hi")  # blank model for tokenization
with open("safe_stopwords_hi.json", "r", encoding="utf-8") as f:
    hindi_stopwords = set(json.load(f))

# Only remove non-Hindi characters and unwanted symbols
def preprocess_text(text):
    if pd.isna(text):
        return ""
    text = str(text).lower()
    # Remove only English letters, special symbols; keep Hindi and digits if needed
    text = re.sub(r'[a-zA-Z]', '', text)
    text = re.sub(r'[^\u0900-\u097F0-9\s]', '', text)  # Keep Hindi chars only
    text = re.sub(r'\s+', ' ', text).strip()
    #return text
# 2. Tokenizer using spaCy
    doc = nlp(text)
    tokens= [token.text for token in doc if token.text not in hindi_stopwords and token.text.strip() != ""]
    return " ".join(tokens)



In [11]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

def predict_category(headline):
    # Step 1: Preprocess the input headline
    headline = preprocess_text(headline)
    sequence = tokenizer.texts_to_sequences([headline])
    padded_sequence = pad_sequences(sequence, maxlen=200)

    # Step 2: Get predictions from base models
    cnn_pred = cnn_model.predict(padded_sequence)
    lstm_pred = lstm_model.predict(padded_sequence)
    rnn_pred = rnn_model.predict(padded_sequence)

    # Step 3: Stack predictions
    stacked_pred = np.concatenate([cnn_pred, lstm_pred, rnn_pred], axis=1)

    # Step 4: Meta-model final prediction
    final_pred = meta_model.predict(stacked_pred)

    # Step 5: Decode prediction
    predicted_class = label_encoder.inverse_transform([np.argmax(final_pred)])
    confidence = float(np.max(final_pred))

    return predicted_class[0], round(confidence, 3)



In [13]:
# Example headline
headline = "नयी फिल्म 'पठान' ने बॉक्स ऑफिस पर मचाया धमाल"

# Get the predicted category
category, conf=  predicted_category = predict_category(headline)

# Print the predicted category
print(f"The predicted category for the headline is: {predicted_category}({conf*100:.2f}% confidence)")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
The predicted category for the headline is: ('Entertainment', 0.997)(99.70% confidence)


In [14]:
!pip install flask


Collecting flask
  Using cached flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting itsdangerous>=2.2 (from flask)
  Using cached itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting blinker>=1.9 (from flask)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Using cached flask-3.1.0-py3-none-any.whl (102 kB)
Using cached blinker-1.9.0-py3-none-any.whl (8.5 kB)
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Installing collected packages: itsdangerous, blinker, flask
Successfully installed blinker-1.9.0 flask-3.1.0 itsdangerous-2.2.0
