**Reload Embedding**

In [3]:
import numpy as np

X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")

print("Embeddings Reloaded")

Embeddings Reloaded


In [7]:
from sentence_transformers import SentenceTransformer

# Load embedding model
embedder = SentenceTransformer('all-MiniLM-L6-v2')

In [8]:
X_train.shape

(120000, 384)

**LSTM Model**

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

# Reshape input to (batch_size, time_steps, input_dim) → (24, 16)
X_train_reshaped = X_train.reshape(-1, 24, 16).astype(np.float32)
X_test_reshaped = X_test.reshape(-1, 24, 16).astype(np.float32)

# Ensure labels are int32
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Define the LSTM model
model = models.Sequential([
    layers.LSTM(128, input_shape=(24, 16), return_sequences=False),
    layers.Dense(4, activation='softmax')  # 4 classes for AG News
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train_reshaped, y_train, epochs=5, batch_size=64, validation_data=(X_test_reshaped, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test_reshaped, y_test, batch_size=64)
print(f"LSTM Accuracy: {accuracy * 100:.2f}%")

  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 50ms/step - accuracy: 0.6478 - loss: 0.8621 - val_accuracy: 0.8130 - val_loss: 0.5161
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 43ms/step - accuracy: 0.8259 - loss: 0.4791 - val_accuracy: 0.8453 - val_loss: 0.4329
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 48ms/step - accuracy: 0.8516 - loss: 0.4196 - val_accuracy: 0.8524 - val_loss: 0.4107
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 40ms/step - accuracy: 0.8649 - loss: 0.3822 - val_accuracy: 0.8624 - val_loss: 0.3932
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 45ms/step - accuracy: 0.8717 - loss: 0.3633 - val_accuracy: 0.8668 - val_loss: 0.3694
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.8654 - loss: 0.3821
LSTM Accuracy: 86.68%


**Save Model**

In [12]:
# Save the model

model.save("lstm_model.keras")

**Test Case**

In [17]:
import numpy as np
from sentence_transformers import SentenceTransformer
from tensorflow.keras.models import load_model
import re

# Load model and embedder
model = load_model("lstm_model.keras")
embedder = SentenceTransformer('all-MiniLM-L6-v2')
labels = ["World", "Sports", "Business", "Sci/Tech"]

# Function to split text into sentences
def split_into_sentences(text):
    # You can improve this with better NLP sentence segmentation later
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s.strip() for s in sentences if s.strip()]

# Prediction for a single news text
def predict_news_category_lstm(news_text):
    embedded = embedder.encode([news_text])  # shape: (1, 384)
    lstm_input = np.array(embedded).reshape(1, 24, 16).astype(np.float32)
    predictions = model.predict(lstm_input)
    predicted_class = np.argmax(predictions, axis=1)[0]
    return labels[predicted_class]

# ---------- TEST CASES ----------

print("Choose test case:")
print("1. Classify a single news input")
print("2. Classify each sentence from a paragraph/article")
print("3. Extract only 'Sports' news from a paragraph/article")

choice = input("Enter 1, 2, or 3: ").strip()

if choice == "1":
    # Test Case 1
    news = input("Enter a news: ")
    category = predict_news_category_lstm(news)
    print("Predicted Category (LSTM):", category)

elif choice == "2":
    # Test Case 2
    paragraph = input("Enter a full paragraph/article: ")
    sentences = split_into_sentences(paragraph)
    print("\n🧠 Predictions for each sentence:")
    for i, sentence in enumerate(sentences):
        category = predict_news_category_lstm(sentence)
        print(f"{i+1}. [{category}] {sentence}")

elif choice == "3":
    # Test Case 3 (Generalized to any category)
    print("\nAvailable categories:", ", ".join(labels))
    target_category = input("Enter the category you want to extract: ").strip().title()

    if target_category not in labels:
        print("❌ Invalid category selected.")
    else:
        paragraph = input("Enter a full paragraph/article: ")
        sentences = split_into_sentences(paragraph)
        print(f"\n🔍 Extracted '{target_category}' News:")
        found = False
        for i, sentence in enumerate(sentences):
            category = predict_news_category_lstm(sentence)
            if category == target_category:
                found = True
                print(f"{i+1}. {sentence}")
        if not found:
            print(f"No '{target_category}' content found.")
else:
    print("❌ Invalid choice. Please enter 1, 2, or 3.")


Choose test case:
1. Classify a single news input
2. Classify each sentence from a paragraph/article
3. Extract only 'Sports' news from a paragraph/article

Available categories: World, Sports, Business, Sci/Tech

🔍 Extracted 'Business' News:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 416ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
2. The stock market crashed due to inflation.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step


In [1]:
pip show tensorflow

Name: tensorflow
Version: 2.19.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: C:\Users\junai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages
Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras, libclang, ml-dtypes, numpy, opt-einsum, packaging, protobuf, requests, setuptools, six, tensorboard, termcolor, typing-extensions, wrapt
Required-by: tf_keras
Note: you may need to restart the kernel to use updated packages.
