# Neural Network Model Testing

This notebook tests the neural network model for sentiment prediction on news headlines.

In [29]:
# Import necessary libraries
import sys
import os
import pandas as pd
from pathlib import Path

# Add project root to path for imports.
# NOTE: '../..' is resolved against the current working directory, so the
# `src` package is only importable when the kernel's working directory is
# two levels below the project root (presumably this notebook's own
# directory — TODO confirm).
sys.path.append(os.path.abspath('../..'))

# Import project modules
from src.models.predict_model import ModelPredictor
from src.models.train_model import ModelTrainer
# NOTE(review): the star import hides which config names this notebook uses;
# cells below reference EXTERNAL_DATA_PATH, which is presumably defined there.
from src.config import *

## 1. Initialize the Model Predictor

This cell first trains the neural network model and then initializes the `ModelPredictor` class, which will use the trained model.

In [30]:
# Set to False to reuse a previously saved RNN model instead of retraining.
# The default (True) always retrains, which is what this cell did before the
# saved-model lookup was turned into an explicit switch.
FORCE_RETRAIN = True

models = None
if not FORCE_RETRAIN:
    # Look for saved RNN models, first in MODEL_DIR and then in EXPERIMENT_DIR.
    # NOTE(review): both names are assumed to come from `src.config` (star
    # import) — confirm they exist before flipping FORCE_RETRAIN to False.
    for candidate_dir in (MODEL_DIR, EXPERIMENT_DIR):
        models = list(Path(candidate_dir).glob('*rnn_*.pkl'))
        if models:
            break

# Train when retraining is forced or no saved model was found.
if not models:
    trainer = ModelTrainer()
    trainer.train_neural_network(batch_size=32)

# Initialize the predictor.
# NOTE(review): presumably ModelPredictor locates the trained model on its
# own, since nothing is passed to it here — confirm against predict_model.
predictor = ModelPredictor()

Starting RNN model training...
Class weights for balancing: {0: 1.0097222222222222, 1: 0.9807095642789694, 2: 1.0101431151868834}
Epoch 1/20
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - accuracy: 0.5125 - loss: 1.0023 - val_accuracy: 0.8136 - val_loss: 0.5213 - learning_rate: 0.0010
Epoch 2/20
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8384 - loss: 0.4894 - val_accuracy: 0.8535 - val_loss: 0.4518 - learning_rate: 0.0010
Epoch 3/20
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.8960 - loss: 0.3503 - val_accuracy: 0.8501 - val_loss: 0.4274 - learning_rate: 0.0010
Epoch 4/20
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.9223 - loss: 0.2596 - val_accuracy: 0.8549 - val_loss: 0.4135 - learning_rate: 0.0010
Epoch 5/20
[1m182/182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - accuracy: 0.9397 - loss: 0.2206 

## 2. Test Single Headline Prediction

Let's test the model on a single headline first to check if everything is working.

In [None]:
# Smoke test: run one positive-sounding headline through the predictor
test_headline = "Company profits exceed expectations in Q1 2025"
result = predictor.predict_neural_network(test_headline)

if not result:
    # Nothing usable came back from the predictor
    print("Prediction failed or no model found.")
else:
    # Report only the first entry of the returned sequence
    top = result[0]
    print(f"\nHeadline: {top['headline']}")
    print(f"Predicted Sentiment: {top['sentiment']}")
    print(f"Confidence: {top['confidence']:.2f}")

    # Per-class probabilities, in the order the predictor returned them
    print("\nAll Probabilities:")
    for label, probability in top['probabilities'].items():
        print(f"- {label}: {probability:.2f}")

## 3. Test Multiple Headlines

Now let's test the model on multiple headlines with different expected sentiments.

In [None]:
# A small batch of headlines covering different expected sentiments
test_headlines = [
    "Stock market reaches all-time high as investor confidence grows",
    "Major company announces significant layoffs due to economic downturn",
    "Global trade continues at steady pace despite mild fluctuations",
    "Tech giant releases new product line with innovative features",
    "Retail sales decline for third consecutive quarter",
]

results = predictor.predict_neural_network(test_headlines)

if not results:
    # Nothing usable came back from the predictor
    print("Prediction failed or no model found.")
else:
    # One short report per prediction, separated by a blank line
    for item in results:
        print(f"Headline: {item['headline']}")
        print(f"Predicted Sentiment: {item['sentiment']} (confidence: {item['confidence']:.2f})")
        print()

## 4. Test on Real Dataset

Let's load the test dataset and predict the sentiment of every headline in it.

In [None]:
# Load the test dataset from the external data directory
test_data_path = os.path.join(EXTERNAL_DATA_PATH, "test_data.csv")
test_df = pd.read_csv(test_data_path)

print(f"Loaded test data with {len(test_df)} headlines")

# Preview only the first few rows: rendering 100 rows buries the narrative
# and bloats the saved notebook without adding information.
test_df.head()


In [None]:
# Make predictions for every headline in the test set
headlines = test_df['News Headline'].tolist()
results = predictor.predict_neural_network(headlines)

# Fail loudly when prediction yields nothing: without predictions there is no
# results_df to build, and carrying on would either raise a NameError at the
# preview below or silently display a stale results_df from an earlier run.
if not results:
    raise RuntimeError("Prediction failed or no model found.")

# The new columns are aligned to test_df by position, so the predictor must
# return exactly one entry per headline.
if len(results) != len(test_df):
    raise ValueError(
        f"Expected {len(test_df)} predictions but got {len(results)}"
    )

predicted_sentiments = [r['sentiment'] for r in results]
confidence_scores = [round(r['confidence'], 2) for r in results]

# Build the results on a copy so test_df itself stays untouched and this cell
# can be re-run safely; the ground-truth column is renamed to sit next to the
# prediction columns.
results_df = test_df.copy()
results_df = results_df.rename(columns={'Sentiment': 'Actual Sentiment'})
results_df['Predicted Sentiment'] = predicted_sentiments
results_df['Confidence'] = confidence_scores

print("Predictions completed")

# Show some results
results_df.head(3)

## 5. Model Evaluation

Let's evaluate the model's performance on the test dataset.

In [None]:
from sklearn.metrics import classification_report

# Predicted labels and ground-truth labels, taken from the same rows of results_df
y_pred = results_df['Predicted Sentiment']
y_true = results_df['Actual Sentiment']

# Per-class precision / recall / F1 summary as plain text
report = classification_report(y_true, y_pred)
print(report)

In [None]:
# Boolean mask: True where the predicted label equals the actual label
is_correct = (
    results_df['Predicted Sentiment'] == results_df['Actual Sentiment']
)

# Number of correct (True) versus incorrect (False) predictions
is_correct.value_counts()