# Stock Price Prediction with LSTM - Example Usage

This notebook demonstrates how to use the stock prediction package to analyze stock prices and make future predictions.

## Features Demonstrated
- Data loading and preprocessing
- LSTM model training
- Price prediction
- Risk classification
- Visualization

## 1. Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Make the project importable when the kernel is started from this notebook's folder.
# The imports below use the `src.` package prefix, which Python resolves from the directory
# that CONTAINS `src/` (presumably the project root, '..' — confirm layout), not from `src/` itself.
# '../src' is kept first so anything that already resolved through it keeps resolving the same way.
for _extra_path in ('../src', '..'):
    if _extra_path not in sys.path:  # stay idempotent when the cell is re-run
        sys.path.append(_extra_path)

from src.data_processor import StockDataProcessor
from src.model import LSTMModel
from src.predictor import StockPredictor, RiskClassifier
from src.visualizer import StockVisualizer
from src.utils import set_random_seeds, create_output_directories
from src.utils import calculate_metrics
import config

# Fix the seed up front so repeated runs of the notebook are comparable
RANDOM_SEED = 42
set_random_seeds(RANDOM_SEED)

# Prepare the output folders via the project helper
create_output_directories()

print("Setup complete!")

## 2. Load and Explore Data

In [None]:
# Initialize data processor
processor = StockDataProcessor(window_size=3)

# Load sample data (replace with your data path)
data_path = '../data/raw/sample_stock_data.csv'  # Update this path

# First run only: synthesize a random-walk price series so the notebook works without real data
if not os.path.exists(data_path):
    print("Creating sample data...")

    sample_dates = pd.date_range('2010-01-01', '2021-12-31', freq='D')

    # A private RandomState(42) yields exactly the same draws as
    # np.random.seed(42) followed by np.random.randn(...), but it leaves the global
    # NumPy RNG untouched. Later cells therefore see the same global RNG state
    # whether or not this branch ran (i.e. on the first run and on every re-run).
    sample_rng = np.random.RandomState(42)
    sample_prices = 100 + np.cumsum(sample_rng.randn(len(sample_dates)) * 0.5)

    sample_data = pd.DataFrame({
        'Date': sample_dates,
        'Close': sample_prices,
        'Index': 'SAMPLE'
    })

    # Create the folder data_path actually points into (not a hard-coded one), so the
    # cell keeps working after data_path is edited; a bare filename maps to the cwd.
    os.makedirs(os.path.dirname(data_path) or '.', exist_ok=True)
    sample_data.to_csv(data_path, index=False)
    print(f"Sample data created at {data_path}")

# Load and preprocess the data; the returned scaler is reused later to map
# predictions back through inverse_transform
df, scaler = processor.load_and_preprocess(data_path, 'SAMPLE')

print(f"Data shape: {df.shape}")
print(f"Date range: {df.index.min()} to {df.index.max()}")
df.head()

## 3. Visualize Historical Data

In [None]:
# Plotting helper shared by every visualization cell below
visualizer = StockVisualizer()

# Draw the preprocessed price series returned by the processor
history_title = "Historical Stock Prices (Normalized)"
visualizer.plot_price_history(df, history_title)

## 4. Prepare Data for Training

In [None]:
# Turn the preprocessed frame into (dates, feature windows, targets)
windowed = processor.df_to_windowed_df_simple(df)
dates, X, y = windowed

print("Windowed data shapes:")
for shape_label, array in (("Dates", dates), ("Features (X)", X), ("Targets (y)", y)):
    print(f"{shape_label}: {array.shape}")

# Partition the windows into train / validation / test; the result is keyed
# by '<field>_<split>' (e.g. 'X_train', 'y_val', 'dates_test')
data_splits = processor.split_data(dates, X, y)

print("\nData splits:")
for split_label, split_key in (("Training", "X_train"), ("Validation", "X_val"), ("Test", "X_test")):
    print(f"{split_label}: {data_splits[split_key].shape[0]} samples")

## 5. Visualize Data Splits

In [None]:
# Collect (dates, targets) for train, val and test in the positional order the plot expects:
# dates_train, y_train, dates_val, y_val, dates_test, y_test
split_plot_args = [
    data_splits[f'{field}_{split}']
    for split in ('train', 'val', 'test')
    for field in ('dates', 'y')
]
visualizer.plot_data_splits(*split_plot_args)

## 6. Train LSTM Model

In [None]:
# Build the network with the same window length the processor used to create X,
# so the two cannot drift apart when the window size is changed in section 2.
model = LSTMModel(window_size=processor.window_size, lstm_units=64, dense_units=32)

print("Training LSTM model...")
# train_simple returns the fitted model object that later cells call .predict on;
# `model` itself is kept because it is used for saving and for the summary.
trained_model = model.train_simple(
    data_splits['X_train'], data_splits['y_train'],
    data_splits['X_val'], data_splits['y_val'],
    epochs=50  # Reduced for faster execution in notebook
)

print("Model training complete!")

## 7. Make Predictions and Evaluate

In [None]:
# Run the trained network over each split; flatten() collapses each prediction array to 1-D
train_predictions, val_predictions, test_predictions = (
    trained_model.predict(data_splits[f'X_{split}'], verbose=0).flatten()
    for split in ('train', 'val', 'test')
)

# Score each split against its targets (calculate_metrics is imported with the
# other project helpers in the import cell at the top of the notebook).
# NOTE(review): predictions are only passed through scaler.inverse_transform in section 9,
# so these metrics are presumably on the normalized scale, not in price units — confirm.
train_metrics = calculate_metrics(data_splits['y_train'], train_predictions)
val_metrics = calculate_metrics(data_splits['y_val'], val_predictions)
test_metrics = calculate_metrics(data_splits['y_test'], test_predictions)

print("Model Performance:")
for split_label, split_metrics in (
    ("Train", train_metrics),
    ("Validation", val_metrics),
    ("Test", test_metrics),
):
    print(f"{split_label} MAE: {split_metrics['MAE']:.6f}")

print(f"\nTest RMSE: {test_metrics['RMSE']:.6f}")
print(f"Test MAPE: {test_metrics['MAPE']:.2f}%")

## 8. Visualize Model Performance

In [None]:
# Assemble (dates, actual, predicted) triples for train, val and test, in that order,
# and hand them to the plot as positional arguments
training_plot_args = []
for split, predicted in (
    ('train', train_predictions),
    ('val', val_predictions),
    ('test', test_predictions),
):
    training_plot_args += [data_splits[f'dates_{split}'], data_splits[f'y_{split}'], predicted]

visualizer.plot_training_results(*training_plot_args)

## 9. Risk Classification

In [None]:
# Risk classifier with its default configuration
classifier = RiskClassifier()

# Map the test predictions back through the scaler before classifying;
# inverse_transform is given a single-column 2-D array, then the result is flattened again
test_dates = data_splits['dates_test']
test_predictions_column = test_predictions.reshape(-1, 1)
test_predictions_denorm = scaler.inverse_transform(test_predictions_column).flatten()

# Daily and monthly classifications over the same test-period predictions
daily_classification = classifier.classify_daily(test_dates, test_predictions_denorm, "SAMPLE")
monthly_classification = classifier.classify_monthly(test_dates, test_predictions_denorm, "SAMPLE")

print(f"Daily classifications: {len(daily_classification)} records")
print(f"Monthly classifications: {len(monthly_classification)} records")

# Count of records per class, daily first and then monthly
for heading, classification_table in (
    ("\nDaily Classification Distribution:", daily_classification),
    ("\nMonthly Classification Distribution:", monthly_classification),
):
    print(heading)
    print(classification_table['Classification'].value_counts())

## 10. Visualize Classifications

In [None]:
# Per-day view of the test-period classifications
visualizer.plot_daily_classifications(daily_classification)

# Per-month view of the same period
visualizer.plot_monthly_classifications(monthly_classification)

# Distribution of the daily class labels
daily_labels = daily_classification['Classification']
distribution_title = "Daily Risk Classification Distribution"
visualizer.plot_classification_distribution(daily_labels, distribution_title)

## 11. Future Price Predictions

In [None]:
# Wrap the trained network and the fitted scaler for forecasting
predictor = StockPredictor(trained_model, scaler)

# Seed the 60-day forecast with the final test window, flattened to 1-D.
# NOTE(review): X_test[-1] is presumably the input window whose target is y_test[-1];
# if predict_future expects the most recent observations, the seed window may need to
# include y_test[-1], otherwise the forecast could start one step early — confirm.
final_test_window = data_splits['X_test'][-1]
last_window = final_test_window.flatten()
future_predictions_df = predictor.predict_future(last_window, days=60)

print(f"Future predictions shape: {future_predictions_df.shape}")
print("\nFirst 10 future predictions:", future_predictions_df.head(10), sep="\n")

# Attach risk classes to the forecast and show how often each class occurs
classified_future_df = classifier.classify_future_predictions(future_predictions_df)
future_class_counts = classified_future_df['Classification'].value_counts()

print("\nFuture classification distribution:")
print(future_class_counts)

## 12. Visualize Future Predictions

In [None]:
# Plot the forecast frame returned by predictor.predict_future in section 11
visualizer.plot_future_predictions(future_predictions_df)

# Plot the classified forecast (output of classifier.classify_future_predictions,
# which carries a 'Classification' column). Kept as the cell's last expression,
# so any value the call returns is what the notebook displays.
visualizer.plot_future_with_classification(classified_future_df)

## 13. Save Results

In [None]:
# Save all results to CSV files
output_dir = '../outputs'

# Define every destination once so the path written and the path reported cannot drift apart
daily_csv = f'{output_dir}/classifications/daily_classification_sample.csv'
monthly_csv = f'{output_dir}/classifications/monthly_classification_sample.csv'
future_csv = f'{output_dir}/predictions/future_predictions_sample.csv'
model_path = '../data/models/sample_stock_model.keras'

# Make sure each parent folder exists before writing: nothing in this cell creates them,
# and DataFrame.to_csv fails on a missing directory. exist_ok makes this a no-op
# when the folders are already there.
for target_path in (daily_csv, monthly_csv, future_csv, model_path):
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

# Save classifications
daily_classification.to_csv(daily_csv, index=False)
monthly_classification.to_csv(monthly_csv, index=False)

# Save future predictions
future_predictions_df.to_csv(future_csv, index=False)

# Save model
model.save_model(model_path)

print("All results saved successfully!")
print(f"- Daily classifications: {daily_csv}")
print(f"- Monthly classifications: {monthly_csv}")
print(f"- Future predictions: {future_csv}")
print(f"- Model: {model_path}")

## 14. Summary and Next Steps

In [None]:
# Headline facts about the run, gathered first and printed as one block
first_day = df.index.min().strftime('%Y-%m-%d')
last_day = df.index.max().strftime('%Y-%m-%d')

summary_lines = [
    "=== Stock Price Prediction Analysis Summary ===",
    "Stock analyzed: SAMPLE",
    f"Data period: {first_day} to {last_day}",
    f"Total data points: {len(df)}",
    f"Window size: {processor.window_size} days",
    f"Model architecture: LSTM({model.lstm_units}) + Dense({model.dense_units})",
    f"Test MAE: {test_metrics['MAE']:.6f}",
    f"Test RMSE: {test_metrics['RMSE']:.6f}",
    f"Future predictions: {len(future_predictions_df)} days",
]
print("\n".join(summary_lines))

# Share of test days that fell into each daily risk class
print("\n=== Risk Classification Summary ===")
daily_dist = daily_classification['Classification'].value_counts()
total_days = len(daily_classification)
for cls, count in daily_dist.items():
    percentage = (count / total_days) * 100
    print(f"{cls}: {count} days ({percentage:.1f}%)")

# Suggested follow-ups, numbered in the order listed
print("\n=== Next Steps ===")
next_steps = (
    "Try with your own stock data by updating the data_path variable",
    "Experiment with different model parameters (LSTM units, window size, etc.)",
    "Use the main.py script for automated analysis",
    "Explore batch processing for multiple stocks",
    "Fine-tune classification thresholds based on your risk preferences",
)
for step_number, step in enumerate(next_steps, start=1):
    print(f"{step_number}. {step}")