# Stock Price Prediction with GRU

This notebook focuses specifically on stock price time series forecasting using **GRU (Gated Recurrent Unit)** networks.

## Overview

- **Task**: Stock price prediction using historical OHLCV data
- **Model**: GRU-based RNN with technical indicators
- **Data**: Synthetic stock data with realistic patterns
- **Goal**: Build an effective stock price forecasting model

## Key Features

1. **Realistic Synthetic Data**: OHLCV data with trends, volatility, and patterns
2. **Technical Indicators**: Moving averages, rolling volatility, price-range ratios, and volume ratios
3. **GRU Architecture**: Optimized for financial time series
4. **Comprehensive Evaluation**: Multiple metrics and visualizations

## 1. Import Required Libraries

In [None]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Machine Learning libraries
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Deep Learning libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Set random seeds for reproducibility (NumPy global RNG and TensorFlow)
np.random.seed(42)
tf.random.set_seed(42)

# Configure matplotlib.
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# matplotlib 3.6; older releases only know the original 'seaborn' name and
# raise OSError for the new one, so fall back instead of aborting the cell.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
plt.rcParams['figure.figsize'] = (12, 6)

print("Libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print("Focus: Stock Price Prediction with GRU")

## 2. Generate Stock Data with Technical Indicators

In [None]:
def generate_stock_data(n_days=1000, start_price=100.0, volatility=0.02, seed=42):
    """Generate a synthetic daily OHLCV price series.

    Closing prices follow a multiplicative random walk: each close is the
    previous close times ``1 + r`` with ``r ~ Normal(0.0005, volatility)``,
    floored at 1.0. Open, High, Low and Volume are then derived per day from
    additional normal draws.

    Randomness comes from a private ``numpy.random.RandomState`` so that
    calling this function does not reseed or advance NumPy's global generator.
    With the default ``seed=42`` the output matches what seeding the global
    generator with 42 would produce.

    Args:
        n_days: Number of consecutive calendar days to generate, starting at
            2020-01-01 (weekends are included).
        start_price: Close (and open) price of the first day.
        volatility: Standard deviation of the daily return; the open gap uses
            ``volatility / 4`` and the intraday range ``volatility / 2``.
        seed: Seed for the private generator. ``None`` draws a fresh,
            non-reproducible seed.

    Returns:
        pandas.DataFrame with one row per day and the columns
        ``Date, Open, High, Low, Close, Volume``. Prices are rounded to two
        decimals; Volume is an integer of at least 100000. For ``n_days == 0``
        an empty frame with those columns is returned.

    Raises:
        ValueError: If ``n_days`` is negative.
    """
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    if n_days < 0:
        raise ValueError(f"n_days must be non-negative, got {n_days}")
    if n_days == 0:
        return pd.DataFrame(columns=columns)

    rng = np.random.RandomState(seed)

    # Generate dates
    start_date = datetime(2020, 1, 1)
    dates = [start_date + timedelta(days=i) for i in range(n_days)]

    # Daily returns with a small positive drift. The first draw is never
    # applied (day 0 is pinned to start_price) but is kept so the random
    # stream stays aligned with earlier outputs.
    returns = rng.normal(0.0005, volatility, n_days)
    prices = [start_price]
    for daily_return in returns[1:]:
        # Floor at 1.0 so an extreme negative return cannot push the price
        # to zero or below.
        prices.append(max(prices[-1] * (1 + daily_return), 1.0))

    # Generate OHLC data. The order of the draws inside the loop
    # (open gap, intraday range, volume) determines the output for a seed.
    rows = []
    for i, (date, close) in enumerate(zip(dates, prices)):
        # Open price: previous close plus a small overnight gap
        if i == 0:
            open_price = close
        else:
            open_price = prices[i - 1] * (1 + rng.normal(0, volatility / 4))

        # High/low bracket both open and close by the same relative range
        daily_range = abs(rng.normal(0, volatility / 2))
        high = max(open_price, close) * (1 + daily_range)
        low = min(open_price, close) * (1 - daily_range)

        # Volume, clamped so it is never unrealistically small or negative
        volume = max(int(rng.normal(1000000, 200000)), 100000)

        rows.append({
            'Date': date,
            'Open': round(open_price, 2),
            'High': round(high, 2),
            'Low': round(low, 2),
            'Close': round(close, 2),
            'Volume': volume,
        })

    return pd.DataFrame(rows, columns=columns)

def add_technical_indicators(df):
    """Append derived indicator columns to an OHLCV frame and return it.

    The frame is modified in place; the returned object is the same ``df``.
    It must already contain ``Open``, ``High``, ``Low``, ``Close`` and
    ``Volume``. Rolling columns are NaN until their window is full.
    """
    close = df['Close']
    volume = df['Volume']

    # Simple moving averages of the close
    for window in (5, 10, 20):
        df[f'MA_{window}'] = close.rolling(window=window).mean()

    # Day-over-day change, relative and absolute
    daily_returns = close.pct_change()
    df['Returns'] = daily_returns
    df['Price_Change'] = close.diff()

    # Rolling standard deviation of the daily returns
    for window in (5, 10):
        df[f'Volatility_{window}'] = daily_returns.rolling(window=window).std()

    # Intraday spread relative to close, and open-to-close move relative to open
    open_ = df['Open']
    df['HL_Range'] = (df['High'] - df['Low']) / close
    df['OC_Range'] = (close - open_) / open_

    # Volume relative to its own 10-day average
    volume_avg = volume.rolling(window=10).mean()
    df['Volume_MA'] = volume_avg
    df['Volume_Ratio'] = volume / volume_avg

    return df

# Generate and enhance stock data
# Builds the module-level `stock_df` frame: 1200 synthetic daily rows plus the
# derived indicator columns. The rolling-window indicators are NaN in the
# first rows until their window has filled.
print("Generating stock data...")
stock_df = generate_stock_data(n_days=1200)
stock_df = add_technical_indicators(stock_df)

# Quick summary of what was generated
print(f"Generated {len(stock_df)} days of stock data")
print(f"Date range: {stock_df['Date'].min()} to {stock_df['Date'].max()}")
print(f"Price range: ${stock_df['Close'].min():.2f} - ${stock_df['Close'].max():.2f}")
print(f"Features: {list(stock_df.columns)}")
print("\nFirst few rows:")
# Last expression of the cell: the notebook renders the first five rows
stock_df.head()

## 3. Data Preprocessing and Sequence Creation

In [None]:
def prepare_stock_sequences(data, sequence_length=60, target_col='Close'):
    """Turn an indicator-enriched price frame into scaled RNN training windows.

    Steps: select the known feature columns present in ``data``, drop rows
    containing NaN, min-max scale, build sliding windows of
    ``sequence_length`` rows whose label is the scaled ``target_col`` of the
    row right after the window, and split chronologically 70% / 15% / 15%.

    The scaler is fitted only on the rows that training windows and training
    labels can see. Fitting it on the whole series would leak the min/max of
    the validation and test periods into training. As a consequence, scaled
    validation/test values may fall slightly outside ``[0, 1]``.

    Args:
        data: DataFrame containing (a subset of) the feature columns listed
            below, including ``target_col``.
        sequence_length: Number of consecutive rows per input window.
        target_col: Name of the feature column to predict.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test, scaler,
        available_cols)``. Each ``X_*`` has shape
        ``(n_samples, sequence_length, n_features)``, each ``y_*`` has shape
        ``(n_samples,)``, ``scaler`` is the fitted ``MinMaxScaler`` and
        ``available_cols`` is the ordered list of feature names used.

    Raises:
        ValueError: If ``sequence_length`` is not positive, ``target_col`` is
            not among the usable features, or fewer than
            ``sequence_length + 1`` clean rows remain.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

    # Candidate numerical features, in the column order used for scaling
    feature_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_5', 'MA_10', 'MA_20',
                    'Volatility_5', 'Volatility_10', 'HL_Range', 'OC_Range', 'Volume_Ratio']

    # Keep only the candidates that exist in `data`, then drop every row that
    # still has a NaN in any of them (e.g. rolling-window warm-up rows)
    available_cols = [col for col in feature_cols if col in data.columns]
    if target_col not in available_cols:
        raise ValueError(
            f"target_col {target_col!r} is not among the usable features: {available_cols}"
        )
    df = data[available_cols].dropna()

    print(f"Using features: {available_cols}")
    print(f"Data shape after cleaning: {df.shape}")

    n_sequences = len(df) - sequence_length
    if n_sequences <= 0:
        raise ValueError(
            f"Need more than {sequence_length} rows after dropping NaN to build "
            f"sequences, got {len(df)}"
        )

    # Chronological split boundaries, expressed in sequence indices
    train_size = int(0.7 * n_sequences)
    val_size = int(0.85 * n_sequences)

    # Fit the scaler on training-visible rows only. Training sequence k uses
    # rows k .. k+sequence_length-1 as input and row k+sequence_length as its
    # label, so the last row any training sample touches has index
    # sequence_length + train_size - 1.
    scaler = MinMaxScaler()
    scaler.fit(df.iloc[:sequence_length + train_size])
    scaled_data = scaler.transform(df)

    # Find target column index
    target_idx = available_cols.index(target_col)

    # Create sequences: window of the previous `sequence_length` rows -> next target
    X = np.array([
        scaled_data[end - sequence_length:end]
        for end in range(sequence_length, len(scaled_data))
    ])
    y = scaled_data[sequence_length:, target_idx].copy()

    # Split into train, validation, and test (no shuffling: order is temporal)
    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:val_size], y[train_size:val_size]
    X_test, y_test = X[val_size:], y[val_size:]

    return X_train, X_val, X_test, y_train, y_val, y_test, scaler, available_cols

# Prepare stock data
# SEQUENCE_LENGTH is the number of consecutive rows in each input window.
SEQUENCE_LENGTH = 60
# Unpacks the chronological train/validation/test arrays, the fitted scaler
# and the ordered list of feature names actually used.
X_train, X_val, X_test, y_train, y_val, y_test, scaler, features = prepare_stock_sequences(
    stock_df, sequence_length=SEQUENCE_LENGTH
)

# Shapes are (n_samples, SEQUENCE_LENGTH, n_features)
print("Stock data preparation complete!")
print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")
print(f"Features used: {len(features)}")

# Verify data integrity
# Confirms the training inputs contain no NaN and reports the min/max of the
# scaled training inputs.
print(f"\nData integrity check:")
print(f"No NaN in training data: {not np.isnan(X_train).any()}")
print(f"Data range: [{X_train.min():.3f}, {X_train.max():.3f}]")

## 4. Build Optimized GRU Model

In [None]:
def create_optimized_gru_model(input_shape, units=(64, 32, 16), dropout_rate=0.3):
    """Build and compile a stacked-GRU regression model for price prediction.

    The network is one GRU layer per entry of ``units`` (each followed by
    batch normalization), then two dense ReLU layers with dropout, then a
    single linear output unit. Every GRU except the last returns the full
    sequence so the next GRU can consume it.

    Args:
        input_shape: ``(timesteps, n_features)`` of one input sample.
        units: Hidden sizes of the GRU layers, in order. Any non-empty
            sequence is accepted; the default builds three layers.
        dropout_rate: Dropout applied to GRU inputs and recurrent state and
            after the first dense layer; half this rate is used after the
            second dense layer.

    Returns:
        A compiled Keras ``Sequential`` model (Adam with learning rate 0.001
        and gradient-norm clipping at 1.0, Huber loss, MAE and MSE metrics).

    Raises:
        ValueError: If ``units`` is empty.
    """
    # Copy into a tuple: accepts lists from existing callers without sharing
    # a mutable default between calls.
    units = tuple(units)
    if not units:
        raise ValueError("units must contain at least one GRU layer size")

    # NOTE(review): Keras documents a non-zero recurrent_dropout as
    # incompatible with its cuDNN fast path - confirm the slower generic
    # kernel is acceptable on GPU.
    layers = []
    last_idx = len(units) - 1
    for idx, n_units in enumerate(units):
        gru_kwargs = {
            'units': n_units,
            # Only the final GRU collapses the sequence to its last state
            'return_sequences': idx < last_idx,
            'dropout': dropout_rate,
            'recurrent_dropout': dropout_rate,
            'name': f'gru_{idx + 1}',
        }
        if idx == 0:
            # The first layer declares the model's input shape
            gru_kwargs['input_shape'] = input_shape
        layers.append(GRU(**gru_kwargs))
        layers.append(BatchNormalization())

    # Dense layers with regularization
    layers.extend([
        Dense(units=32, activation='relu', name='dense_1'),
        Dropout(dropout_rate),
        BatchNormalization(),

        Dense(units=16, activation='relu', name='dense_2'),
        Dropout(dropout_rate / 2),

        # Output layer: one unbounded value (the scaled target)
        Dense(units=1, activation='linear', name='output'),
    ])

    model = Sequential(layers)

    # Compile with optimized settings
    model.compile(
        optimizer=Adam(learning_rate=0.001, clipnorm=1.0),
        loss='huber',  # More robust to outliers
        metrics=['mae', 'mse']
    )

    return model

# Create the model
# One sample is axes 1 and 2 of X_train (window length, feature count);
# axis 0 indexes the samples and is not part of the model input shape.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_optimized_gru_model(input_shape)

# Report the size and layer-by-layer structure of the compiled model
print("Optimized GRU model created!")
print(f"Input shape: {input_shape}")
print(f"Total parameters: {model.count_params():,}")
print("\nModel architecture:")
model.summary()