In [None]:
# Kelompok 11 
# ===========================
# RNN STOCK PREDICTION
# Prediksi Harga Saham dengan RNN Vanilla

# ===========================
# STEP 0: INSTALL & IMPORT
# ===========================

# Install yfinance 
!pip install yfinance

# Numerical arrays, tabular data and plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Ignore every warning raised after this point. Presumably done to keep the
# notebook output short; note that it also hides deprecation notices.
warnings.filterwarnings('ignore')

# Model building (Keras through TensorFlow), feature scaling, error metrics
# and the Yahoo Finance download client.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import yfinance as yf

# Confirm that the imports above succeeded and report the TensorFlow version.
print("✅ All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")

# ===========================
# STEP 1: DOWNLOAD DATA
# ===========================

def download_stock_data():
    """Download Apple (AAPL) price history from Yahoo Finance and plot it.

    Requests ticker ``AAPL`` with ``start="2022-01-01"`` and
    ``end="2024-01-01"``, prints the number of rows and the column labels,
    and shows a line chart of the ``Close`` column.

    Returns:
        The table returned by ``yf.download``.

    Raises:
        ValueError: If the download produced no rows.
    """
    print("📥 Downloading Apple stock data...")

    ticker = "AAPL"
    data = yf.download(ticker, start="2022-01-01", end="2024-01-01", progress=False)

    # NOTE(review): yfinance appears to report failed downloads (network
    # error, unknown ticker) by returning an empty table instead of raising;
    # confirm for the installed version. Failing here gives a clear message
    # instead of an obscure error later in scaling or sequence building.
    if data is None or len(data) == 0:
        raise ValueError(
            f"No price data was downloaded for ticker {ticker!r}; "
            "check the network connection and the ticker symbol."
        )

    print(f"✅ Data downloaded: {len(data)} rows")
    print("📊 Available columns:", data.columns.tolist())

    # Plot the raw closing prices over time.
    plt.figure(figsize=(12, 6))
    plt.plot(data.index, data['Close'], linewidth=2, color='blue')
    plt.title('🍎 Apple Stock Price - Raw Data (2022-2024)', fontsize=16, fontweight='bold')
    plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price ($)', fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    return data

# ===========================
# STEP 2: DATA PREPROCESSING
# ===========================

def preprocess_data(data, fit_rows=None):
    """Extract the ``Close`` column and min-max scale it towards [0, 1].

    Args:
        data: Table-like object for which ``data['Close'].values`` yields
            the closing prices.
        fit_rows: Optional number of leading rows used to fit the scaler.
            ``None`` (the default) fits on every row, which is the original
            behaviour. When the series is later split chronologically, pass
            the number of rows belonging to the training period so that the
            min/max of the held-out period do not leak into the scaling;
            later values may then fall outside [0, 1].

    Returns:
        A ``(prices_scaled, scaler)`` tuple: the scaled prices as a column
        array and the fitted ``MinMaxScaler`` (needed to invert the scaling).

    Raises:
        ValueError: If ``fit_rows`` is given but is not between 1 and the
            number of rows.
    """
    print("🔄 Preprocessing data...")

    # Keep only the closing price, as a single-column 2-D array.
    prices = data['Close'].values.reshape(-1, 1)

    scaler = MinMaxScaler(feature_range=(0, 1))
    if fit_rows is None:
        prices_scaled = scaler.fit_transform(prices)
    else:
        if not 0 < fit_rows <= len(prices):
            raise ValueError(
                f"fit_rows must be between 1 and {len(prices)}, got {fit_rows}"
            )
        # Learn min/max from the leading rows only, then apply to all rows.
        scaler.fit(prices[:fit_rows])
        prices_scaled = scaler.transform(prices)

    print(f"💰 Original price range: ${prices.min():.2f} - ${prices.max():.2f}")
    print(f"📏 Scaled price range: {prices_scaled.min():.3f} - {prices_scaled.max():.3f}")

    return prices_scaled, scaler

# ===========================
# STEP 3: CREATE SEQUENCES
# ===========================

def create_sequences(data, sequence_length=60):
    """Turn a single-feature time series into supervised RNN samples.

    Each sample consists of ``sequence_length`` consecutive values taken
    from column 0 of ``data``; its target is the value that immediately
    follows that window.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``; only column 0
            is used.
        sequence_length: Number of consecutive time steps per input window.

    Returns:
        A ``(X, y)`` tuple where ``X`` has shape
        ``(n_rows - sequence_length, sequence_length, 1)`` and ``y`` has
        shape ``(n_rows - sequence_length,)``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1, or ``data``
            does not contain more than ``sequence_length`` rows (no window
            would have a following target).
    """
    print(f"🔗 Creating sequences with length {sequence_length}...")

    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

    series = np.asarray(data)[:, 0]
    n_samples = len(series) - sequence_length
    if n_samples < 1:
        # Previously this case failed with an opaque IndexError on reshape.
        raise ValueError(
            f"Need more than {sequence_length} rows to build a sequence, "
            f"got {len(series)}"
        )

    # Row k of the index grid is [k, k+1, ..., k+sequence_length-1], so
    # fancy indexing extracts every window in one step (and returns a copy).
    window_idx = np.arange(n_samples)[:, None] + np.arange(sequence_length)[None, :]
    X = series[window_idx]
    # The target of window k is the value right after it.
    y = series[sequence_length:].copy()

    # Reshape for the RNN: (samples, timesteps, features).
    X = X.reshape(n_samples, sequence_length, 1)

    print(f"📦 Sequences shape: {X.shape}")
    print(f"🎯 Targets shape: {y.shape}")

    return X, y

# ===========================
# STEP 4: TRAIN-TEST SPLIT
# ===========================

def split_data(X, y, test_size=0.2):
    """Partition samples chronologically into a leading and a trailing part.

    No shuffling takes place: the first ``int(len(X) * (1 - test_size))``
    samples become the training set and the remaining ones the test set,
    which keeps the time order intact.

    Args:
        X: Input samples; must support slicing and expose ``.shape``.
        y: Targets aligned with ``X``.
        test_size: Fraction of samples assigned to the trailing test part.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.
    """
    test_pct = test_size * 100
    print(f"✂️ Splitting data: {100-test_pct:.0f}% train, {test_pct:.0f}% test...")

    # Index of the first test sample (fraction is truncated towards zero).
    cut = int(len(X) * (1 - test_size))
    head, tail = slice(None, cut), slice(cut, None)

    X_train, y_train = X[head], y[head]
    X_test, y_test = X[tail], y[tail]

    print(f"🏋️ Training data: {X_train.shape}")
    print(f"🧪 Test data: {X_test.shape}")

    return X_train, X_test, y_train, y_test

# ===========================
# STEP 5: BUILD RNN MODEL
# ===========================

def build_rnn_model(input_shape):
    """Build and compile a two-layer vanilla (SimpleRNN) regression model.

    Architecture: SimpleRNN(50, returning sequences) -> Dropout(0.2) ->
    SimpleRNN(50) -> Dropout(0.2) -> Dense(25, relu) -> Dense(1).
    The model is compiled with Adam (learning rate 0.001), mean squared
    error as loss and mean absolute error as an additional metric. The
    architecture summary is printed before returning.

    Args:
        input_shape: Shape of one input sample, passed to ``keras.Input``
            (in this file: ``(timesteps, features)``).

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    print("🧠 Building RNN Vanilla Model...")

    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to
        # the first layer, which newer Keras releases flag as deprecated.
        keras.Input(shape=input_shape),

        # Layer 1: RNN with 50 units; emits the full sequence for layer 2.
        layers.SimpleRNN(50,
                         return_sequences=True,
                         name='RNN_Layer_1'),
        layers.Dropout(0.2, name='Dropout_1'),

        # Layer 2: RNN with 50 units; emits only its final state.
        layers.SimpleRNN(50,
                         return_sequences=False,
                         name='RNN_Layer_2'),
        layers.Dropout(0.2, name='Dropout_2'),

        # Dense head producing a single value (the predicted price).
        layers.Dense(25, activation='relu', name='Dense_1'),
        layers.Dense(1, name='Output'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='mean_squared_error',
        metrics=['mean_absolute_error']
    )

    # Print the model architecture.
    print("🏗️ RNN MODEL ARCHITECTURE:")
    model.summary()

    return model

# ===========================
# STEP 6: TRAINING
# ===========================

def train_rnn_model(model, X_train, y_train, X_test, y_test, epochs=50):
    """Fit ``model`` and plot its loss and MAE curves.

    Training uses batches of 32 and two callbacks that both watch
    ``val_loss``: early stopping (patience 15, restoring the best weights)
    and learning-rate reduction on plateau (factor 0.2, patience 7, floor
    0.0001).

    NOTE(review): ``(X_test, y_test)`` is passed as ``validation_data``, so
    early stopping and the learning-rate schedule are driven by the same
    arrays the caller supplies as test data.

    Args:
        model: Compiled Keras model exposing ``fit``.
        X_train, y_train: Training inputs and targets.
        X_test, y_test: Arrays used as validation data during fitting.
        epochs: Maximum number of training epochs.

    Returns:
        The history object returned by ``model.fit``.
    """
    print(f"🚀 Training RNN for {epochs} epochs...")

    stop_early = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1
    )
    shrink_lr = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=7,
        min_lr=0.0001,
        verbose=1
    )

    history = model.fit(
        X_train, y_train,
        batch_size=32,
        epochs=epochs,
        validation_data=(X_test, y_test),
        callbacks=[stop_early, shrink_lr],
        verbose=1
    )

    # One panel per tracked quantity: (history key, legend suffix, title, y label).
    panels = (
        ('loss', 'Loss', '📉 Model Loss During Training', 'Mean Squared Error'),
        ('mean_absolute_error', 'MAE', '📈 Mean Absolute Error During Training', 'MAE'),
    )

    plt.figure(figsize=(15, 5))
    for position, (key, suffix, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[key], label=f'Training {suffix}', color='blue', linewidth=2)
        plt.plot(history.history[f'val_{key}'], label=f'Validation {suffix}', color='red', linewidth=2)
        plt.title(title, fontweight='bold')
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # The number of recorded loss values equals the number of epochs run.
    epochs_run = len(history.history['loss'])
    print(f"✅ Training completed in {epochs_run} epochs!")

    return history

# ===========================
# STEP 7: EVALUATION
# ===========================

def evaluate_rnn_model(model, X_test, y_test, scaler):
    """Evaluate ``model`` on the test set in original price units.

    Predictions and targets are mapped back through
    ``scaler.inverse_transform`` before MSE, MAE, RMSE, MAPE and direction
    accuracy are computed and printed.

    Direction accuracy follows the usual mean-directional-accuracy
    definition: for each step after the first, the predicted move is the
    prediction compared with the *previous actual* price, and it counts as
    correct when it agrees with the actual move (up vs. not up). Comparing
    consecutive predictions with each other, as was done before, measures
    the shape of the predicted curve rather than the forecasted move.

    Args:
        model: Trained model exposing ``predict(X, verbose=0)``.
        X_test: Test inputs.
        y_test: Scaled test targets (array with ``reshape``).
        scaler: Fitted scaler exposing ``inverse_transform``.

    Returns:
        Dict with keys ``mse``, ``mae``, ``rmse``, ``mape``,
        ``direction_accuracy`` (percent; NaN when fewer than two test
        points exist), ``predictions`` and ``actual`` (both de-normalised).
    """
    print("📊 Evaluating RNN Model Performance...")

    predictions_scaled = model.predict(X_test, verbose=0)

    # De-normalise back to original prices.
    predictions = scaler.inverse_transform(predictions_scaled)
    y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

    mse = mean_squared_error(y_test_actual, predictions)
    mae = mean_absolute_error(y_test_actual, predictions)
    rmse = np.sqrt(mse)

    # Direction accuracy (up / not up), relative to the previous actual price.
    actual_flat = y_test_actual.flatten()
    predicted_flat = predictions.flatten()
    if len(actual_flat) > 1:
        previous_actual = actual_flat[:-1]
        actual_up = actual_flat[1:] > previous_actual
        predicted_up = predicted_flat[1:] > previous_actual
        direction_accuracy = np.mean(actual_up == predicted_up) * 100
    else:
        # No consecutive pair exists, so a direction cannot be defined.
        direction_accuracy = float('nan')

    # MAPE (Mean Absolute Percentage Error).
    mape = np.mean(np.abs((y_test_actual - predictions) / y_test_actual)) * 100

    print("="*50)
    print("🏆 RNN MODEL EVALUATION RESULTS")
    print("="*50)
    print(f"💰 Mean Squared Error (MSE): ${mse:.2f}")
    print(f"📏 Mean Absolute Error (MAE): ${mae:.2f}")
    print(f"📐 Root Mean Squared Error (RMSE): ${rmse:.2f}")
    print(f"📊 Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print(f"🎯 Direction Accuracy: {direction_accuracy:.1f}%")
    print("="*50)

    return {
        'mse': mse,
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
        'direction_accuracy': direction_accuracy,
        'predictions': predictions,
        'actual': y_test_actual
    }

# ===========================
# STEP 8: VISUALIZATION
# ===========================

def visualize_results(results):
    """Comprehensive visualization of results"""
    predictions = results['predictions']
    actual = results['actual']

    # Create comprehensive plots
    fig = plt.figure(figsize=(18, 12))

    # Plot 1: Full prediction comparison
    plt.subplot(3, 2, 1)
    plt.plot(actual, label='🎯 Actual Price', linewidth=2.5, color='blue', alpha=0.8)
    plt.plot(predictions, label='🤖 RNN Predictions', linewidth=2.5, color='red', alpha=0.8)
    plt.title('📈 RNN Stock Price Prediction - Full Test Period', fontweight='bold', fontsize=12)
    plt.xlabel('Time (Days)')
    plt.ylabel('Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Plot 2: Last 30 days detailed view
    plt.subplot(3, 2, 2)
    last_30 = -30
    days = range(len(actual[last_30:]))
    plt.plot(days, actual[last_30:], 'o-', label='🎯 Actual', linewidth=3, markersize=6, color='blue')
    plt.plot(days, predictions[last_30:], 's-', label='🤖 RNN Pred', linewidth=3, markersize=6, color='red')
    plt.title('🔍 Last 30 Days - Detailed Comparison', fontweight='bold', fontsize=12)
    plt.xlabel('Days')
    plt.ylabel('Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Plot 3: Prediction errors distribution
    plt.subplot(3, 2, 3)
    errors = np.abs(actual - predictions).flatten()
    plt.hist(errors, bins=25, alpha=0.7, color='orange', edgecolor='black')
    plt.title('📊 Distribution of Prediction Errors', fontweight='bold', fontsize=12)
    plt.xlabel('Absolute Error ($)')
    plt.ylabel('Frequency')