# In [None]:

import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt

# Fetch and preprocess data
def fetch_and_preprocess_data(ticker="AAPL", start_date="2020-01-01", end_date="2023-01-01"):
    """Download price history for one ticker and derive normalized features.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        The downloaded DataFrame with NaN rows removed and four added
        columns: ``Log_Return`` (log of Close over previous Close),
        ``Volatility`` (10-row rolling standard deviation of ``Log_Return``),
        and their z-scored versions ``Log_Return_Norm`` / ``Volatility_Norm``.

    Raises:
        ValueError: If the download returns no rows, or no rows remain once
            the NaN rows produced by the shift/rolling window are dropped.
    """
    # Download historical data
    data = yf.download(ticker, start=start_date, end=end_date)

    if data is None or data.empty:
        raise ValueError(
            f"No price data returned for {ticker!r} between {start_date} and {end_date}"
        )

    # Some yfinance versions return two-level columns (price field, ticker)
    # even for a single symbol. Collapse to the level that holds the price
    # field names so the flat "Close" lookups below keep working.
    if isinstance(data.columns, pd.MultiIndex):
        for level in range(data.columns.nlevels):
            level_values = data.columns.get_level_values(level)
            if "Close" in level_values:
                data.columns = level_values
                break

    # Compute features: log returns, rolling volatility
    data["Log_Return"] = np.log(data["Close"] / data["Close"].shift(1))
    data["Volatility"] = data["Log_Return"].rolling(window=10).std()

    # Drop NaN values; copy so the column assignments below write to an
    # independent frame rather than a possible view of the original.
    data = data.dropna().copy()

    if data.empty:
        raise ValueError(
            f"Not enough rows for {ticker!r} to compute returns and 10-row volatility"
        )

    # Normalize features (Z-scores).
    # NOTE: mean/std are taken over the whole sample, so each row's normalized
    # value depends on later rows as well as earlier ones.
    for column in ("Log_Return", "Volatility"):
        series = data[column]
        data[f"{column}_Norm"] = (series - series.mean()) / series.std()

    return data

# Dataset Class
class AssetDataset(Dataset):
    """Dataset that serves index-aligned triples from three collections.

    ``X``, ``Z`` and ``y`` are stored as given; item ``idx`` is the tuple
    ``(X[idx], Z[idx], y[idx])``.
    """

    def __init__(self, X, Z, y):
        self.X, self.Z, self.y = X, Z, y

    def __len__(self):
        # The size is taken from X alone; Z and y are not length-checked here.
        primary = self.X
        return len(primary)

    def __getitem__(self, idx):
        return tuple(source[idx] for source in (self.X, self.Z, self.y))

# Stage 1 Model: Asset-specific Quantile Prediction
class AssetLSTM(nn.Module):
    """Stage-1 network: an LSTM followed by a linear head, one output per quantile.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden state width.
        num_layers: Number of stacked LSTM layers.
        num_quantiles: Width of the linear output layer.
        dropout: Dropout value forwarded to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_quantiles, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_quantiles)

    def forward(self, x):
        """Map a batch of sequences to ``(batch, num_quantiles)`` outputs.

        The LSTM is batch-first, so ``x`` is indexed as
        ``(batch, seq_len, input_size)``.
        """
        sequence_out, _state = self.lstm(x)
        # Only the final time step's hidden output feeds the linear head.
        last_step = sequence_out[:, -1]
        return self.fc(last_step)

# Stage 2 Model: Market Data Scaling
class MarketScaler(nn.Module):
    """Stage-2 network: an LSTM over market features producing one scalar per sample.

    Args:
        input_size: Number of market features per time step.
        hidden_size: LSTM hidden state width.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout value forwarded to ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.2):
        super().__init__()
        recurrent = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            dropout=dropout,
            batch_first=True,
        )
        head = nn.Linear(hidden_size, 1)
        self.lstm = recurrent
        self.fc = head

    def forward(self, z):
        """Return a ``(batch, 1)`` tensor computed from the last time step of ``z``.

        The LSTM is batch-first, so ``z`` is indexed as
        ``(batch, seq_len, input_size)``. The linear output has no activation,
        so it is not constrained in sign or range.
        """
        hidden_states = self.lstm(z)[0]
        final_hidden = hidden_states[:, -1, :]  # last time step only
        return self.fc(final_hidden)

# Full Model: Combining Both Stages
class QuantilePredictionModel(nn.Module):
    """Two-stage model: asset quantile predictions multiplied by a market scale.

    Args:
        asset_input_size: Feature count per time step for the asset branch.
        market_input_size: Feature count per time step for the market branch.
        hidden_size: Hidden width shared by both LSTM branches.
        num_layers: Number of stacked LSTM layers in both branches.
        num_quantiles: Number of quantile outputs of the asset branch.
        dropout: Dropout value forwarded to both branches.
    """

    def __init__(self, asset_input_size, market_input_size, hidden_size, num_layers, num_quantiles, dropout=0.2):
        super().__init__()
        self.asset_model = AssetLSTM(
            input_size=asset_input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            num_quantiles=num_quantiles,
            dropout=dropout,
        )
        self.market_scaler = MarketScaler(
            input_size=market_input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
        )

    def forward(self, x, z):
        """Return the asset branch output scaled by the market branch output.

        The market branch yields one value per sample, which broadcasts across
        the quantile dimension of the asset branch output.
        """
        return self.asset_model(x) * self.market_scaler(z)

# Quantile Loss Function
def quantile_loss(predictions, targets, quantiles):
    """Compute the mean pinball (quantile) loss.

    For each error ``e = target - prediction`` and quantile level ``q`` the
    loss term is ``max(q * e, (q - 1) * e)``; the result is the mean over all
    elements.

    Args:
        predictions: Tensor of predicted quantiles, e.g. ``(batch, Q)``.
        targets: Tensor of observed values. A tensor with exactly one fewer
            dimension than ``predictions`` (e.g. ``(batch,)``) gets a trailing
            axis added so it lines up per sample instead of broadcasting into
            a ``(batch, batch)`` error matrix.
        quantiles: Quantile levels broadcastable against ``predictions``;
            may be a tensor, a Python float, or a sequence of floats.

    Returns:
        A scalar tensor holding the mean loss.
    """
    # Accept plain floats/lists and keep dtype/device aligned with predictions.
    quantiles = torch.as_tensor(
        quantiles, dtype=predictions.dtype, device=predictions.device
    )

    # Align per-sample targets with the quantile axis of the predictions.
    if targets.dim() == predictions.dim() - 1:
        targets = targets.unsqueeze(-1)

    errors = targets - predictions
    under_penalty = quantiles * errors
    over_penalty = (quantiles - 1) * errors
    return torch.max(under_penalty, over_penalty).mean()

# Train Model with Graph
def train_model_with_graph(seq_len=30, num_epochs=3, batch_size=32, learning_rate=0.001):
    """Train the quantile model on the default dataset and plot one batch.

    Builds sliding windows of ``seq_len`` rows from the preprocessed data,
    trains a median (0.5-quantile) model, prints the mean loss per epoch, and
    plots predictions against actual returns for a single batch.

    Args:
        seq_len: Number of consecutive rows in each input window.
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size used by the DataLoader.
        learning_rate: Learning rate for the Adam optimizer.

    Raises:
        ValueError: If ``seq_len`` is not positive or the data has too few
            rows to form at least one window plus its target.
    """
    # Fetch data
    data = fetch_and_preprocess_data()

    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer")
    num_samples = len(data) - seq_len
    if num_samples <= 0:
        raise ValueError(
            f"Need more than {seq_len} rows to build training windows, got {len(data)}"
        )

    # Pull each column out once as a (rows, 1) float32 array instead of
    # slicing the DataFrame on every loop iteration.
    returns_norm = data[["Log_Return_Norm"]].to_numpy(dtype=np.float32)
    volatility_norm = data[["Volatility_Norm"]].to_numpy(dtype=np.float32)
    raw_returns = data[["Log_Return"]].to_numpy(dtype=np.float32)

    # Stack windows into one ndarray before handing them to torch; building a
    # tensor from a Python list of ndarrays is slow and triggers a warning.
    window_starts = range(num_samples)
    X = torch.tensor(
        np.stack([returns_norm[i:i + seq_len] for i in window_starts]),
        dtype=torch.float32,
    )
    Z = torch.tensor(
        np.stack([volatility_norm[i:i + seq_len] for i in window_starts]),
        dtype=torch.float32,
    )
    # The target for window i is the raw log return of the row right after it.
    y = torch.tensor(raw_returns[seq_len:], dtype=torch.float32)

    # Dataset and DataLoader
    dataset = AssetDataset(X, Z, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model, Optimizer, and Hyperparameters
    num_features = 1
    num_quantiles = 1
    model = QuantilePredictionModel(asset_input_size=num_features, market_input_size=num_features,
                                     hidden_size=64, num_layers=2, num_quantiles=num_quantiles)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    quantiles = torch.tensor([0.5]).reshape(1, -1)  # Median quantile

    # Training Loop
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_X, batch_Z, batch_y in dataloader:
            optimizer.zero_grad()
            predictions = model(batch_X, batch_Z)
            loss = quantile_loss(predictions, batch_y, quantiles)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(dataloader):.4f}")

    # Plot predictions vs. actuals for one batch.
    # NOTE: the batch comes from the shuffled training loader, so it is
    # in-sample and the x-axis order is not chronological.
    model.eval()  # disable dropout so the plotted predictions are deterministic
    test_X, test_Z, test_y = next(iter(dataloader))
    with torch.no_grad():
        predictions = model(test_X, test_Z).numpy()
    test_y = test_y.numpy()

    plt.figure(figsize=(10, 6))
    plt.plot(predictions, label="Predicted Median")
    plt.plot(test_y, label="Actual Returns", alpha=0.6)
    plt.title("Predicted vs Actual Returns")
    plt.legend()
    plt.show()

# Run the training demo only when executed directly (script or notebook cell),
# so importing this module does not trigger a download and a training run.
if __name__ == "__main__":
    train_model_with_graph()
