In [9]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LeakyReLU
from tensorflow.keras.optimizers import Adam
from scipy.stats import norm
from scipy.linalg import svd
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os 

# Fetch stock data from Yahoo Finance
def fetch_stock_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data

# Compute technical indicators
def compute_technical_indicators(data):
    data['SMA30'] = data['Close'].rolling(window=30).mean()
    data['SMA100'] = data['Close'].rolling(window=100).mean()
    vol_window = 21
    data['Volatility'] = data['Close'].pct_change().rolling(window=vol_window).std() * np.sqrt(252)
    return data.dropna()

# Black-Scholes option pricing model
def black_scholes(S, K, T, r, sigma, option_type='call'):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    if option_type == 'call':
        return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    elif option_type == 'put':
        return K * np.exp(-r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

# Black-Scholes PDE using Finite Difference Method (Backward)
def black_scholes_pde(S, K, T, r, sigma, M, N):
    dt = T / M  # Time step
    dS = (max(S) - min(S)) / N  # Price step
    S_range = np.linspace(min(S), max(S), N+1)  # Create stock price range with N+1 steps
    grid = np.zeros((M+1, N+1))  # Initialize the grid
    
    # Boundary conditions
    grid[M, :] = np.maximum(S_range - K, 0)  # Call option payoff at maturity
    grid[:, 0] = 0  # Option price is 0 when stock price is 0
    grid[:, N] = S_range[N] - K * np.exp(-r * (T - np.linspace(0, T, M+1)))  # Large S boundary condition

    # Resample S to fit the grid
    S_resampled = np.interp(S, np.linspace(min(S), max(S), N+1), S_range)
    
    # Finite difference scheme (Backward)
    for i in range(M-1, -1, -1):  # Loop over time steps
        for j in range(1, N):  # Loop over stock prices
            delta = (grid[i+1, j+1] - grid[i+1, j-1]) / (2*dS)
            gamma = (grid[i+1, j+1] - 2*grid[i+1, j] + grid[i+1, j-1]) / (dS**2)
            grid[i, j] = grid[i+1, j] - dt * (r * S_resampled[j] * delta + 0.5 * sigma**2 * S_resampled[j]**2 * gamma)
    
    return grid



# Apply Black-Scholes PDE to generate option prices
def add_option_pricing_features_with_pde(data, S, K, T, r, sigma):
    M = 840  # Number of time steps
    N = 840  # Number of price steps
    option_price_grid = black_scholes_pde(S, K, T, r, sigma, M, N)
    # Extract the option price at t=0 (current time)
    option_prices_at_t0 = option_price_grid[0, :]
    
    data['Option_Price_PDE'] = option_prices_at_t0  # Add PDE-based option price as a feature
    return data

# Normalize and apply PCA for dimensionality reduction
def preprocess_data(data, n_pca_components=2):
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)
    return data_scaled, scaler

# Create sequences for time-series prediction
def create_sequences(data, seq_len, pred_len):
    sequences = []
    for i in range(len(data) - seq_len - pred_len + 1):
        sequences.append(data[i:i + seq_len + pred_len])
    return np.array(sequences)

# Function to create a Hankel matrix
def create_hankel_matrix(time_series, window_size):
    hankel_matrix = np.array([time_series[i:i + window_size] for i in range(len(time_series) - window_size + 1)])
    return hankel_matrix

# Function to perform SVD on the Hankel matrix
def hankel_svd(hankel_matrix):
    U, Sigma, Vt = svd(hankel_matrix, full_matrices=False)
    return U, Sigma, Vt

# Build a neural network model
def build_model(input_shape, output_len):
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(180),
        LeakyReLU(),
        Dense(360),
        LeakyReLU(),
        Dense(360),
        LeakyReLU(),
        Dense(output_len)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Plot predictions and calculate RMSE
def predict_and_plot(m, ticker, data, s, model, seq_len, pred_len, scaler, cut_off):
    plt.figure(figsize=(14, 7))

    rmse_values = []  # To store RMSE values for each section
    section_start_idx = []  # To store the start indices of each 30-day section for placement of RMSE labels
    for idx in range(0, cut_off + pred_len, pred_len):
        future_input = s[idx, :seq_len, :].reshape(1, seq_len, -1)
        future = model.predict(future_input)[0]
        future_padded = np.hstack((future.reshape(-1, 1), np.zeros((future.shape[0], data.shape[1] - 3))))  

        future_transform = scaler.inverse_transform(future_padded)[:, 0]  
        future_true = s[idx, seq_len:, 0].reshape(-1, 1)
        future_true_padded = np.hstack((future_true, np.zeros((future_true.shape[0], data.shape[1] - 3))))
        future_true_transform = scaler.inverse_transform(future_true_padded)[:, 0]

        rmse = np.sqrt(mean_squared_error(future_true_transform, future_transform))
        rmse_values.append(rmse)
        section_start_idx.append(idx)

        plt.plot(np.arange(idx, idx + pred_len), future_transform, color="red" if idx < cut_off else "blue")
        plt.plot(np.arange(idx, idx + pred_len), future_true_transform, color="black" if idx < cut_off else "green")
    
    plt.plot([], [], color="red", label="Train Prediction")
    plt.plot([], [], color="black", label="Train True")
    plt.plot([], [], color="blue", label="Test Prediction")
    plt.plot([], [], color="green", label="Test True")
    plt.legend()
    for idx in range(0, s.shape[0], pred_len):
        plt.axvline(x=idx + pred_len, color='gray', linestyle='--', linewidth=1)

    plt.title(f"{seq_len}-{pred_len} Predictions for {ticker}, model {m}")
    plt.xlabel("Trading Days")
    plt.ylabel("Price")

    for i, rmse in zip(section_start_idx, rmse_values):
        plt.text(i + pred_len / 2, min(plt.ylim()), f"{rmse:.0f}", color="teal", fontsize=10, ha='center', va='top')
    plt.text(-5 - pred_len / 2, min(plt.ylim()), f"RMSE", color="teal", fontsize=10, ha='center', va='top')
    plt.grid(True)
    
    if m == "BSPINN w/ 8 features":
        dir = f"data/{seq_len}-{pred_len}/BSpinn_8"
        if not os.path.exists(dir):
            os.makedirs(dir)
        plt.savefig(f"{dir}/{ticker}.png")
    return rmse_values[-1]

# Main workflow
def main(tickers):
    m = "BSPINN w/ 8 features"
    test_rmse = []
    for ticker in tickers:
        tsla = fetch_stock_data(ticker, start_date="2021-01-01", end_date="2024-09-30")
        tsla = compute_technical_indicators(tsla)

        strike_price = tsla['Close'] * 1.05
        time_to_maturity = 30 / 252
        risk_free_rate = 0.01
        implied_volatility = tsla['Volatility']
        tsla = add_option_pricing_features_with_pde(tsla, tsla['Close'], strike_price, time_to_maturity, risk_free_rate, implied_volatility)

        features = ['Close', 'Volume', 'SMA30', 'SMA100', 'Volatility', 'Option_Price_PDE']
        data = tsla[features]
        data_scaled, scaler = preprocess_data(data, n_pca_components=2)
        sequences = create_sequences(data_scaled, seq_len=30, pred_len=5)

        seq_len = 180
        pred_len = 20
        x_train, y_train = sequences[:, :-pred_len, :], sequences[:, -pred_len:, 0]

        model = build_model(input_shape=(x_train.shape[1], x_train.shape[2]), output_len=pred_len)
        model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=1)

        rmse = predict_and_plot(m, ticker, data, sequences, model, seq_len, pred_len, scaler, cut_off=len(tsla) - 50)
        test_rmse.append(rmse)

    return test_rmse

# Running the code for selected tickers
tickers = ['AAPL', 'TSLA', 'GOOG']
test_rmse = main(tickers)
print("Test RMSE for all tickers:", test_rmse)


[*********************100%***********************]  1 of 1 completed


ValueError: setting an array element with a sequence.