In [6]:
import numpy as np
import tensorflow as tf
import yfinance as yf
from sklearn.cluster import AgglomerativeClustering

# Function to fetch stock data from yfinance
def fetch_stock_data(tickers, start_date, end_date):
    data = yf.download(tickers, start=start_date, end=end_date)['Adj Close']
    data = data.fillna(method='ffill')  # Fill missing data
    return data

# Function to compute correlation matrix and clusters
def create_hyperedges_from_correlation(stock_data, num_clusters):
    returns = stock_data.pct_change().dropna()  # Compute daily returns
    correlation_matrix = returns.corr()  # Correlation matrix of returns

    # Convert correlation matrix to distance matrix (1 - correlation)
    distance_matrix = 1 - correlation_matrix

    # Perform hierarchical clustering to group similar stocks into hyperedges
    clustering = AgglomerativeClustering(n_clusters=num_clusters, affinity='precomputed', linkage='average')
    labels = clustering.fit_predict(distance_matrix)

    # Create incidence matrix based on clustering
    num_tickers = len(stock_data.columns)
    H = np.zeros((num_tickers, num_clusters))

    for ticker_idx, cluster_label in enumerate(labels):
        H[ticker_idx, cluster_label] = 1  # Assign ticker to its cluster (hyperedge)

    return H, correlation_matrix

# Custom LSTM-Hypergraph Model
class LSTMHypergraphModel(tf.keras.Model):
    def __init__(self, lstm_units, num_tickers, num_clusters):
        super(LSTMHypergraphModel, self).__init__()
        self.lstm = tf.keras.layers.LSTM(lstm_units, return_sequences=False, name='lstm_layer')
        self.dense = tf.keras.layers.Dense(num_tickers, name='dense_output')  # Output for each ticker

    def call(self, inputs, incidence_matrix):
        stock_data_seq = inputs  # Time series input data
        lstm_output = self.lstm(stock_data_seq)  # LSTM processes the sequential data
        stock_features = self.dense(lstm_output)  # Output layer, one for each ticker

        # Apply the incidence matrix to model relationships between stocks
        # Using matrix multiplication of stock features with the incidence matrix
        hypergraph_output = tf.matmul(stock_features, incidence_matrix)

        # Make sure the final output matches the number of tickers
        final_output = tf.matmul(hypergraph_output, tf.transpose(incidence_matrix))
        return final_output

def calculate_returns(stock_data):
    """Calculates daily returns."""
    returns = stock_data.pct_change().dropna()
    return returns

# Test function with RMSE calculation
def test_model_with_hypergraph(model, stock_data, incidence_matrix):
    time_steps = stock_data.shape[0] - 1
    current_features = stock_data[:-1].values.reshape(1, time_steps, stock_data.shape[1])  # Shape: [1, time_steps, num_tickers]
    next_day_returns = calculate_returns(stock_data).values.reshape(1, time_steps, stock_data.shape[1])  # Shape: [1, time_steps, num_tickers]

    # Make predictions using the trained model
    predictions = model(current_features, incidence_matrix)

    # Calculate the RMSE
    mse_loss = tf.keras.losses.MeanSquaredError()
    test_mse = mse_loss(next_day_returns, predictions)

    # Root Mean Squared Error
    test_rmse = tf.sqrt(test_mse)

    print(f"Test RMSE: {test_rmse.numpy()}")


# Main function to test the model
def main():
    # Define tickers and date range
    tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA', 'NFLX']
    start_date = '2020-01-01'
    end_date = '2023-01-01'

    # Fetch stock data from yfinance
    stock_data = fetch_stock_data(tickers, start_date, end_date)

    # Generate hypergraph incidence matrix based on correlation
    num_clusters = 3  # Define how many hyperedges you want to generate
    H, _ = create_hyperedges_from_correlation(stock_data, num_clusters)

    # Define the LSTM-Hypergraph model
    lstm_units = 64
    num_tickers = len(tickers)
    model = LSTMHypergraphModel(lstm_units=lstm_units, num_clusters=num_clusters, num_tickers=num_tickers)

    # Test the model
    test_model_with_hypergraph(model, stock_data, H)

if __name__ == "__main__":
    main()

[*********************100%%**********************]  6 of 6 completed


Test RMSE: 1.3226253986358643
