# In [1]:
import numpy as np
import tensorflow as tf
import yfinance as yf
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Function to fetch stock data from yfinance
def fetch_stock_data(tickers, start_date, end_date):
    """Download adjusted close prices and forward-fill missing values.

    Args:
        tickers: Ticker symbol(s) passed straight to ``yf.download``.
        start_date: Start of the download window, passed as ``start``.
        end_date: End of the download window, passed as ``end``.

    Returns:
        The 'Adj Close' selection of the downloaded data with gaps
        forward-filled. Leading missing values (before the first valid
        observation) are left as NaN, since there is nothing to carry forward.
    """
    # auto_adjust=False keeps a separate 'Adj Close' column; newer yfinance
    # releases default to auto_adjust=True, under which that column is absent
    # and the lookup below would raise KeyError.
    raw = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
    data = raw['Adj Close']
    # ffill() is the supported replacement for the deprecated
    # fillna(method='ffill').
    return data.ffill()

# Function to visualize correlation matrix and clusters
def visualize_correlations_and_clusters(stock_data, labels):
    """Show a return-correlation heatmap followed by a cluster-membership plot.

    Args:
        stock_data: Price table with one column per ticker.
        labels: Cluster label for each column of ``stock_data``, indexed by
            column position.
    """
    # Correlations are computed on period-over-period returns, not raw prices.
    corr = stock_data.pct_change().dropna().corr()

    # Figure 1: annotated heatmap centred on zero correlation.
    plt.figure(figsize=(12, 8))
    sns.heatmap(corr, annot=True, cmap='coolwarm', center=0)
    plt.title('Stock Correlation Matrix')
    plt.show()

    # Figure 2: one marker per ticker on a single row, coloured by cluster.
    plt.figure(figsize=(10, 6))
    for position, ticker in enumerate(stock_data.columns):
        cluster = labels[position]
        plt.scatter(
            position,
            0,
            c=f'C{cluster}',
            s=100,
            label=f'{ticker} (Cluster {cluster})',
        )
    plt.title('Stock Clusters')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.yticks([])  # the y axis carries no information here
    plt.show()

# Function to create hyperedges from correlation (with visualization)
def create_hyperedges_from_correlation(stock_data, num_clusters):
    """Cluster tickers by return correlation and build a hypergraph incidence matrix.

    Each cluster is treated as one hyperedge; every ticker belongs to exactly
    one cluster.

    Args:
        stock_data: Price table with one column per ticker.
        num_clusters: Number of clusters (hyperedges) to form.

    Returns:
        A tuple ``(H, correlation_matrix)`` where ``H`` is a
        ``(num_tickers, num_clusters)`` array with ``H[i, c] == 1`` when
        ticker ``i`` was assigned to cluster ``c`` (0 elsewhere), and
        ``correlation_matrix`` is the correlation of the tickers' returns.

    Side effects:
        Displays the correlation heatmap and cluster plot.
    """
    returns = stock_data.pct_change().dropna()
    correlation_matrix = returns.corr()
    # Highly correlated tickers end up close together (distance near 0).
    distance_matrix = 1 - correlation_matrix

    # Newer scikit-learn names this parameter `metric`; `affinity` was
    # deprecated and later removed. Fall back only for old releases whose
    # constructor does not know `metric`.
    try:
        clustering = AgglomerativeClustering(n_clusters=num_clusters,
                                             metric='precomputed',
                                             linkage='average')
    except TypeError:
        clustering = AgglomerativeClustering(n_clusters=num_clusters,
                                             affinity='precomputed',
                                             linkage='average')
    labels = clustering.fit_predict(distance_matrix)

    # Create incidence matrix: row = ticker, column = cluster.
    num_tickers = len(stock_data.columns)
    H = np.zeros((num_tickers, num_clusters))
    H[np.arange(num_tickers), labels] = 1

    # Visualize correlations and clusters
    visualize_correlations_and_clusters(stock_data, labels)

    return H, correlation_matrix

# Custom LSTM-Hypergraph Model (with model structure visualization)
class LSTMHypergraphModel(tf.keras.Model):
    """LSTM encoder followed by a hypergraph aggregation step.

    The LSTM summarises the input sequence into a single vector
    (``return_sequences=False``), a dense layer maps it to one value per
    ticker, and the incidence matrix pools those values per cluster and
    spreads the pooled values back to every ticker in that cluster.
    """

    def __init__(self, lstm_units, num_tickers, num_clusters):
        """Build the layers.

        Args:
            lstm_units: Size of the LSTM hidden state.
            num_tickers: Width of the dense output (one unit per ticker).
            num_clusters: Number of hyperedges; stored for reference only,
                no layer is sized from it.
        """
        super().__init__()
        self.lstm = tf.keras.layers.LSTM(lstm_units, return_sequences=False)
        self.dense = tf.keras.layers.Dense(num_tickers)

        # Store architecture for visualization
        self.architecture = {
            'lstm_units': lstm_units,
            'num_tickers': num_tickers,
            'num_clusters': num_clusters
        }

    def call(self, inputs, incidence_matrix):
        """Run the forward pass.

        Args:
            inputs: Sequence batch fed to the LSTM.
            incidence_matrix: Ticker-by-cluster incidence matrix; its first
                dimension must match ``num_tickers`` for the matmul to be
                valid.

        Returns:
            One vector per input sequence with ``num_tickers`` entries.
        """
        lstm_output = self.lstm(inputs)
        stock_features = self.dense(lstm_output)
        # A NumPy incidence matrix is float64 by default while the layer
        # outputs are typically float32; cast once so both matmuls (including
        # the one on the transposed tensor) see matching dtypes.
        incidence = tf.cast(incidence_matrix, stock_features.dtype)
        # Pool ticker features into their cluster ...
        hypergraph_output = tf.matmul(stock_features, incidence)
        # ... then broadcast each cluster value back to its member tickers.
        final_output = tf.matmul(hypergraph_output, tf.transpose(incidence))
        return final_output

    def visualize_architecture(self):
        """Draw a schematic of the pipeline stages, bottom (input) to top (output)."""
        plt.figure(figsize=(15, 8))

        # Define components
        components = ['Input', 'LSTM', 'Dense', 'Hypergraph', 'Output']
        y_positions = np.linspace(0, 1, len(components))

        # Plot components: one horizontal bar per stage, labelled on the left.
        for y_pos, comp in zip(y_positions, components):
            plt.plot([0.2, 0.8], [y_pos, y_pos], 'b-', linewidth=2)
            plt.text(0.1, y_pos, comp, ha='right', va='center')

        # Add arrows from each stage to the next one above it.
        for y_from, y_to in zip(y_positions[:-1], y_positions[1:]):
            plt.arrow(0.5, y_from, 0, y_to - y_from,
                      head_width=0.02, head_length=0.02, fc='k', ec='k')

        plt.title('LSTM-Hypergraph Model Architecture')
        plt.axis('off')
        plt.show()

def calculate_returns(stock_data):
    """Return period-over-period percentage changes, dropping rows with NaN.

    The first row is always dropped because it has no previous row to
    compare against.
    """
    return stock_data.pct_change().dropna()

# Test function with visualization
def test_model_with_hypergraph(model, stock_data, incidence_matrix):
    """Run the model on the price history, report RMSE and plot the result.

    The model is fed all rows except the last as a single sequence. If it
    returns one vector per sequence (rank 2, e.g. an LSTM with
    ``return_sequences=False``), that vector is scored against the final
    row of returns only. If it returns one vector per time step (rank 3),
    it is scored against every row of returns.

    Args:
        model: Callable taking ``(features, incidence_matrix)`` and returning
            predictions whose last dimension has one entry per ticker.
        stock_data: Price table with one column per ticker.
        incidence_matrix: Ticker-by-cluster incidence matrix forwarded to
            the model.

    Side effects:
        Prints the RMSE and displays one subplot per ticker.
    """
    num_tickers = stock_data.shape[1]
    time_steps = stock_data.shape[0] - 1
    current_features = stock_data[:-1].values.reshape(1, time_steps, num_tickers)
    next_day_returns = calculate_returns(stock_data).values.reshape(1, time_steps, num_tickers)

    # Make predictions
    predictions = np.asarray(model(current_features, incidence_matrix))

    # A rank-2 output is a single forecast made after seeing the whole
    # sequence, so its target is the last return row. Comparing it against
    # every time step would score it on targets it never tried to predict.
    per_step = predictions.ndim == 3
    targets = next_day_returns if per_step else next_day_returns[:, -1, :]

    # Calculate RMSE
    mse_loss = tf.keras.losses.MeanSquaredError()
    test_mse = mse_loss(targets, predictions)
    test_rmse = tf.sqrt(test_mse)
    print(f"Test RMSE: {test_rmse.numpy()}")

    # Visualize predictions vs actual; the grid grows with the ticker count
    # (3x2 for six tickers) instead of being fixed.
    num_cols = 2
    num_rows = max(1, -(-num_tickers // num_cols))
    plt.figure(figsize=(15, 10))
    for i, ticker in enumerate(stock_data.columns):
        plt.subplot(num_rows, num_cols, i + 1)
        plt.plot(next_day_returns[0, :, i], label='Actual', alpha=0.7)
        if per_step:
            plt.plot(predictions[0, :, i], label='Predicted', alpha=0.7)
        else:
            # Single forecast: mark it at the final time step it refers to.
            plt.scatter(time_steps - 1, predictions[0, i], color='C1',
                        label='Predicted', zorder=3)
        plt.title(f'{ticker} Returns')
        plt.legend()
    plt.tight_layout()
    plt.show()

def main():
    """Fetch prices, build the hypergraph, create the model and evaluate it."""
    # Universe and clustering configuration.
    tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA', 'NFLX']
    num_clusters = 3

    # Download the price history for the chosen window.
    stock_data = fetch_stock_data(tickers, '2020-01-01', '2023-01-01')

    # Cluster the tickers; only the incidence matrix is needed afterwards.
    incidence, _ = create_hyperedges_from_correlation(stock_data, num_clusters)

    # Build the model and show its schematic.
    model = LSTMHypergraphModel(
        lstm_units=64,
        num_tickers=len(tickers),
        num_clusters=num_clusters,
    )
    model.visualize_architecture()

    # Score the model on the downloaded data and plot the outcome.
    test_model_with_hypergraph(model, stock_data, incidence)

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Notebook output: ModuleNotFoundError: No module named 'tensorflow'
# (the tensorflow package must be installed before this cell can run)