In [None]:
import dask.dataframe as dd
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dask.distributed import Client
from dask_ml.model_selection import HyperbandSearchCV
from dask_ml.preprocessing import MinMaxScaler
from IPython.display import display
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Initialize Dask client.
# Re-running this cell reuses the client that is already running instead of
# spawning another local cluster; Client.current() raises ValueError when no
# client exists yet, which is the fresh-kernel path.
try:
    client = Client.current()
except ValueError:
    client = Client(n_workers=4, threads_per_worker=2)

In [None]:
# Build the list of selectable stocks. Only the StockName column is parsed
# (the other columns are not needed here), missing names are dropped, and the
# result is a plain list, which is what the Dropdown widget expects.
stock_names = (
    dd.read_csv('./data/full_history/*.csv', usecols=['StockName'])['StockName']
    .unique()
    .compute()
    .dropna()
    .tolist()
)
stock_dropdown = widgets.Dropdown(options=stock_names, description='Select Stock:')
display(stock_dropdown)

In [None]:
# Snapshot the dropdown's current value. The value is read once here, so this
# cell must be re-run after changing the widget for later cells to see the
# new choice.
selected_stock = stock_dropdown.value
print(f"Selected stock: {selected_stock}")

In [None]:
def create_sequences(data, window=100):
    """Create time series sequences using Dask"""
    X, y = [], []
    for i in range(window, len(data)):
        X.append(data[i-window:i])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

def build_lstm_model(hp_units=50, hp_dropout=0.2, window=100, n_features=1):
    """Build and compile a three-layer stacked LSTM regressor.

    Parameters
    ----------
    hp_units : int, default 50
        Units in the first LSTM layer; the second and third layers use
        ``int(hp_units * 1.2)`` and ``int(hp_units * 1.5)``.
    hp_dropout : float, default 0.2
        Dropout rate after the first LSTM layer; the following dropout layers
        use 1.5x and 2x this rate, so it must lie in ``[0, 0.5]``.
    window : int, default 100
        Number of time steps per input sample.
    n_features : int, default 1
        Number of features per time step.

    Returns
    -------
    tensorflow.keras.models.Sequential
        Model compiled with the Adam optimizer and MSE loss.

    Raises
    ------
    ValueError
        If ``hp_units``, ``window`` or ``n_features`` is smaller than 1, or
        ``hp_dropout`` is outside ``[0, 0.5]``.
    """
    if hp_units < 1 or window < 1 or n_features < 1:
        raise ValueError(
            "hp_units, window and n_features must all be >= 1, got "
            f"hp_units={hp_units}, window={window}, n_features={n_features}"
        )
    # The last Dropout layer uses 2 * hp_dropout, which must remain a valid
    # rate; fail here with a clear message rather than inside Keras.
    if not 0 <= hp_dropout <= 0.5:
        raise ValueError(f"hp_dropout must be in [0, 0.5], got {hp_dropout}")

    model = Sequential([
        LSTM(hp_units, return_sequences=True, input_shape=(window, n_features)),
        Dropout(hp_dropout),
        LSTM(int(hp_units*1.2), return_sequences=True),
        Dropout(hp_dropout*1.5),
        # Final recurrent layer returns only its last output for the Dense head.
        LSTM(int(hp_units*1.5)),
        Dropout(hp_dropout*2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
def _tune_lstm(X_train, y_train, epochs=50):
    """Pick LSTM hyperparameters on a chronological hold-out, then refit on all training data."""
    # The most recent 20% of the training sequences act as the validation set
    # so that model selection never looks at data that precedes its inputs.
    val_start = int(0.8 * len(X_train))
    X_fit, y_fit = X_train[:val_start], y_train[:val_start]
    X_val, y_val = X_train[val_start:], y_train[val_start:]

    best_params, best_loss = None, np.inf
    for units in (50, 100, 150):
        for dropout in (0.2, 0.3, 0.4):
            candidate = build_lstm_model(hp_units=units, hp_dropout=dropout)
            history = candidate.fit(
                X_fit, y_fit,
                validation_data=(X_val, y_val),
                epochs=epochs,
                verbose=0,
            )
            # Score with the final epoch: that is the state the weights are in.
            val_loss = history.history['val_loss'][-1]
            if val_loss < best_loss:
                best_loss = val_loss
                best_params = {'hp_units': units, 'hp_dropout': dropout}

    if best_params is None:
        # Every comparison failed, i.e. all validation losses were NaN.
        raise RuntimeError("LSTM tuning produced no finite validation loss")

    best_model = build_lstm_model(**best_params)
    best_model.fit(X_train, y_train, epochs=epochs, verbose=0)
    return best_model


def _tune_random_forest(X_train, y_train):
    """Grid-search a RandomForestRegressor with forward-chaining time-series CV."""
    rf_param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20]
    }
    rf_search = GridSearchCV(
        estimator=RandomForestRegressor(random_state=42),
        param_grid=rf_param_grid,
        scoring='neg_mean_squared_error',
        # TimeSeriesSplit always validates on samples that come after the
        # ones used for fitting, unlike the default shuffled-agnostic K-fold.
        cv=TimeSeriesSplit(n_splits=3),
    )
    rf_search.fit(X_train, y_train)
    return rf_search.best_estimator_


def _regression_metrics(y_true, y_pred):
    """Return (RMSE, R²) for the given true and predicted values."""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return rmse, r2


def train_and_evaluate(stock_name):
    """Train an LSTM and a Random Forest on one stock's closing prices and report results.

    Loads the rows whose ``StockName`` equals ``stock_name``, scales the
    ``close`` column, builds sliding-window samples, tunes both models on the
    first 80% of the samples, then plots test-set predictions and prints
    RMSE / R² for each model.

    Parameters
    ----------
    stock_name
        Value matched against the ``StockName`` column of the CSV files.

    Raises
    ------
    ValueError
        If the stock has too few non-missing ``close`` values to build a
        training and a test set.
    """
    # Must equal the sequence length build_lstm_model expects by default.
    window = 100
    # Smallest sample count that still leaves room for train/validation/test
    # and the 3-fold time-series CV used for the Random Forest.
    min_sequences = 10

    # Load data with Dask; only the close column is materialised.
    dask_df = dd.read_csv('./data/full_history/*.csv',
                        dtype={'close': 'float64'})
    stock_data = dask_df[dask_df.StockName == stock_name][['close']].compute()
    # NOTE(review): rows are used in file order, which is presumably
    # chronological -- confirm, or sort by the date column before this point.
    prices = stock_data.dropna().to_numpy(dtype='float64')

    n_sequences = len(prices) - window
    if n_sequences < min_sequences:
        raise ValueError(
            f"Not enough data for {stock_name!r}: need at least "
            f"{window + min_sequences} close prices, got {len(prices)}"
        )
    split = int(0.8 * n_sequences)

    # Fit the scaler only on rows that training samples can touch (inputs and
    # targets of the first `split` windows) so test prices do not leak in.
    scaler = MinMaxScaler()
    scaler.fit(prices[:split + window])
    scaled_data = scaler.transform(prices)

    # Create sequences
    X, y = create_sequences(scaled_data, window=window)
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # LSTM hyperparameter tuning
    best_lstm = _tune_lstm(X_train, y_train)

    # Random Forest needs flat 2-D input: one row per window.
    X_train_rf = X_train.reshape(X_train.shape[0], -1)
    X_test_rf = X_test.reshape(X_test.shape[0], -1)
    best_rf = _tune_random_forest(X_train_rf, y_train)

    # Generate predictions
    lstm_pred = best_lstm.predict(X_test, verbose=0).flatten()
    rf_pred = best_rf.predict(X_test_rf)

    # Inverse scaling back to price units
    lstm_pred = scaler.inverse_transform(lstm_pred.reshape(-1, 1))
    rf_pred = scaler.inverse_transform(rf_pred.reshape(-1, 1))
    y_test_orig = scaler.inverse_transform(y_test.reshape(-1, 1))

    lstm_rmse, lstm_r2 = _regression_metrics(y_test_orig, lstm_pred)
    rf_rmse, rf_r2 = _regression_metrics(y_test_orig, rf_pred)

    # Plot results
    fig, ax = plt.subplots(figsize=(15, 6))
    ax.plot(y_test_orig, label='Actual Prices', color='blue')
    ax.plot(lstm_pred, label='LSTM Predictions', color='red', linestyle='--')
    ax.plot(rf_pred, label='RF Predictions', color='green', linestyle='-.')
    ax.set_title(f'{stock_name} Stock Price Predictions')
    ax.set_xlabel('Time Steps')
    ax.set_ylabel('Price')
    ax.legend()
    ax.grid(True)
    plt.show()

    # Print metrics; the label column is wide enough for the longest label so
    # values line up and never run into the label text.
    print(f"\n{' Model ':-^40}")
    print(f"{'LSTM RMSE:':<22}{lstm_rmse:.4f}")
    print(f"{'LSTM R²:':<22}{lstm_r2:.4f}")
    print(f"\n{'Random Forest RMSE:':<22}{rf_rmse:.4f}")
    print(f"{'Random Forest R²:':<22}{rf_r2:.4f}")

In [None]:
# Run the training/evaluation pipeline for the stock held in `selected_stock`.
train_and_evaluate(selected_stock)