In [None]:
import pandas as pd
import numpy as np
from stockdex import Ticker
import matplotlib.pyplot as plt
import warnings
from gaussian_proces import GaussianProcessStockPredictor
warnings.filterwarnings("ignore")

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

def get_data(range, dataGranularity, stock):
    """Fetch price history for ``stock`` and return it as a timestamp-indexed frame.

    Args:
        range: Look-back period forwarded to ``Ticker.yahoo_api_price``.
            NOTE: this parameter shadows the ``range`` builtin inside this
            function; it is kept because callers may pass it by keyword.
        dataGranularity: Bar size forwarded to ``Ticker.yahoo_api_price``.
        stock: Ticker symbol passed to ``Ticker``.

    Returns:
        A DataFrame indexed by ``timestamp`` with the columns ``open``,
        ``high``, ``low``, ``close`` and ``volume``, or ``None`` when the
        download or the reshaping raised (the error is printed).
    """
    ticker = Ticker(stock)
    wanted_columns = ('timestamp', 'open', 'high', 'low', 'close', 'volume')
    try:
        raw = ticker.yahoo_api_price(range=range, dataGranularity=dataGranularity)
        frame = pd.DataFrame({name: raw[name] for name in wanted_columns})
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        return frame.set_index('timestamp')
    except Exception as e:
        # Best-effort fetch: report the problem and let the caller check for None.
        print(f"An error occurred while fetching data for {stock}: {e}")
        return None

def create_sequences(data, target, sequence_length):
    """Slice ``data`` into overlapping windows paired with the next target value.

    Window ``i`` covers ``data[i:i + sequence_length]`` and is paired with
    ``target[i + sequence_length]``, i.e. the value right after the window.

    Args:
        data: Sliceable sequence of feature rows.
        target: Indexable sequence of target values aligned with ``data``.
        sequence_length: Number of rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays; both are empty when ``data`` has
        no more than ``sequence_length`` rows.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    next_values = [target[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(next_values)

def add_enhanced_features(df):
    """Return a copy of ``df`` extended with lag, trend, volume and range features.

    Reads the ``close``, ``volume``, ``high`` and ``low`` columns and adds:
    ``close_lag_1``, ``return_1d``, ``ma_5``, ``ma_10``, ``price_momentum``,
    ``volume_ma_5``, ``volume_momentum`` and ``intraday_vol``.

    Args:
        df: Price DataFrame; it is not modified.

    Returns:
        The extended copy with every row containing a NaN removed (the
        rolling windows and the lag leave NaNs in the leading rows).
    """
    enriched = df.copy()
    close = enriched['close']
    volume = enriched['volume']

    # Rolling means that are both stored and reused by the momentum ratios.
    close_ma_5 = close.rolling(window=5).mean()
    volume_ma_5 = volume.rolling(window=5).mean()

    # Lagged price and one-step return.
    enriched['close_lag_1'] = close.shift(1)
    enriched['return_1d'] = close.pct_change()

    # Trend: moving averages and relative distance from the short average.
    enriched['ma_5'] = close_ma_5
    enriched['ma_10'] = close.rolling(window=10).mean()
    enriched['price_momentum'] = (close - close_ma_5) / close_ma_5

    # Volume relative to its own short average.
    enriched['volume_ma_5'] = volume_ma_5
    enriched['volume_momentum'] = (volume - volume_ma_5) / volume_ma_5

    # High-low range as a fraction of the close.
    enriched['intraday_vol'] = (enriched['high'] - enriched['low']) / close

    return enriched.dropna()

def select_best_features(X, y, feature_names, k=8):
    """Keep the ``k`` features whose last-timestep value correlates most with ``y``.

    Each feature is scored by the absolute Pearson correlation between its
    value at the final timestep of every sequence (``X[:, -1, i]``) and the
    flattened target.

    Args:
        X: Array indexed as ``[sample, timestep, feature]``.
        y: Target values, one per sample (any shape that flattens to that).
        feature_names: Names for the features along the last axis of ``X``.
        k: Maximum number of features to keep; clamped to the number of
            available features/names and to a minimum of 0.

    Returns:
        Tuple ``(X_selected, selected_feature_names)`` where the kept
        features are ordered from weakest to strongest correlation.
    """
    target = np.asarray(y).ravel()
    n_features = X.shape[2]

    # A constant feature has zero variance, so corrcoef yields NaN (with a
    # RuntimeWarning that is silenced here).
    with np.errstate(divide='ignore', invalid='ignore'):
        correlations = np.array(
            [abs(np.corrcoef(X[:, -1, i], target)[0, 1]) for i in range(n_features)],
            dtype=float,
        )
    # argsort places NaN last, which would rank an uninformative feature as
    # the best one; score it as 0 instead.
    correlations[np.isnan(correlations)] = 0.0

    k = max(0, min(k, len(feature_names), n_features))
    # Slice from an explicit start: ``[-k:]`` would return everything for k == 0.
    top_k_indices = np.argsort(correlations)[n_features - k:]

    selected_feature_names = [feature_names[i] for i in top_k_indices]

    print(f"Selected {k} best features:")
    for rank, (idx, name) in enumerate(zip(top_k_indices, selected_feature_names), start=1):
        print(f"  {rank}. {name}: {correlations[idx]:.3f}")

    return X[:, :, top_k_indices], selected_feature_names

def create_enhanced_model(input_shape):
    """Build and compile the single-layer LSTM regressor.

    The network is LSTM(32) -> Dropout(0.2) -> Dense(1), compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape passed to the LSTM layer's ``input_shape``
            argument (the caller passes ``(timesteps, features)``).

    Returns:
        The compiled ``Sequential`` model.
    """
    recurrent = LSTM(32, return_sequences=False, input_shape=input_shape)
    regularizer = Dropout(0.2)
    head = Dense(1)

    network = Sequential([recurrent, regularizer, head])
    network.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return network

def train_model(X_train, y_train, X_test, y_test, close_scaler):
    """Fit the LSTM on the training windows and predict the test windows.

    Training runs for at most 50 epochs with batch size 32; the last 20% of
    the training samples are used as validation data and early stopping
    (patience 10, best weights restored) watches ``val_loss``.

    Args:
        X_train: Training windows indexed ``[sample, timestep, feature]``.
        y_train: Scaled training targets.
        X_test: Test windows with the same layout as ``X_train``.
        y_test: Scaled test targets.
        close_scaler: Fitted scaler whose ``inverse_transform`` maps scaled
            targets back to prices.

    Returns:
        Tuple ``(predictions, y_test_actual, history)`` with both arrays
        mapped back through ``close_scaler`` and ``history`` being the
        object returned by ``model.fit``.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    model = create_enhanced_model((timesteps, n_features))

    stop_when_stalled = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=0,
    )
    fit_options = {
        'epochs': 50,
        'batch_size': 32,
        'validation_split': 0.2,
        'callbacks': [stop_when_stalled],
        'verbose': 1,
    }
    history = model.fit(X_train, y_train, **fit_options)

    scaled_predictions = model.predict(X_test, verbose=0)
    predictions = close_scaler.inverse_transform(scaled_predictions)
    y_test_actual = close_scaler.inverse_transform(y_test)

    return predictions, y_test_actual, history

def calculate_metrics(actual, predicted):
    """Compute regression error metrics and directional accuracy.

    Args:
        actual: Observed values (array-like; any shape that flattens to
            one value per sample).
        predicted: Predicted values aligned with ``actual``.

    Returns:
        Dict with keys ``MSE``, ``RMSE``, ``MAE``, ``MAPE`` (percent) and
        ``Directional_Accuracy`` (percent of consecutive steps whose
        up/down direction matches; NaN when fewer than two samples).
        ``MAPE`` is inf/NaN if ``actual`` contains zeros.
    """
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mse)

    # Flatten before the element-wise arithmetic: an (n, 1) array minus an
    # (n,) array would otherwise broadcast to (n, n) and silently produce a
    # wrong MAPE.
    actual_flat = np.asarray(actual).ravel()
    predicted_flat = np.asarray(predicted).ravel()
    mape = np.mean(np.abs((actual_flat - predicted_flat) / actual_flat)) * 100

    actual_direction = np.diff(actual_flat) > 0
    predicted_direction = np.diff(predicted_flat) > 0
    if actual_direction.size:
        directional_accuracy = np.mean(actual_direction == predicted_direction) * 100
    else:
        # A single sample has no direction; avoid the mean-of-empty warning.
        directional_accuracy = np.nan

    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'Directional_Accuracy': directional_accuracy
    }

def plot_results(dates, actual, predicted, metrics):
    """Draw actual and predicted prices on the current matplotlib figure and show it.

    Args:
        dates: X-axis values shared by both series.
        actual: Observed prices.
        predicted: Predicted prices.
        metrics: Accepted for interface compatibility; not used here.
    """
    # Both series share the same line-plus-dots styling.
    line_style = {'linewidth': 2, 'alpha': 0.8, 'marker': 'o', 'markersize': 4}
    labelled_series = (
        ('Actual Price', actual),
        ('Predicted Price', predicted),
    )
    for label, values in labelled_series:
        plt.plot(dates, values, label=label, **line_style)

    plt.title('Price Prediction', fontsize=14, fontweight='bold')
    plt.xlabel('Date')
    plt.ylabel('Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

def print_performance_summary(metrics):
    """Print the metrics dict as an aligned, human-readable report.

    Args:
        metrics: Mapping with the keys ``RMSE``, ``MAE``, ``MAPE``, ``MSE``
            and ``Directional_Accuracy``.
    """
    # (line template, metrics key) in the order the report is printed.
    report_rows = (
        ("Root Mean Squared Error:      {:.4f}", 'RMSE'),
        ("Mean Absolute Error (MAE):    {:.4f}", 'MAE'),
        ("Mean Absolute Percentage Error: {:.2f}%", 'MAPE'),
        ("Mean Squared Error (MSE):     {:.4f}", 'MSE'),
        ("Directional Accuracy:         {:.2f}%", 'Directional_Accuracy'),
    )

    print("Prediction Accuracy Metrics:")
    for template, key in report_rows:
        print(template.format(metrics[key]))


if __name__ == "__main__":
    # End-to-end run: fetch one year of daily GOOG bars, train the LSTM on all
    # but the last 10 target days, then compare it with the Gaussian-process
    # predictor on those held-out days.
    data = get_data('1y', '1d', 'GOOG')

    if data is not None:

        data_enhanced = add_enhanced_features(data)

        feature_cols = [
            'open', 'high', 'low', 'close', 'volume',
            'close_lag_1', 'return_1d', 'ma_5', 'ma_10',
            'price_momentum', 'volume_momentum', 'intraday_vol'
        ]

        sequence_length = 25
        test_size = 10

        # One sequence exists per row after the first ``sequence_length`` rows;
        # the last ``test_size`` sequences are held out.
        train_size = len(data_enhanced) - sequence_length - test_size
        if train_size <= 0:
            raise ValueError(
                f"Not enough rows ({len(data_enhanced)}) for sequence_length="
                f"{sequence_length} and {test_size} test samples"
            )

        # Fit the scalers only on rows that feed training sequences/targets so
        # that statistics of the held-out days do not leak into training.
        train_rows = data_enhanced.iloc[:train_size + sequence_length]

        feature_scaler = StandardScaler()
        feature_scaler.fit(train_rows[feature_cols])
        scaled_features = feature_scaler.transform(data_enhanced[feature_cols])

        close_scaler = MinMaxScaler()
        close_scaler.fit(train_rows[['close']])
        scaled_close = close_scaler.transform(data_enhanced[['close']])

        X, y = create_sequences(scaled_features, scaled_close, sequence_length)

        y_train = y[:train_size]
        y_test = y[train_size:]

        # Rank features on the training sequences only, then apply the same
        # column choice (in the same order) to the test sequences.
        print(f"\nSelecting best features...")
        X_train, selected_features = select_best_features(X[:train_size], y_train, feature_cols, k=8)
        selected_idx = [feature_cols.index(name) for name in selected_features]
        X_test = X[train_size:][:, :, selected_idx]

        predictions, actual, history = train_model(X_train, y_train, X_test, y_test, close_scaler)

        metrics = calculate_metrics(actual, predictions)

        # Target of sequence i is row i + sequence_length, so these are the
        # dates of the held-out targets.
        lstm_dates = data_enhanced.index[sequence_length + train_size : sequence_length + train_size + len(predictions)]

        print("\nLSTM Model Performance Summary:")
        print_performance_summary(metrics)

        fig, ax = plt.subplots(figsize=(14, 6))
        predictor = GaussianProcessStockPredictor(data)
        # Keep the GPR dates separate so they do not overwrite the LSTM dates.
        gpr_dates, preds_gpr, stds = predictor.predict_n_days(10, normalize_y=True, n_restarts_optimizer=10)
        preds_gpr = np.array(preds_gpr).reshape(-1, 1)  # Ensure it's a column vector

        predictor.plot_predictions(gpr_dates, preds_gpr, stds, ax)

        # LSTM actual/predicted values belong to the held-out dates computed above.
        plot_results(lstm_dates, actual, predictions, metrics)

        # NOTE(review): this assumes predict_n_days(10) predicts the same 10
        # held-out days as ``actual`` -- confirm against the predictor.
        metrics_gpr = calculate_metrics(actual, preds_gpr)
        print("\nGaussian Process Regression Metrics:")
        print_performance_summary(metrics_gpr)
   



Selecting best features...
Selected 8 best features:
  1. price_momentum: 0.221
  2. ma_10: 0.894
  3. ma_5: 0.931
  4. close_lag_1: 0.935
  5. open: 0.952
  6. high: 0.959
  7. low: 0.963
  8. close: 0.967
Epoch 1/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.2186 - val_loss: 0.0920
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0888 - val_loss: 0.0167
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0508 - val_loss: 0.0233
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0480 - val_loss: 0.0513
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0473 - val_loss: 0.0225
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0348 - val_loss: 0.0061
Epoch 7/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.027