**Applying ML models for stock returns prediction**

**Import required libraries**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.inspection import permutation_importance
import statsmodels.api as sm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# Seed NumPy's and TensorFlow's global random generators with one shared value
# (for reproducibility).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

**Data Collection and Preprocessing**

In [2]:
# Stocks to model and the index used as the market benchmark
tickers = ["AAPL", "AMZN", "JPM", "PG", "UNH"]
market_index = "^GSPC"  # S&P 500
# Look-back window handed to the downloader (5 years)
period = "5y"
# Function to fetch data and calculate features
def fetch_and_process_data(ticker, market_index, period):
    """Download a stock and a market index and build the feature/target frame.

    Parameters
    ----------
    ticker : str
        Symbol of the stock, passed to ``yf.download``.
    market_index : str
        Symbol of the market index, passed to ``yf.download``.
    period : str
        Look-back period, passed to ``yf.download`` (e.g. ``'5y'``).

    Returns
    -------
    pandas.DataFrame
        The downloaded stock columns plus ``Returns`` (the *next* row's
        percentage change of ``Close`` -- the prediction target), ``SMA_10``,
        ``SMA_50``, ``RSI_14``, ``MACD``, ``Rolling_std``, ``BB_Upper``,
        ``BB_Lower``, ``ATR`` and ``MarketReturns``. Rows containing any NaN
        (indicator warm-up rows and the final row, whose next-day return is
        unknown) are dropped.
    """
    # Fetch stock data
    stock_data = yf.download(ticker, period=period)
    market_data = yf.download(market_index, period=period)

    # Keep only the dates present in both downloads. The explicit copies make
    # the frames independent objects, so the column assignments below never
    # write into a slice of the downloaded data.
    stock_data = stock_data.loc[stock_data.index.isin(market_data.index)].copy()
    market_data = market_data.loc[market_data.index.isin(stock_data.index)].copy()

    close = stock_data['Close']

    # Target variable: shift(-1) aligns each row with the following row's return
    stock_data['Returns'] = close.pct_change().shift(-1)

    # Simple moving averages (min_periods=1 -> partial windows at the start)
    stock_data['SMA_10'] = close.rolling(window=10, min_periods=1).mean()
    stock_data['SMA_50'] = close.rolling(window=50, min_periods=1).mean()

    # Relative Strength Index (RSI) from 14-row average gains and losses
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14, min_periods=1).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=14, min_periods=1).mean()
    rs = gain / loss
    stock_data['RSI_14'] = 100 - (100 / (1 + rs))

    # Moving Average Convergence Divergence (MACD): EMA(12) - EMA(26)
    stock_data['MACD'] = (
        close.ewm(span=12, adjust=False).mean()
        - close.ewm(span=26, adjust=False).mean()
    )

    # Bollinger Bands: SMA_10 +/- 2 rolling standard deviations
    stock_data['Rolling_std'] = close.rolling(window=10).std()
    stock_data['BB_Upper'] = stock_data['SMA_10'] + 2 * stock_data['Rolling_std']
    stock_data['BB_Lower'] = stock_data['SMA_10'] - 2 * stock_data['Rolling_std']

    # Average True Range (ATR). The true range compares today's high/low with
    # the PREVIOUS close so that overnight gaps are captured; using the
    # same-day close would make both extra terms <= High - Low and reduce the
    # indicator to the plain daily range. On the first row the previous close
    # is NaN, and max(axis=1) skips NaN, so that row falls back to High - Low.
    prev_close = close.shift(1)
    high_low = stock_data['High'] - stock_data['Low']
    high_prev_close = (stock_data['High'] - prev_close).abs()
    low_prev_close = (stock_data['Low'] - prev_close).abs()
    true_range = pd.concat(
        [high_low, high_prev_close, low_prev_close], axis=1
    ).max(axis=1)
    stock_data['ATR'] = true_range.rolling(window=14).mean()

    # Market indicators
    market_data['MarketReturns'] = market_data['Close'].pct_change()
    market_features = market_data[['MarketReturns']]

    # Add market returns to stock data (inner join on the date index)
    combined_data = pd.merge(stock_data, market_features, left_index=True, right_index=True)

    # Drop NaN values
    combined_data = combined_data.dropna()

    return combined_data

In [None]:
# Sanity check: print, for every ticker, how many missing values remain in
# each column of the processed frame.
for ticker in tickers:
    print(f"Processing {ticker}...")
    data = fetch_and_process_data(ticker, market_index, period)
    missing_per_column = data.isna().sum()
    print(missing_per_column)

**Define evaluation metrics, features, and target**

In [4]:
# Notebook-wide containers filled in by the model cells below
# (five independent, initially empty dicts).
results, models, predictions, importances, cv_results = {}, {}, {}, {}, {}

# Table of test-set metrics
metric_columns = ['Model', 'MSE', 'RMSE', 'MAE', 'R2']
eval_metrics = pd.DataFrame(columns=metric_columns)

# Define features and target
target_col = 'Returns'
feature_cols = [
    'SMA_10', 'SMA_50', 'RSI_14', 'MACD',
    'BB_Upper', 'BB_Lower', 'ATR', 'MarketReturns',
]

**Model Implementation**


* Linear Regression

In [None]:
# Linear Regression: fit, evaluate, cross-validate and plot for each ticker.
#
# Notes on shared state:
# * Only the predictions produced in this iteration are scored, so re-running
#   the cell never scores arrays left in `predictions` by another ticker/model.
# * `results[ticker]` receives fresh dicts (never the notebook-wide `predictions`
#   / `importances` / `cv_results` objects), so tickers do not share state.
#   Entries already stored for this ticker under other model names are kept.
for ticker in tickers:
    print(f"Processing {ticker}...")
    lr_name = 'Linear Regression'

    # Data Preprocessing
    data = fetch_and_process_data(ticker, market_index, period)
    X = data[feature_cols]
    y = data[target_col]

    # Split data (keeping time order) - 80% training, 20% testing
    train_size = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Standardize features using statistics from the training rows only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model Implementation
    # 1. Linear Regression
    print("Training Linear Regression...")
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train)
    lr_pred = lr_model.predict(X_test_scaled)
    models[lr_name] = lr_model
    predictions[lr_name] = lr_pred

    # Refit with statsmodels OLS (plus an intercept column) to obtain p-values
    X_train_with_const = sm.add_constant(X_train_scaled)
    ols_model = sm.OLS(y_train, X_train_with_const).fit()
    p_values = ols_model.pvalues[1:]  # Skip intercept p-value

    # Importance table with both coefficients and p-values
    lr_importance = pd.DataFrame({
        'Feature': feature_cols,
        'Importance': np.abs(lr_model.coef_),
        'Coefficient': ols_model.params[1:],  # Skip intercept
        'Abs_Coefficient': np.abs(ols_model.params[1:]),
        'p_value': p_values,
        'Significant': p_values < 0.05  # Flag for significant features
    }).sort_values('Abs_Coefficient', ascending=False)
    importances[lr_name] = lr_importance
    print(lr_importance.sort_values('Feature', ascending=True)[['Feature','p_value', 'Significant']])

    # Model Evaluation on the held-out 20%
    mse = mean_squared_error(y_test, lr_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, lr_pred)
    r2 = r2_score(y_test, lr_pred)
    metrics_row = pd.DataFrame([{
        'Ticker': ticker,
        'Model': lr_name,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
    }])
    # Public replacement for the private DataFrame._append; skipping the concat
    # while the table is still empty avoids concatenating an all-empty frame.
    eval_metrics = (
        metrics_row if eval_metrics.empty
        else pd.concat([eval_metrics, metrics_row], ignore_index=True)
    )

    # Print metrics
    print(f"Performance metrics {ticker}...")
    print(f"  MSE: {mse:.6f}")
    print(f"  MAE: {mae:.6f}")
    print(f"  R2: {r2:.6f}")

    # Time Series Cross-Validation (expanding window, 5 folds)
    tscv = TimeSeriesSplit(n_splits=5)
    cv_mse = []
    cv_mae = []
    cv_r2 = []
    for train_idx, test_idx in tscv.split(X):
        X_cv_train, X_cv_test = X.iloc[train_idx], X.iloc[test_idx]
        y_cv_train, y_cv_test = y.iloc[train_idx], y.iloc[test_idx]
        # Standardize with a scaler fitted on this fold's training rows
        fold_scaler = StandardScaler()
        X_cv_train_scaled = fold_scaler.fit_transform(X_cv_train)
        X_cv_test_scaled = fold_scaler.transform(X_cv_test)
        model_cv = LinearRegression()
        model_cv.fit(X_cv_train_scaled, y_cv_train)
        y_cv_pred = model_cv.predict(X_cv_test_scaled)
        # Calculate metrics
        cv_mse.append(mean_squared_error(y_cv_test, y_cv_pred))
        cv_mae.append(mean_absolute_error(y_cv_test, y_cv_pred))
        cv_r2.append(r2_score(y_cv_test, y_cv_pred))
    # Upper-case metric keys, matching the CV summaries of the other model cells
    cv_summary = {
        'MSE': np.mean(cv_mse),
        'MAE': np.mean(cv_mae),
        'R2': np.mean(cv_r2),
        'mse_std': np.std(cv_mse),
        'mae_std': np.std(cv_mae),
        'r2_std': np.std(cv_r2)
    }
    cv_results[lr_name] = cv_summary
    print(f"Cross-Validation {ticker}...")
    print(f"  Avg MSE: {np.mean(cv_mse):.6f} (±{np.std(cv_mse):.6f})")
    print(f"  Avg MAE: {np.mean(cv_mae):.6f} (±{np.std(cv_mae):.6f})")
    print(f"  Avg R2: {np.mean(cv_r2):.6f} (±{np.std(cv_r2):.6f})")

    # Per-ticker views: whatever is already stored for this ticker, with this
    # cell's model added/replaced. New dict objects, so nothing is aliased.
    prior = results.get(ticker, {})
    ticker_predictions = {**prior.get('predictions', {}), lr_name: lr_pred}
    ticker_importances = {**prior.get('importances', {}), lr_name: lr_importance}
    ticker_cv_results = {**prior.get('cv_results', {}), lr_name: cv_summary}

    # Plot predictions vs actual (one panel per model, 2x2 grid).
    # Stored arrays whose length no longer matches y_test cannot be drawn
    # against it and are skipped.
    fig = plt.figure(figsize=(14, 10))
    plottable = [
        (name, pred) for name, pred in ticker_predictions.items()
        if len(pred) == len(y_test)
    ]
    for i, (name, pred) in enumerate(plottable, 1):
        ax = fig.add_subplot(2, 2, i)
        ax.plot(y_test.index, y_test.values, 'b-', label='Actual')
        ax.plot(y_test.index, pred, 'r--', label=f'{name} Prediction')
        ax.set_title(f'{ticker} - {name} Predictions')
        ax.set_xlabel('Date')
        ax.set_ylabel('Returns')
        ax.legend()
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_predictions.png')
    plt.show()

    # Plot feature importance for each model
    fig = plt.figure(figsize=(14, 10))
    for i, (name, importance_df) in enumerate(ticker_importances.items(), 1):
        top_features = importance_df.head(10)
        ax = fig.add_subplot(2, 2, i)
        ax.barh(top_features['Feature'], top_features['Importance'])
        ax.set_title(f'{ticker} - {name} Feature Importance')
        ax.set_xlabel('Importance')
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_feature_importance.png')
    plt.show()

    # Store results for the ticker
    results[ticker] = {
        **prior,
        'data': data,
        'eval_metrics': eval_metrics,
        'cv_results': ticker_cv_results,
        'importances': ticker_importances,
        'predictions': ticker_predictions,
        'y_test': y_test
    }

* Support Vector Machines (SVM)

In [None]:
# Support Vector Regression: tune, evaluate, cross-validate and plot per ticker.
#
# Notes on shared state:
# * Only the predictions produced in this iteration are scored, so arrays left
#   in `predictions` by another ticker/model are never scored against y_test.
# * `results[ticker]` receives fresh dicts (never the notebook-wide shared
#   objects). Entries already stored for this ticker under other model names
#   are kept.
for ticker in tickers:
    print(f"Processing {ticker}...")
    svm_name = 'SVM'

    # Data Preprocessing
    data = fetch_and_process_data(ticker, market_index, period)
    X = data[feature_cols]
    y = data[target_col]

    # Split data (keeping time order) - 80% training, 20% testing
    train_size = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Standardize features using statistics from the training rows only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model Implementation
    # Support Vector Machines (SVM)
    print("Training SVM...")
    # Hyperparameter tuning with TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=5)
    c_values = [0.01, 0.1, 1, 10]
    gamma_values = [0.0001, 0.001, 0.01, 0.1]
    epsilon_values = [0.001, 0.01, 0.1]
    # One sub-grid per kernel so that parameters a kernel ignores are not
    # crossed into the search: `degree` only affects 'poly', and `gamma` has no
    # effect on 'linear'. The distinct candidate models are the same as a full
    # cross-product, without the duplicate fits.
    param_grid = [
        {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values,
         'epsilon': epsilon_values},
        {'kernel': ['poly'], 'C': c_values, 'gamma': gamma_values,
         'degree': [2, 3], 'epsilon': epsilon_values},
        {'kernel': ['linear'], 'C': c_values, 'epsilon': epsilon_values},
    ]
    scoring = {
        'neg_mean_absolute_error': 'neg_mean_absolute_error',
        'r2': 'r2',
        'neg_mean_squared_error': 'neg_mean_squared_error'
    }
    svr = SVR()
    grid_search = GridSearchCV(svr, param_grid, cv=tscv,
                               scoring=scoring,
                               refit='neg_mean_absolute_error',
                               n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)
    best_svr = grid_search.best_estimator_
    svr_pred = best_svr.predict(X_test_scaled)
    models[svm_name] = best_svr
    predictions[svm_name] = svr_pred
    print(f"\nBest SVM parameters for {ticker}: {grid_search.best_params_}")
    # The search refits on neg_mean_absolute_error, so best_score_ is a
    # (negated) MAE, not an MSE.
    print("Best score (MAE):", -grid_search.best_score_)

    # Model Evaluation on the held-out 20%
    mse = mean_squared_error(y_test, svr_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, svr_pred)
    r2 = r2_score(y_test, svr_pred)
    metrics_row = pd.DataFrame([{
        'Ticker': ticker,
        'Model': svm_name,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
    }])
    # Public replacement for the private DataFrame._append; skipping the concat
    # while the table is still empty avoids concatenating an all-empty frame.
    eval_metrics = (
        metrics_row if eval_metrics.empty
        else pd.concat([eval_metrics, metrics_row], ignore_index=True)
    )

    # Print metrics
    print(f"Performance metrics {ticker}...")
    print(f"  MSE: {mse:.6f}")
    print(f"  MAE: {mae:.6f}")
    print(f"  R2: {r2:.6f}")

    # Time Series Cross-Validation over the whole sample with the tuned parameters
    tscv = TimeSeriesSplit(n_splits=5)
    cv_mse = []
    cv_mae = []
    cv_r2 = []
    for train_idx, test_idx in tscv.split(X):
        X_cv_train, X_cv_test = X.iloc[train_idx], X.iloc[test_idx]
        y_cv_train, y_cv_test = y.iloc[train_idx], y.iloc[test_idx]
        # Standardize with a scaler fitted on this fold's training rows
        fold_scaler = StandardScaler()
        X_cv_train_scaled = fold_scaler.fit_transform(X_cv_train)
        X_cv_test_scaled = fold_scaler.transform(X_cv_test)
        model_cv = SVR(**best_svr.get_params())
        model_cv.fit(X_cv_train_scaled, y_cv_train)
        y_cv_pred = model_cv.predict(X_cv_test_scaled)
        # Calculate metrics
        cv_mse.append(mean_squared_error(y_cv_test, y_cv_pred))
        cv_mae.append(mean_absolute_error(y_cv_test, y_cv_pred))
        cv_r2.append(r2_score(y_cv_test, y_cv_pred))
    cv_summary = {
        'MSE': np.mean(cv_mse),
        'MAE': np.mean(cv_mae),
        'R2': np.mean(cv_r2),
        'mse_std': np.std(cv_mse),
        'mae_std': np.std(cv_mae),
        'r2_std': np.std(cv_r2)
    }
    cv_results[svm_name] = cv_summary
    print(f"Cross-Validation {ticker}...")
    print(f"  Avg MSE: {np.mean(cv_mse):.6f} (±{np.std(cv_mse):.6f})")
    print(f"  Avg MAE: {np.mean(cv_mae):.6f} (±{np.std(cv_mae):.6f})")
    print(f"  Avg R2: {np.mean(cv_r2):.6f} (±{np.std(cv_r2):.6f})")

    # Per-ticker views: whatever is already stored for this ticker, with this
    # cell's model added/replaced. New dict objects, so nothing is aliased.
    prior = results.get(ticker, {})
    ticker_predictions = {**prior.get('predictions', {}), svm_name: svr_pred}
    ticker_cv_results = {**prior.get('cv_results', {}), svm_name: cv_summary}

    # Plot predictions vs actual (one panel per model, 2x2 grid).
    # Stored arrays whose length no longer matches y_test cannot be drawn
    # against it and are skipped.
    fig = plt.figure(figsize=(14, 10))
    plottable = [
        (name, pred) for name, pred in ticker_predictions.items()
        if len(pred) == len(y_test)
    ]
    for i, (name, pred) in enumerate(plottable, 1):
        ax = fig.add_subplot(2, 2, i)
        ax.plot(y_test.index, y_test.values, 'b-', label='Actual')
        ax.plot(y_test.index, pred, 'r--', label=f'{name} Prediction')
        ax.set_title(f'{ticker} - {name} Predictions')
        ax.set_xlabel('Date')
        ax.set_ylabel('Returns')
        ax.legend()
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_predictions.png')
    plt.show()

    # Store results for the ticker
    results[ticker] = {
        **prior,
        'data': data,
        'eval_metrics': eval_metrics,
        'cv_results': ticker_cv_results,
        'predictions': ticker_predictions,
        'y_test': y_test
    }

* Random Forest

In [None]:
# Random Forest: tune, evaluate, cross-validate and plot for each ticker.
#
# Notes on shared state:
# * The CV splitter is created inside this cell before the grid search, so the
#   cell does not depend on a `tscv` left behind by another cell.
# * Only the predictions produced in this iteration are scored, so arrays left
#   in `predictions` by another ticker/model are never scored against y_test.
# * `results[ticker]` receives fresh dicts (never the notebook-wide shared
#   objects). Entries already stored for this ticker under other model names
#   are kept.
for ticker in tickers:
    print(f"Processing {ticker}...")
    rf_name = 'Random Forest'

    # Data Preprocessing
    data = fetch_and_process_data(ticker, market_index, period)
    X = data[feature_cols]
    y = data[target_col]

    # Split data (keeping time order) - 80% training, 20% testing
    train_size = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Standardize features using statistics from the training rows only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model Implementation
    # Random Forest
    print("Training Random Forest...")
    # Hyperparameter tuning with an order-preserving splitter
    tscv = TimeSeriesSplit(n_splits=5)
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2', None],
        'oob_score': [True]
    }
    rf = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(rf, param_grid, cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train_scaled, y_train)
    best_rf = grid_search.best_estimator_
    rf_pred = best_rf.predict(X_test_scaled)
    models[rf_name] = best_rf
    predictions[rf_name] = rf_pred

    # Feature importance for Random Forest
    rf_importance = pd.DataFrame({
        'Feature': feature_cols,
        'Importance': best_rf.feature_importances_
    }).sort_values('Importance', ascending=False)
    importances[rf_name] = rf_importance
    print(f"\nBest Random Forest parameters for {ticker}: {grid_search.best_params_}")

    # Model Evaluation on the held-out 20%
    mse = mean_squared_error(y_test, rf_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, rf_pred)
    r2 = r2_score(y_test, rf_pred)
    metrics_row = pd.DataFrame([{
        'Ticker': ticker,
        'Model': rf_name,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
    }])
    # Public replacement for the private DataFrame._append; skipping the concat
    # while the table is still empty avoids concatenating an all-empty frame.
    eval_metrics = (
        metrics_row if eval_metrics.empty
        else pd.concat([eval_metrics, metrics_row], ignore_index=True)
    )

    # Print metrics
    print(f"Performance metrics {ticker}...")
    print(f"  MSE: {mse:.6f}")
    print(f"  MAE: {mae:.6f}")
    print(f"  R2: {r2:.6f}")

    # Time Series Cross-Validation over the whole sample with the tuned parameters
    tscv = TimeSeriesSplit(n_splits=5)
    cv_mse = []
    cv_mae = []
    cv_r2 = []
    for train_idx, test_idx in tscv.split(X):
        X_cv_train, X_cv_test = X.iloc[train_idx], X.iloc[test_idx]
        y_cv_train, y_cv_test = y.iloc[train_idx], y.iloc[test_idx]
        # Standardize with a scaler fitted on this fold's training rows
        fold_scaler = StandardScaler()
        X_cv_train_scaled = fold_scaler.fit_transform(X_cv_train)
        X_cv_test_scaled = fold_scaler.transform(X_cv_test)
        model_cv = RandomForestRegressor(**best_rf.get_params())
        model_cv.fit(X_cv_train_scaled, y_cv_train)
        y_cv_pred = model_cv.predict(X_cv_test_scaled)
        # Calculate metrics
        cv_mse.append(mean_squared_error(y_cv_test, y_cv_pred))
        cv_mae.append(mean_absolute_error(y_cv_test, y_cv_pred))
        cv_r2.append(r2_score(y_cv_test, y_cv_pred))
    cv_summary = {
        'MSE': np.mean(cv_mse),
        'MAE': np.mean(cv_mae),
        'R2': np.mean(cv_r2),
        'mse_std': np.std(cv_mse),
        'mae_std': np.std(cv_mae),
        'r2_std': np.std(cv_r2)
    }
    cv_results[rf_name] = cv_summary
    print(f"Cross-Validation {ticker}...")
    print(f"  Avg MSE: {np.mean(cv_mse):.6f} (±{np.std(cv_mse):.6f})")
    print(f"  Avg MAE: {np.mean(cv_mae):.6f} (±{np.std(cv_mae):.6f})")
    print(f"  Avg R2: {np.mean(cv_r2):.6f} (±{np.std(cv_r2):.6f})")

    # Per-ticker views: whatever is already stored for this ticker, with this
    # cell's model added/replaced. New dict objects, so nothing is aliased.
    prior = results.get(ticker, {})
    ticker_predictions = {**prior.get('predictions', {}), rf_name: rf_pred}
    ticker_importances = {**prior.get('importances', {}), rf_name: rf_importance}
    ticker_cv_results = {**prior.get('cv_results', {}), rf_name: cv_summary}

    # Plot predictions vs actual (one panel per model, 2x2 grid).
    # Stored arrays whose length no longer matches y_test cannot be drawn
    # against it and are skipped.
    fig = plt.figure(figsize=(14, 10))
    plottable = [
        (name, pred) for name, pred in ticker_predictions.items()
        if len(pred) == len(y_test)
    ]
    for i, (name, pred) in enumerate(plottable, 1):
        ax = fig.add_subplot(2, 2, i)
        ax.plot(y_test.index, y_test.values, 'b-', label='Actual')
        ax.plot(y_test.index, pred, 'r--', label=f'{name} Prediction')
        ax.set_title(f'{ticker} - {name} Predictions')
        ax.set_xlabel('Date')
        ax.set_ylabel('Returns')
        ax.legend()
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_predictions.png')
    plt.show()

    # Plot feature importance for each model
    fig = plt.figure(figsize=(14, 10))
    for i, (name, importance_df) in enumerate(ticker_importances.items(), 1):
        top_features = importance_df.head(10)
        ax = fig.add_subplot(2, 2, i)
        ax.barh(top_features['Feature'], top_features['Importance'])
        ax.set_title(f'{ticker} - {name} Feature Importance')
        ax.set_xlabel('Importance')
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_feature_importance.png')
    plt.show()

    # Store results for the ticker
    results[ticker] = {
        **prior,
        'data': data,
        'eval_metrics': eval_metrics,
        'cv_results': ticker_cv_results,
        'importances': ticker_importances,
        'predictions': ticker_predictions,
        'y_test': y_test
    }

* Neural Network

In [None]:
# Neural Network: train, evaluate, cross-validate and plot for each ticker.
#
# Notes:
# * Inside cross-validation, early stopping monitors a slice held out from the
#   fold's TRAINING rows (validation_split), never the fold's test rows, so the
#   reported fold metrics are computed on data the model selection did not see.
# * Test-set metrics are also appended to `eval_metrics`, like the other models.
# * Only the predictions produced in this iteration are scored, and
#   `results[ticker]` receives fresh dicts (never the notebook-wide shared
#   objects). Entries already stored for this ticker under other model names
#   are kept.
def _build_regression_net(n_features):
    """Return the compiled 64-32-16-1 dense network used for every fit in this cell."""
    net = Sequential([
        Dense(64, activation='relu', input_shape=(n_features,)),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    net.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return net


nn_results = {}
for ticker in tickers:
    print(f"Processing {ticker}...")
    nn_name = 'Neural Networks'

    # Data Preprocessing
    data = fetch_and_process_data(ticker, market_index, period)
    X = data[feature_cols]
    y = data[target_col]

    # Split data (keeping time order)
    train_size = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    # Standardize features using statistics from the training rows only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Neural Network Model
    print("Training Neural Network...")
    model = _build_regression_net(X_train.shape[1])
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(
        X_train_scaled, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )

    # Make predictions
    nn_pred = model.predict(X_test_scaled).flatten()
    models[nn_name] = model
    predictions[nn_name] = nn_pred

    # Calculate metrics on the held-out 20%
    nn_mse = mean_squared_error(y_test, nn_pred)
    nn_rmse = np.sqrt(nn_mse)
    nn_mae = mean_absolute_error(y_test, nn_pred)
    nn_r2 = r2_score(y_test, nn_pred)
    metrics_row = pd.DataFrame([{
        'Ticker': ticker,
        'Model': nn_name,
        'MSE': nn_mse,
        'RMSE': nn_rmse,
        'MAE': nn_mae,
        'R2': nn_r2,
    }])
    # Skipping the concat while the table is still empty avoids concatenating
    # an all-empty frame.
    eval_metrics = (
        metrics_row if eval_metrics.empty
        else pd.concat([eval_metrics, metrics_row], ignore_index=True)
    )

    # Store results
    nn_results[ticker] = {
        'model': model,
        'predictions': nn_pred,
        'mse': nn_mse,
        'mae': nn_mae,
        'r2': nn_r2,
        'history': history
    }

    # Print metrics
    print(f"{ticker}...")
    print(f"  MSE: {nn_mse:.6f}")
    print(f"  MAE: {nn_mae:.6f}")
    print(f"  R2: {nn_r2:.6f}")

    # Time Series Cross-Validation
    tscv = TimeSeriesSplit(n_splits=5)
    cv_mse = []
    cv_mae = []
    cv_r2 = []
    for train_idx, test_idx in tscv.split(X):
        X_cv_train, X_cv_test = X.iloc[train_idx], X.iloc[test_idx]
        y_cv_train, y_cv_test = y.iloc[train_idx], y.iloc[test_idx]
        # Standardize with a scaler fitted on this fold's training rows
        fold_scaler = StandardScaler()
        X_cv_train_scaled = fold_scaler.fit_transform(X_cv_train)
        X_cv_test_scaled = fold_scaler.transform(X_cv_test)
        # Create and train NN; early stopping validates on part of the fold's
        # training data, mirroring the main fit above, so the test fold stays
        # unseen until scoring.
        model_cv = _build_regression_net(X_cv_train_scaled.shape[1])
        fold_early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        model_cv.fit(
            X_cv_train_scaled, y_cv_train,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=[fold_early_stopping],
            verbose=0
        )
        y_cv_pred = model_cv.predict(X_cv_test_scaled).flatten()
        # Calculate metrics
        cv_mse.append(mean_squared_error(y_cv_test, y_cv_pred))
        cv_mae.append(mean_absolute_error(y_cv_test, y_cv_pred))
        cv_r2.append(r2_score(y_cv_test, y_cv_pred))
    cv_summary = {
        'MSE': np.mean(cv_mse),
        'MAE': np.mean(cv_mae),
        'R2': np.mean(cv_r2),
        'mse_std': np.std(cv_mse),
        'mae_std': np.std(cv_mae),
        'r2_std': np.std(cv_r2)
    }
    cv_results[nn_name] = cv_summary
    print(f"Cross-Validation {ticker}...")
    print(f"  Avg MSE: {np.mean(cv_mse):.6f} (±{np.std(cv_mse):.6f})")
    print(f"  Avg MAE: {np.mean(cv_mae):.6f} (±{np.std(cv_mae):.6f})")
    print(f"  Avg R2: {np.mean(cv_r2):.6f} (±{np.std(cv_r2):.6f})")

    # Per-ticker views: whatever is already stored for this ticker, with this
    # cell's model added/replaced. New dict objects, so nothing is aliased.
    prior = results.get(ticker, {})
    ticker_predictions = {**prior.get('predictions', {}), nn_name: nn_pred}
    ticker_cv_results = {**prior.get('cv_results', {}), nn_name: cv_summary}

    # Plot predictions vs actual (one panel per model, 2x2 grid).
    # Stored arrays whose length no longer matches y_test cannot be drawn
    # against it and are skipped.
    fig = plt.figure(figsize=(14, 10))
    plottable = [
        (name, pred) for name, pred in ticker_predictions.items()
        if len(pred) == len(y_test)
    ]
    for i, (name, pred) in enumerate(plottable, 1):
        ax = fig.add_subplot(2, 2, i)
        ax.plot(y_test.index, y_test.values, 'b-', label='Actual')
        ax.plot(y_test.index, pred, 'r--', label=f'{name} Prediction')
        ax.set_title(f'{ticker} - {name} Predictions')
        ax.set_xlabel('Date')
        ax.set_ylabel('Returns')
        ax.legend()
        ax.grid(True)
    fig.tight_layout()
    fig.savefig(f'{ticker}_predictions.png')
    plt.show()

    # Store results for the ticker
    results[ticker] = {
        **prior,
        'data': data,
        'eval_metrics': eval_metrics,
        'cv_results': ticker_cv_results,
        'predictions': ticker_predictions,
        'y_test': y_test
    }