##### Import

In [1]:
import warnings
import papermill as pm
import scrapbook as sb
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
from tqdm import tqdm
import shap
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import LinearRegression, Ridge, Lasso
# from sklearn.pipeline import Pipeline
import os
import gc
import sys

# Notebook-wide configuration.

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/LightGBM/Optuna.
warnings.filterwarnings('ignore')

# Let pandas render up to 10000 columns / rows before truncating the display.
pd.set_option('display.max_columns', 10000)
pd.set_option('display.max_rows', 10000)

# Use seaborn's white-grid theme for all matplotlib figures.
sns.set_style('whitegrid')

# Put the parent of sys.path[0] on the import path
# (presumably so project modules one level up can be imported - confirm).
sys.path.insert(1, os.path.join(sys.path[0], '..'))

# Shorthand for pandas MultiIndex slicing, e.g. df.loc[idx[:, 'AAPL'], :].
idx = pd.IndexSlice

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [2]:
# from pathlib import Path
# import pandas as pd
# from utils import rank_stocks_and_quantile
# # UNSEEN_KEY = '/data/YEAR_20220803_20230803'
# top = 250  # parameters -> papermill
# DATA_STORE = Path(f'data/{top}_dataset.h5')
# with pd.HDFStore(DATA_STORE) as store:
#     # unseen = store[UNSEEN_KEY]
#     print(store.keys())

In [3]:
import gc
import pandas as pd
from utils import rank_stocks_and_quantile

top = 250  # parameters -> papermill

DATA_STORE = Path(f'data/{top}_dataset.h5')
dataset_keys = [
    '/data/YEAR_20200930_20220802',
    '/data/YEAR_20181024_20200929',
    '/data/YEAR_20161116_20181023',
    '/data/YEAR_20141210_20161115'
]
target = 'TARGET_ret_fwd'  # no longer a parameter

# Read every yearly slice first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows on each pass.
# mode='r' keeps the store read-only, so a wrong path raises instead of
# silently creating an empty HDF5 file.
with pd.HDFStore(DATA_STORE, mode='r') as store:
    frames = [store[key] for key in dataset_keys]

dataset = pd.concat(frames, ignore_index=False)
del frames
gc.collect()  # release the per-slice copies before the heavy ranking step

# Rank stocks and quantile
dataset = rank_stocks_and_quantile(dataset, target_substring=target)

# Drop the timezone from the date level. MultiIndex.set_levels no longer
# accepts inplace=True (removed in pandas 2.0), so assign the new index back.
dataset.index = dataset.index.set_levels(
    dataset.index.levels[0].tz_localize(None), level=0
)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 486114 entries, (Timestamp('2014-12-10 00:00:00'), 'AA') to (Timestamp('2022-08-02 00:00:00'), 'ZTS')
Columns: 622 entries, FEATURE_open to TARGET_ret_fwd_252d_rank_quantiled
dtypes: float32(360), float64(43), int32(198), int64(12), int8(9)
memory usage: 1.2+ GB


In [4]:
import pandas as pd

# Date span covered by the training data (first level of the MultiIndex).
_dates = dataset.index.get_level_values(0)
start_date, end_date = _dates.min(), _dates.max()

# Every weekday (Mon-Fri) inside that span, both endpoints included.
business_dates = pd.bdate_range(start_date, end_date)
num_business_days = len(business_dates)

print(f"Number of business days between {start_date} and {end_date}: {num_business_days}")

Number of business days between 2014-12-10 00:00:00 and 2022-08-02 00:00:00: 1995


In [5]:
# def get_fi(model):
#     fi = model.feature_importance(importance_type='gain')
#     return pd.Series(fi / fi.sum(), index=model.feature_name())

# def ic_lgbm(preds, train_data):
#     """Custom IC eval metric for lightgbm"""
#     return 'ic', spearmanr(preds, train_data.get_label())[0], True


# def custom_eval_metrics(preds, train_data):
#     from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
#     labels = train_data.get_label()
#     mae = mean_absolute_error(labels, preds)
#     mse = mean_squared_error(labels, preds)
#     rmse = np.sqrt(mse)
#     r2 = r2_score(labels, preds)
#     return [('MAE', mae, False), 
#             ('MSE', mse, False), 
#             ('RMSE', rmse, False), 
#             ('R2', r2, True)]

# def combined_eval_metrics(preds, train_data):
#     ic_result = ic_lgbm(preds, train_data)
#     sharpe_ratio_result = sharpe_ratio_lgbm(preds, train_data)
#     custom_metrics_results = custom_eval_metrics(preds, train_data)
#     return [ic_result, sharpe_ratio_result] + custom_metrics_results

def sharpe_ratio_lgbm(preds, train_data):
    """LightGBM ``feval``: Sharpe-style ratio of the per-date rank correlations.

    For every value of the first index level of ``train_data.data`` the
    Spearman correlation between labels and predictions is computed (NaN is
    counted as 0). The metric is ``mean / (std + 1e-9)`` of those scores.

    Parameters
    ----------
    preds : array-like
        Model predictions, aligned row by row with ``train_data.data``.
    train_data : object
        Must expose ``get_label()`` and a ``data`` attribute whose index is a
        ``pd.MultiIndex`` (date on level 0).

    Returns
    -------
    tuple
        ``('sharpe_ratio', value, True)`` - the ``True`` flags higher-is-better.

    Raises
    ------
    ValueError
        If ``train_data.data.index`` is not a ``pd.MultiIndex``.
    """
    labels = train_data.get_label()
    row_index = train_data.data.index
    if not isinstance(row_index, pd.MultiIndex):
        raise ValueError("Expecting a MultiIndex with date as the first level")

    per_day = pd.DataFrame(
        {'actual': labels, 'predicted': preds}, index=row_index
    ).groupby(level=0)

    def _rank_corr(day):
        # A constant column makes Spearman undefined (NaN); score that day as 0.
        rho = spearmanr(day['actual'], day['predicted'])[0]
        return 0 if np.isnan(rho) else rho

    daily_scores = [_rank_corr(day) for _, day in per_day]

    # The small epsilon keeps the ratio finite when every day scores the same.
    dispersion = np.std(daily_scores) + 1e-9
    return 'sharpe_ratio', np.mean(daily_scores) / dispersion, True


def mean_ic_for_fold(preds, train_data):
    """LightGBM ``feval``: mean per-date information coefficient.

    The Spearman correlation between labels and predictions is computed for
    each value of the first index level of ``train_data.data``; undefined
    (NaN) correlations count as 0. The metric is the plain mean of those
    daily values.

    Parameters
    ----------
    preds : array-like
        Model predictions, aligned row by row with ``train_data.data``.
    train_data : object
        Must expose ``get_label()`` and a ``data`` attribute whose index is a
        ``pd.MultiIndex`` (date on level 0).

    Returns
    -------
    tuple
        ``('IC', value, True)`` - the ``True`` flags higher-is-better.

    Raises
    ------
    ValueError
        If ``train_data.data.index`` is not a ``pd.MultiIndex``.
    """
    labels = train_data.get_label()
    row_index = train_data.data.index
    if not isinstance(row_index, pd.MultiIndex):
        raise ValueError("Expecting a MultiIndex with date as the first level")

    paired = pd.DataFrame({'actual': labels, 'predicted': preds}, index=row_index)

    daily_ic_scores = []
    for _, day in paired.groupby(level=0):
        ic = spearmanr(day['actual'], day['predicted'])[0]
        if np.isnan(ic):
            ic = 0  # undefined correlation (e.g. constant predictions that day)
        daily_ic_scores.append(ic)

    return 'IC', np.mean(daily_ic_scores), True

def mean_custom_metrics_for_fold(preds, train_data):
    """LightGBM ``feval``: per-date MAE, MSE, RMSE and R^2, averaged over dates.

    Each metric is evaluated separately for every value of the first index
    level of ``train_data.data`` and the daily values are then averaged.

    Parameters
    ----------
    preds : array-like
        Model predictions, aligned row by row with ``train_data.data``.
    train_data : object
        Must expose ``get_label()`` and a ``data`` attribute whose index is a
        ``pd.MultiIndex`` (date on level 0).

    Returns
    -------
    list of tuple
        ``[('MAE', v, False), ('MSE', v, False), ('RMSE', v, False),
        ('R2', v, True)]``; the boolean is the higher-is-better flag.

    Raises
    ------
    ValueError
        If ``train_data.data.index`` is not a ``pd.MultiIndex``.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    labels = train_data.get_label()
    row_index = train_data.data.index
    if not isinstance(row_index, pd.MultiIndex):
        raise ValueError("Expecting a MultiIndex with date as the first level")

    paired = pd.DataFrame({'actual': labels, 'predicted': preds}, index=row_index)

    # Insertion order of this dict is the order of the returned metric list.
    per_day = {'MAE': [], 'MSE': [], 'RMSE': [], 'R2': []}
    for _, day in paired.groupby(level=0):
        y_true, y_hat = day['actual'], day['predicted']
        day_mse = mean_squared_error(y_true, y_hat)
        per_day['MAE'].append(mean_absolute_error(y_true, y_hat))
        per_day['MSE'].append(day_mse)
        per_day['RMSE'].append(np.sqrt(day_mse))
        per_day['R2'].append(r2_score(y_true, y_hat))

    # Only R^2 is a "larger is better" metric; the error metrics are minimised.
    maximised = {'R2'}
    return [(name, np.mean(values), name in maximised)
            for name, values in per_day.items()]

def combined_fold_metrics(preds, train_data):
    """Bundle all fold-level metrics into one list of ``(name, value, higher_is_better)`` tuples.

    The order is IC, sharpe_ratio, then MAE / MSE / RMSE / R2, so IC is the
    first metric in the returned list.
    """
    results = [
        mean_ic_for_fold(preds, train_data),
        sharpe_ratio_lgbm(preds, train_data),
    ]
    results.extend(mean_custom_metrics_for_fold(preds, train_data))
    return results

In [6]:
def metrics_on_fold(era_scores, weights=None):
    """Summarise a sequence of scores into a Series of stability metrics.

    Parameters
    ----------
    era_scores : sequence of float
        Scores in chronological order.
    weights : dict, optional
        Mapping ``metric name -> weight``. When given (and non-empty) the
        metrics are min-max scaled against each other, multiplied by their
        weights and summed into an extra ``'weighted_score'`` entry. Metrics
        without a weight do not contribute to the sum.

    Returns
    -------
    pd.Series
        ``mean_correlation``, ``std_deviation``, ``sharpe_ratio``,
        ``smart_sharpe``, ``autocorrelation``, ``max_dd``,
        ``min_correlation``, ``max_correlation`` and, if ``weights`` was
        supplied, ``weighted_score``.
    """
    scores = pd.Series(era_scores)

    average = np.mean(scores)
    spread = np.std(scores)
    # Spread of the score-to-score changes; penalises choppy sequences.
    change_spread = np.std(scores.diff())
    # Distance of each score below the running maximum.
    shortfall = scores.cummax() - scores

    metrics = pd.Series({
        'mean_correlation': average,
        'std_deviation': spread,
        'sharpe_ratio': average / spread,
        'smart_sharpe': average / (spread + change_spread),
        'autocorrelation': scores.autocorr(),
        'max_dd': shortfall.max(),
        'min_correlation': scores.min(),
        'max_correlation': scores.max(),
    })

    if weights:
        lowest, highest = metrics.min(), metrics.max()
        scaled = (metrics - lowest) / (highest - lowest)
        metrics["weighted_score"] = scaled.multiply(pd.Series(weights)).sum()

    gc.collect()

    return metrics

In [7]:
import numpy as np

def custom_loss(preds, dataset):
    """Custom LightGBM objective: squared-error gradient plus a pull away from the label mean.

    Parameters
    ----------
    preds : np.ndarray
        Current raw predictions.
    dataset : object
        Must expose ``get_label()`` returning the targets.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``gradient = 2 * (preds - y) + (preds - mean(y))`` and a hessian of
        ones with the same shape and dtype as the labels.
    """
    targets = dataset.get_label()

    squared_error_term = 2 * (preds - targets)
    mean_offset_term = preds - np.mean(targets)

    # The hessian is fixed at 1 as a simplification rather than derived from
    # the gradient above.
    return squared_error_term + mean_offset_term, np.ones_like(targets)

In [8]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import optuna
import mlflow
import mlflow.lightgbm
from optuna.integration import LightGBMPruningCallback
from scipy.stats import spearmanr
import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Shared evaluation history: passed to lgb.record_evaluation inside objective()
# and read again by the plotting and final-training cells further down.
evals_result = {}

def get_categoricals(dataset, threshold):
    """Return the ``FEATURE_`` columns that should be treated as categorical.

    A column qualifies when its name starts with ``"FEATURE_"``, it has fewer
    than ``threshold`` distinct values and every value is ``>= 0``.

    The cheap name check runs first, so non-feature columns (targets, string
    identifiers, ...) are never scanned. That avoids a ``nunique`` pass over
    each of them and a ``TypeError`` from comparing non-numeric data with 0.

    Parameters
    ----------
    dataset : pd.DataFrame
        Frame whose columns are inspected.
    threshold : int
        Exclusive upper bound on the number of distinct values.

    Returns
    -------
    list
        Qualifying column names, in column order.
    """
    categoricals = []
    for col in dataset.columns:
        if not (isinstance(col, str) and col.startswith("FEATURE_")):
            continue
        values = dataset[col]
        if values.nunique() < threshold and values.ge(0).all():
            categoricals.append(col)
    return categoricals

def objective(trial, data, features):
    """Optuna objective: walk-forward LightGBM training scored by a weighted stability metric.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyper-parameters and to report pruning values.
    data : pd.DataFrame
        Full training frame; it supplies the features, the
        ``TARGET_ret_fwd_<NN>d_rank_quantiled`` label columns and the rows the
        CV splitter iterates over.
    features : list of str
        Feature column names fed to LightGBM.

    Returns
    -------
    float
        ``weighted_score`` from ``metrics_on_fold``, or ``1e-9`` when that
        score is NaN.

    Side effects
    ------------
    Overwrites the module-level ``evals_result`` on every fold and logs one
    MLflow run per trial.
    """
    # Dynamic categoricals based on the trial's suggested threshold
    cat_threshold = trial.suggest_int('cat_threshold', 5, 50)
    categoricals = get_categoricals(data, cat_threshold)

    params = {
        'boosting': 'gbdt',
        'objective': custom_loss,
        'verbose': -1,
        'metric': 'None',  # only the custom feval metrics are evaluated
        'device': 'gpu',
        'num_leaves': trial.suggest_int('num_leaves', 30, 150),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        # suggest_loguniform is deprecated; suggest_float(log=True) is the
        # equivalent call and matches how learning_rate is sampled above.
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
    }

    # The forecast horizon selects the target column and the CV gap. It is not
    # a LightGBM parameter, so it is kept out of `params`.
    lookahead = trial.suggest_categorical('lookahead', [1, 5, 21])
    label = f'TARGET_ret_fwd_{lookahead:02d}d_rank_quantiled'

    # Split the frame that was passed in, not the notebook-global `dataset`.
    cv = CustomBackwardMultipleTimeSeriesCV(data, train_period_length=142,
                                            test_period_length=21,
                                            lookahead=1,  # Starting value; adjusted next.
                                            date_idx='date')
    cv.update_lookahead(lookahead)

    # Per-boosting-round validation sharpe_ratio values, pooled over all folds.
    sharpe_history = []

    for train_idx, val_idx in cv:
        lgb_train = lgb.Dataset(data=data.loc[train_idx, features],
                                label=data.loc[train_idx, label],
                                categorical_feature=categoricals,
                                free_raw_data=False)
        lgb_val = lgb.Dataset(data=data.loc[val_idx, features],
                              label=data.loc[val_idx, label],
                              categorical_feature=categoricals,
                              free_raw_data=False)

        # A fresh callback per fold so no early-stopping state can carry over.
        early_stopping = lgb.early_stopping(stopping_rounds=500, verbose=True,
                                            first_metric_only=True)

        lgb.train(params=params,
                  train_set=lgb_train,
                  num_boost_round=5000,
                  valid_sets=[lgb_train, lgb_val],
                  valid_names=['train', 'valid_0'],
                  feval=combined_fold_metrics,
                  callbacks=[lgb.record_evaluation(evals_result),
                             early_stopping,
                             LightGBMPruningCallback(trial, 'sharpe_ratio')])

        sharpe_history.extend(evals_result['valid_0']['sharpe_ratio'])

    weights = {
        'sharpe_ratio': 0.95,       # Primary objective, so highest weight
        'max_dd': -0.1,             # Major risk metric, negative to penalize higher drawdowns
        'autocorrelation': -0.1,    # Penalize strategies showing signs of overfitting
        'std_deviation': -0.025,    # Mild penalty for higher volatility
        'smart_sharpe': 0.075       # Supplementary to Sharpe Ratio but considering autocorrelation
    }

    metrics = metrics_on_fold(sharpe_history, weights=weights)
    score = metrics["weighted_score"]

    # Log parameters, summary metrics and the evaluation curves to MLflow.
    with mlflow.start_run():
        mlflow.log_params({**params,
                           'cat_threshold': cat_threshold,
                           'lookahead': lookahead})
        mlflow.log_metrics(metrics.to_dict())

        # One metric per (dataset, metric) pair with the boosting round as the
        # step, instead of a separate metric name for every round.
        for valid_set, metrics_dict in evals_result.items():
            for metric, values in metrics_dict.items():
                for step, value in enumerate(values):
                    mlflow.log_metric(f"{valid_set}_{metric}", value, step=step)

        mlflow.log_metric("avg_score_across_folds", score)

    return score if not np.isnan(score) else 1e-9

In [9]:
import os
import optuna
from utils import CustomBackwardMultipleTimeSeriesCV


def progress_bar(study, trial, n_trials):
    """Optuna callback: print a one-line summary after each finished trial.

    Parameters
    ----------
    study : optuna.study.Study
        Study being optimised; only ``best_trial`` is read.
    trial : optuna.trial.FrozenTrial
        The trial that just finished (``number``, ``value`` and ``params``
        are read).
    n_trials : int
        Total number of trials requested, used for the ``k/n`` counter.
    """
    # `study.best_trial` raises ValueError while no trial has completed (for
    # example when the first trials were pruned); it does not return None.
    try:
        best_trial = study.best_trial
    except ValueError:
        best_trial = None

    best_trial_msg = ""
    if best_trial is not None:
        best_trial_msg = (f"Best is trial {best_trial.number} "
                          f"with value: {best_trial.value}.")

    # Adjacent f-strings are used instead of backslash continuations, which
    # embedded the next line's indentation in the printed text.
    print(f'Trial {trial.number + 1}/{n_trials} finished with value: '
          f'{trial.value} and parameters: {trial.params}. {best_trial_msg}')

# Optuna keeps its trials in a SQLite file under ./study.
# exist_ok=True replaces the separate "exists?" check, so there is no gap
# between checking for the directory and creating it.
study_dir = "study"
os.makedirs(study_dir, exist_ok=True)

# Single source of truth for the database location: the storage URL and the
# clean-up step at the bottom of this cell both derive from it.
study_db_path = f"{study_dir}/study.db"
storage_name = f"sqlite:///{study_db_path}"

# Name of the study. This should be consistent for resuming the study later.
study_name = "lgbm_optimization"

# Load the study if it already exists in the database, otherwise create it.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction='maximize',
    load_if_exists=True,
    pruner=optuna.pruners.MedianPruner(n_startup_trials=10, n_warmup_steps=5),
)

n_trials = 5
features = [col for col in dataset.columns if col.startswith('FEATURE_')]
study.optimize(lambda trial: objective(trial, dataset, features),
               n_trials=n_trials,
               callbacks=[lambda study, trial: progress_bar(study, trial, n_trials)])

# Printing the optimization results
print(f'Best trial score: {study.best_trial.value}')
print('Best hyperparameters:')
for key, value in study.best_trial.params.items():
    print(f'{key}: {value}')

best_params = study.best_params
print("Best parameters found by Optuna:")
print(best_params)

# Remove the study database so the next run starts from a clean study.
os.remove(study_db_path)
print("Database has been deleted.")

[I 2023-10-04 14:06:56,118] A new study created in RDB with name: lgbm_optimization


Training until validation scores don't improve for 500 rounds


In [None]:
# Learning curves for every metric recorded in `evals_result`
# (train on the left axis, validation on the right axis).
metrics = ['IC', 'sharpe_ratio', 'MAE', 'MSE', 'RMSE', 'R2']

# The metric functions flag IC, sharpe_ratio and R2 as higher-is-better, so
# their best round is the maximum. The error metrics are best at their minimum.
higher_is_better = {'IC', 'sharpe_ratio', 'R2'}

fig, axes = plt.subplots(len(metrics), 1, figsize=(12, 4 * len(metrics)))

for ax, metric in zip(axes, metrics):
    cv_result = pd.DataFrame({'Train Set': evals_result['train'][metric],
                              'Validation Set': evals_result['valid_0'][metric]})

    ax1 = ax
    ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

    ax1.plot(cv_result.index, cv_result['Train Set'], 'g-', label=f'Train Set {metric}')
    ax2.plot(cv_result.index, cv_result['Validation Set'], 'b-', label=f'Validation Set {metric}')

    ax1.set_title(metric)
    ax1.set_xlabel('Boosting round')
    ax1.set_ylabel(f'Train Set {metric}', color='g')
    ax2.set_ylabel(f'Validation Set {metric}', color='b')

    # Mark the boosting round with the best validation value.
    validation_curve = cv_result['Validation Set']
    if metric in higher_is_better:
        best_round = validation_curve.idxmax()
    else:
        best_round = validation_curve.idxmin()
    ax1.axvline(best_round, c='k', ls='--', lw=1)

    ax1.legend(loc='upper left')
    ax2.legend(loc='upper right')

plt.tight_layout()
plt.show()

In [None]:
# Retrain on the full training set with the best hyper-parameters.

# Work on a copy so `best_params` stays intact and the cell can be re-run.
final_params = dict(best_params)

# These two were tuned by Optuna but are not LightGBM parameters: one decides
# which features are categorical, the other selects the target column.
cat_threshold = final_params.pop('cat_threshold', 50)  # default to 50 if not in best_params
lookahead = final_params.pop('lookahead', 1)
final_params['force_col_wise'] = True
# NOTE(review): the fixed settings used during tuning (custom objective,
# 'metric': 'None', 'device': 'gpu') are not part of study.best_params, so
# they are not applied here - confirm whether that is intended.

# Same target naming scheme as in objective().
label = f'TARGET_ret_fwd_{lookahead:02d}d_rank_quantiled'

# Determine the categorical columns based on cat_threshold
categoricals = get_categoricals(dataset, cat_threshold)

# Create the training dataset
lgb_train_all = lgb.Dataset(data=dataset[features], label=dataset[label],
                            categorical_feature=categoricals, free_raw_data=False)

# Position i in the recorded history is the score after i + 1 boosting rounds,
# so the number of rounds to train is the index of the best score plus one.
valid_sharpe = evals_result['valid_0']['sharpe_ratio']
optimal_boosting_rounds = valid_sharpe.index(max(valid_sharpe)) + 1
print(f"Training final model for {optimal_boosting_rounds} boosting rounds")

# Record into a separate dict so the CV history in `evals_result` is not wiped.
final_evals_result = {}
best_model = lgb.train(params=final_params,
                       train_set=lgb_train_all,
                       num_boost_round=optimal_boosting_rounds,  # Use the optimal number of rounds
                       feval=combined_fold_metrics,
                       callbacks=[lgb.record_evaluation(final_evals_result)])

In [None]:
from pathlib import Path

# Define the models folder path
models = Path("./models")

# Ensure the folder exists
models.mkdir(exist_ok=True)

# `dataset_key` is not defined anywhere in this notebook (only the
# `dataset_keys` list is). Use it if it was injected from outside, for
# example as a papermill parameter; otherwise build a tag from every
# training window.
try:
    dataset_key
except NameError:
    dataset_key = "".join(dataset_keys)

# HDF5 keys contain slashes, which are not valid inside a file name.
clean_dataset_key = dataset_key.replace("/", "_")

# Formulate the clean save path
save_path = models / f"{top}{clean_dataset_key}_best_model_{target}.txt"

# Pass a plain string so this also works with LightGBM versions that do not
# accept Path objects.
best_model.save_model(str(save_path))
print(f"Model saved to {save_path}")

#### Test on unseen data

In [None]:
from pathlib import Path
import pandas as pd
from utils import rank_stocks_and_quantile

top = 250  # parameters -> papermill
UNSEEN_KEY = '/data/YEAR_20220803_20230803'
UNSEEN_STORE = Path(f'data/{top}_unseen_dataset.h5')

# mode='r' keeps the store read-only, so a wrong path raises instead of
# silently creating an empty HDF5 file.
with pd.HDFStore(UNSEEN_STORE, mode='r') as store:
    test_data = store[UNSEEN_KEY]

# Same keyword as the training-data call earlier in the notebook
# (`target_substring`), so both frames get the same ranked/quantiled targets.
test_data = rank_stocks_and_quantile(test_data, target_substring=target)

In [None]:
# Target column for the tuned forecast horizon, using the same naming scheme
# as objective(). `label` only exists as a local variable there, so it is
# rebuilt here from the best parameters.
lookahead = best_params.get('lookahead', 1)
label = f'TARGET_ret_fwd_{lookahead:02d}d_rank_quantiled'

test_features = test_data[features]
test_labels = test_data[label]

# Reload the model from the file written by the save cell. Using `save_path`
# directly avoids a hard-coded absolute home-directory path.
model_path = str(save_path)
best_model = lgb.Booster(model_file=model_path)

y_pred = best_model.predict(test_features)

# One row per (date, ticker) holding the true label and the model prediction.
preds = (
    test_labels.reset_index(name='actual')
    .assign(predicted=y_pred)
    .set_index(['date', 'ticker'])
)

# Rename raw OHLCV columns to carry the 'FEATURE_' prefix
# (a no-op for columns that are already prefixed).
cols_to_rename = ['open', 'high', 'low', 'close', 'volume']
new_col_names = ["FEATURE_" + col for col in cols_to_rename]
rename_dict = dict(zip(cols_to_rename, new_col_names))

test_data_renamed = test_data.rename(columns=rename_dict)

# Attach the price/volume columns by merging on 'date' and 'ticker'.
preds = preds.reset_index().merge(test_data_renamed[new_col_names].reset_index(),
                                  on=['ticker', 'date'],
                                  how='left')

# Only keep the columns of interest, indexed by (ticker, date).
preds = preds[['date', 'ticker', 'actual', 'predicted']
              + new_col_names].set_index(['ticker', 'date'])


In [None]:
# Display the prediction frame: actual vs. predicted plus the FEATURE_ price/volume columns.
preds

In [None]:
# How often each distinct predicted value occurs.
preds['predicted'].value_counts()

In [None]:
def daily_spearman(group):
    """Spearman rank correlation between the 'actual' and 'predicted' columns of ``group``."""
    rho, _p_value = spearmanr(group['actual'], group['predicted'])
    return rho

# Spearman correlation of actual vs. predicted, computed separately for each date.
daily_correlations = preds.groupby('date').apply(daily_spearman)

In [None]:
# Average of the per-date correlations.
daily_correlations.mean()

In [None]:
# Standard deviation of the per-date correlations.
daily_correlations.std()

In [None]:
# Average of the per-date correlations.
mean_daily_correlation = daily_correlations.mean()

# Each date's correlation divided by the standard deviation of all daily
# correlations. Both names refer to the same Series.
papermill_fold_scores = daily_correlations / daily_correlations.std()
fold_sharpe_ratio = papermill_fold_scores

# Convert to a one-column frame with string column labels before gluing it
# into the notebook with scrapbook.
papermill_fold_scores_df = papermill_fold_scores.to_frame().rename(columns=str)
sb.glue("papermill_era_scores", papermill_fold_scores_df, display=True)

In [None]:
# import matplotlib.pyplot as plt

# # Create a list of colors based on the sign of the Sharpe Ratios
# colors = ['blue' if value > 0 else 'red' for value in fold_sharpe_ratio]

# plt.figure(figsize=(12,6))
# fold_sharpe_ratio.plot(kind='bar', color=colors)
# plt.title('Daily Sharpe Ratios')
# plt.xlabel('Date')
# plt.ylabel('Sharpe Ratio')
# plt.grid(axis='y')
# plt.tight_layout()
# plt.axhline(y=0, color='black', linestyle='-')  # Here's where we add the horizontal line at y=0
# plt.xticks(rotation=45)  # rotates the x-axis labels for better visibility
# plt.show()

In [None]:
# plot_dir = Path("plots")
# plot_dir.mkdir(exist_ok=True)
# plot_path = plot_dir / f"sharpe_ratios_{key}.png"
# plt.savefig(plot_path)
# plt.close()

# papermill_plot_path_str = str(plot_path)  # Convert to string
# sb.glue("papermill_plot_path", papermill_plot_path_str, display=True)  # Glue the string

In [None]:
# Rank correlation pooled over every row of the prediction frame,
# together with its p-value.
lr_r, lr_p = spearmanr(preds.actual, preds.predicted)
information_coefficient, p_value = lr_r, lr_p

print(f'Information Coefficient (overall): {lr_r:.3%} (p-value: {lr_p:.8%})')

# Record both numbers in the notebook with scrapbook.
for glue_name, glue_value in (("information_coefficient", information_coefficient),
                              ("p_value", p_value)):
    sb.glue(glue_name, glue_value, display=True)

In [None]:
# papermill_plot_path_str = str(plot_path)  # Convert to string
# sb.glue("papermill_plot_path", papermill_plot_path_str)  # Glue the string

# sb.glue("information_coefficient", information_coefficient)
# sb.glue("p_value", p_value)

In [None]:
# import pandas as pd
# import alphalens as al

# def generate_alphalens_tearsheet(df: pd.DataFrame, label_col: str, price_col: str) -> None:
#     """
#     Generate the Alphalens full tearsheet given the input dataframe, 
#     label column, and price data column.
#     """
#     # Extract the factor and trade_prices series
#     factor = df[label_col]
#     trade_prices = df[price_col]
    
#     # Remove duplicated indices from factor and trade_prices
#     factor = factor[~factor.index.duplicated(keep='first')]
#     trade_prices = trade_prices[~trade_prices.index.duplicated(keep='first')]
    
#     # Convert the trade_prices series into unstacked format
#     trade_prices_unstacked = trade_prices.unstack(level='ticker')
    
#     # Ensure the factor's DatetimeIndex level has the same timezone as trade_prices_unstacked
#     if isinstance(factor.index, pd.MultiIndex):
#         level_0 = factor.index.get_level_values(0)
#         if hasattr(level_0, 'tz') and level_0.tz is not None:
#             if level_0.tz != trade_prices_unstacked.index.tz:
#                 factor.index.set_levels(level_0.tz_convert(trade_prices_unstacked.index.tz), \
#                     level=0, inplace=True)
#         factor = factor.swaplevel().sort_index()
#     else:
#         if hasattr(factor.index, 'tz') and factor.index.tz is not None:
#             if factor.index.tz != trade_prices_unstacked.index.tz:
#                 factor.index = factor.index.tz_convert(trade_prices_unstacked.index.tz)

#     # Create the factor_data dataframe with forward returns
#     alphalen_analysis = al.utils.get_clean_factor_and_forward_returns(
#         factor=factor,
#         prices=trade_prices_unstacked,
#         periods=[1, 5, 10],
#         max_loss=0.6
#     )
#     factor_returns = al.performance.factor_returns(alphalen_analysis)
#     sharpe_ratios = factor_returns.mean() / factor_returns.std()

#     print("\nSharpe Ratios:\n", sharpe_ratios)
#     alphalen_analysis = alphalen_analysis[~alphalen_analysis.index.duplicated(keep='first')]
#     alphalen_analysis = alphalen_analysis.groupby(level=[0, 1]).mean()
#     return alphalen_analysis

# # target = 'actual'
# alphalens_analysis = generate_alphalens_tearsheet(preds, \
#     label_col='actual', price_col='FEATURE_close')

# import alphalens as al
# al.tears.create_full_tear_sheet(alphalens_analysis)