In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

import bt
class SelectWhere(bt.Algo):
    """
    Algo that selects the securities whose signal is truthy at ``target.now``.

    Args:
        * signal: boolean DataFrame; its index is looked up with ``target.now``
          and its columns are the selectable labels
    """

    def __init__(self, signal):
        self.signal = signal

    def __call__(self, target):
        # No signal row for this date: leave target.temp untouched and move on.
        if target.now not in self.signal.index:
            return True

        todays_flags = self.signal.loc[target.now]

        # Store the chosen labels in temp - the weighing algo reads them from there.
        target.temp['selected'] = list(todays_flags.index[todays_flags])

        # Always True so the rest of the algo stack keeps running.
        return True

In [None]:
# Load the ticker universe; the CSV must provide a 'Symbol' column.
df = pd.read_csv('ticker/it_tickers.csv')
tickers= np.array(df['Symbol'])
# Bare expression so the notebook displays the resulting array.
tickers

In [None]:
# Per-ticker result registries, filled in by the cells below.
# ticker -> set of (rule name, annualised Sharpe) tuples for the trend rules
best_trend_strategy = {}
# ticker -> best trend rule, its Sharpe, and the per-model test accuracies
trend_strategy = {}
# ticker -> best model strategy on the test window and its Sharpe ratio
ticker_strategies={}

In [None]:
def positive_return(df, ticker=None):
    """
    Add a 0/1 ``positive_return_dummy`` column flagging rows with a positive daily return.

    The original implementation read the notebook-level ``ticker`` variable,
    so it only worked while called from inside the per-ticker loop. The symbol
    can now be passed explicitly; existing ``positive_return(df)`` calls keep
    working because the return column is then located by its prefix.

    Args:
        df: DataFrame holding a ``Daily Return(%)_<ticker>`` column. Modified in place.
        ticker: Symbol suffix of the return column. When omitted, exactly one
            column starting with ``Daily Return(%)_`` must exist.

    Raises:
        ValueError: if ``ticker`` is omitted and zero or several columns match.
    """
    prefix = 'Daily Return(%)_'
    if ticker is None:
        candidates = [col for col in df.columns if str(col).startswith(prefix)]
        if len(candidates) != 1:
            raise ValueError(
                f"Expected exactly one '{prefix}<ticker>' column, found {candidates}; "
                "pass ticker explicitly."
            )
        return_column = candidates[0]
    else:
        return_column = f'{prefix}{ticker}'

    # Zero and negative returns both map to 0.
    df['positive_return_dummy'] = (df[return_column] > 0).astype(int)

def calculate_best_ema_ratio(df, ticker):
    """
    Pick the EMA crossover rule with the highest annualised Sharpe ratio.

    For each fast/slow pair the rule is long (+1) when fast EMA / slow EMA > 1
    and short (-1) otherwise. The previous row's position is multiplied by the
    current row's ``Daily_Return_<ticker>`` and the Sharpe ratio is annualised
    with sqrt(260).

    Side effects:
        * adds the ratio columns, ``forward_return`` and the ``R_<ratio>``
          position columns to ``df`` in place;
        * records ``(best ratio name, its Sharpe)`` in the module-level
          ``best_trend_strategy[ticker]`` set, creating the set when it does
          not exist yet (previously this raised ``KeyError`` unless
          ``calculate_best_sma_ratio`` had been called first).

    Args:
        df: DataFrame with ``EMA5/10/30/60/100_<ticker>`` and
            ``Daily_Return_<ticker>`` columns.
        ticker: Symbol used as the column suffix and registry key.

    Returns:
        Name of the best ratio, e.g. ``'EMA10/30'``.
    """
    window_pairs = {
        'EMA5/10': (5, 10),
        'EMA10/30': (10, 30),
        'EMA30/60': (30, 60),
        'EMA30/100': (30, 100),
    }

    # Column creation order (ratios, forward_return, positions) is kept as before.
    for name, (fast, slow) in window_pairs.items():
        df[name] = df[f'EMA{fast}_{ticker}'] / df[f'EMA{slow}_{ticker}']

    df['forward_return'] = df[f'Daily_Return_{ticker}']

    for name in window_pairs:
        # Map True/False to +1/-1.
        df[f'R_{name}'] = (df[name] > 1).astype(int) * 2 - 1

    ema_ratios = {}
    for name in window_pairs:
        # shift(1): yesterday's position earns today's return.
        strategy_return = df[f'R_{name}'].shift(1) * df['forward_return']
        ema_ratios[name] = strategy_return.mean() / strategy_return.std() * math.sqrt(260)

    print(ticker)
    best_ratio = max(ema_ratios, key=ema_ratios.get)
    print(ema_ratios[best_ratio])
    best_trend_strategy.setdefault(ticker, set()).add((best_ratio, ema_ratios[best_ratio]))
    return best_ratio

def calculate_ema_ratio(df, ticker, selected_ratio):
    """
    Materialise the chosen EMA ratio and its trading columns on ``df`` (in place).

    Adds ``ema_ratio`` (fast EMA / slow EMA), ``R_ema_ratio`` (+1 when the
    ratio is above 1, else -1) and ``ret_ema_ratio`` (previous row's position
    times the current ``Daily_Return_<ticker>``).

    Raises:
        ValueError: if ``selected_ratio`` is not one of the four supported names.
    """
    print(selected_ratio)

    ema_windows = {
        'EMA5/10': (5, 10),
        'EMA10/30': (10, 30),
        'EMA30/60': (30, 60),
        'EMA30/100': (30, 100),
    }
    if selected_ratio not in ema_windows:
        raise ValueError("Invalid ratio selected.")

    fast, slow = ema_windows[selected_ratio]
    df['ema_ratio'] = df[f'EMA{fast}_{ticker}'] / df[f'EMA{slow}_{ticker}']

    above_one = df['ema_ratio'] > 1
    df['R_ema_ratio'] = above_one.astype(int) * 2 - 1
    lagged_position = df['R_ema_ratio'].shift(1)
    df['ret_ema_ratio'] = lagged_position * df[f'Daily_Return_{ticker}']
    
def calculate_best_sma_ratio(df, ticker):
    """
    Find the SMA crossover rule with the highest annualised Sharpe ratio.

    Each rule is long (+1) when fast SMA / slow SMA exceeds 1 and short (-1)
    otherwise; the previous row's position is applied to the current
    ``Daily_Return_<ticker>``. Ratios, ``forward_return``, position columns and
    a one-row-lagged ``RetMACD`` column are written to ``df`` in place.

    The module-level ``best_trend_strategy[ticker]`` entry is replaced by a
    fresh set holding the winning SMA rule and the ``RetMACD`` score.

    Returns:
        Name of the best ratio, e.g. ``'SMA30/60'``.
    """
    window_pairs = [(5, 10), (10, 30), (30, 60), (30, 100)]
    labels = [f'SMA{fast}/{slow}' for fast, slow in window_pairs]

    for label, (fast, slow) in zip(labels, window_pairs):
        df[label] = df[f'SMA{fast}_{ticker}'] / df[f'SMA{slow}_{ticker}']

    df['forward_return'] = df[f'Daily_Return_{ticker}']

    for label in labels:
        df[f'R_{label}'] = (df[label] > 1).astype(int) * 2 - 1

    sma_ratios = {}
    for label in labels:
        rule_return = df[f'R_{label}'].shift(1) * df['forward_return']
        sma_ratios[label] = rule_return.mean() / rule_return.std() * math.sqrt(260)

    best_ratio = max(sma_ratios, key=sma_ratios.get)
    print(sma_ratios[best_ratio])

    df['RetMACD'] = df[f'RetMACD_{ticker}'].shift(1)
    macd_score = df['RetMACD'].mean() / df['RetMACD'].std() * math.sqrt(260)

    # Start this ticker's registry from scratch on every call.
    entries = set()
    entries.add((best_ratio, sma_ratios[best_ratio]))
    entries.add(('RetMACD', macd_score))
    best_trend_strategy[ticker] = entries
    return best_ratio

def calculate_sma_ratio(df, ticker, selected_ratio):
    """
    Materialise the chosen SMA ratio and its trading columns on ``df`` (in place).

    Adds ``sma_ratio`` (fast SMA / slow SMA), ``RetMACD`` (``RetMACD_<ticker>``
    lagged by one row), ``R_sma_ratio`` (+1 when the ratio is above 1, else -1)
    and ``ret_sma_ratio`` (previous row's position times the current
    ``Daily_Return_<ticker>``).

    Raises:
        ValueError: if ``selected_ratio`` is not one of the four supported names.
    """
    print(selected_ratio)

    sma_windows = {
        'SMA5/10': (5, 10),
        'SMA10/30': (10, 30),
        'SMA30/60': (30, 60),
        'SMA30/100': (30, 100),
    }
    if selected_ratio not in sma_windows:
        raise ValueError("Invalid ratio selected.")

    fast, slow = sma_windows[selected_ratio]
    df['sma_ratio'] = df[f'SMA{fast}_{ticker}'] / df[f'SMA{slow}_{ticker}']
    df['RetMACD'] = df[f'RetMACD_{ticker}'].shift(1)

    above_one = df['sma_ratio'] > 1
    df['R_sma_ratio'] = above_one.astype(int) * 2 - 1
    lagged_position = df['R_sma_ratio'].shift(1)
    df['ret_sma_ratio'] = lagged_position * df[f'Daily_Return_{ticker}']
def calculate_best_volatility_ratio(df, ticker):
    """
    Pick the volatility-ratio rule with the highest annualised Sharpe ratio.

    For each short/long pair the rule is long (+1) when short vol / long vol > 1
    and short (-1) otherwise. The previous row's position is multiplied by the
    current row's ``Daily_Return_<ticker>`` and the Sharpe ratio is annualised
    with sqrt(260).

    Side effects:
        * adds the ratio columns, ``forward_return`` and the ``R_<ratio>``
          position columns to ``df`` in place;
        * records ``(best ratio name, its Sharpe)`` in the module-level
          ``best_trend_strategy[ticker]`` set, creating the set when it does
          not exist yet (previously this raised ``KeyError`` unless
          ``calculate_best_sma_ratio`` had been called first).

    Args:
        df: DataFrame with ``Vol5/10/20/60_<ticker>`` and
            ``Daily_Return_<ticker>`` columns.
        ticker: Symbol used as the column suffix and registry key.

    Returns:
        Name of the best ratio, e.g. ``'Vol10/20'``.
    """
    window_pairs = {
        'Vol5/10': (5, 10),
        'Vol10/20': (10, 20),
        'Vol20/60': (20, 60),
    }

    # Column creation order (ratios, forward_return, positions) is kept as before.
    for name, (short, long_) in window_pairs.items():
        df[name] = df[f'Vol{short}_{ticker}'] / df[f'Vol{long_}_{ticker}']

    df['forward_return'] = df[f'Daily_Return_{ticker}']

    for name in window_pairs:
        # Map True/False to +1/-1.
        df[f'R_{name}'] = (df[name] > 1).astype(int) * 2 - 1

    vol_ratios = {}
    for name in window_pairs:
        # shift(1): yesterday's position earns today's return.
        strategy_return = df[f'R_{name}'].shift(1) * df['forward_return']
        vol_ratios[name] = strategy_return.mean() / strategy_return.std() * math.sqrt(260)

    best_ratio = max(vol_ratios, key=vol_ratios.get)
    best_trend_strategy.setdefault(ticker, set()).add((best_ratio, vol_ratios[best_ratio]))
    return best_ratio
def calculate_volitility_ratio(df, ticker, selected_ratio):
    """
    Materialise the chosen volatility ratio and its trading columns on ``df`` (in place).

    Adds ``volatility_ratio`` (short vol / long vol), ``R_vol_ratio`` (+1 when
    the ratio is above 1, else -1) and ``ret_vol_ratio`` (previous row's
    position times the current ``Daily_Return_<ticker>``).

    Raises:
        ValueError: if ``selected_ratio`` is not one of the three supported names.
    """
    print(selected_ratio)

    vol_windows = {
        'Vol5/10': (5, 10),
        'Vol10/20': (10, 20),
        'Vol20/60': (20, 60),
    }
    if selected_ratio not in vol_windows:
        raise ValueError("Invalid ratio selected.")

    short, long_ = vol_windows[selected_ratio]
    df['volatility_ratio'] = df[f'Vol{short}_{ticker}'] / df[f'Vol{long_}_{ticker}']

    above_one = df['volatility_ratio'] > 1
    df['R_vol_ratio'] = above_one.astype(int) * 2 - 1
    lagged_position = df['R_vol_ratio'].shift(1)
    df['ret_vol_ratio'] = lagged_position * df[f'Daily_Return_{ticker}']
def calculate_sharpe_ratio(df, signals_df, signal, ticker, periods_per_year=260):
    """
    Annualised Sharpe ratio of trading ``Daily_Return_<ticker>_L01d`` with a signal.

    The previous row's signal is multiplied by the current row's return, so the
    first row has no strategy return and is excluded from the statistic.
    ``signals_df`` and ``df`` are combined by index label, so they must share
    the same index.

    Side effects (on ``df``, in place):
        * ``ret_<signal>``: the per-row strategy return;
        * ``<signal>``: a copy of the signal column.

    Args:
        df: DataFrame containing ``Daily_Return_<ticker>_L01d``.
        signals_df: DataFrame containing a ``signal_<signal>`` column.
        signal: Signal name without the ``signal_`` prefix.
        ticker: Symbol used in the return column name.
        periods_per_year: Annualisation factor. Defaults to 260, the value the
            rest of this notebook uses (the old inline comment wrongly said 252).

    Returns:
        mean / std of the strategy returns, scaled by sqrt(periods_per_year).
    """
    column_name = f'Daily_Return_{ticker}_L01d'
    df[f'ret_{signal}'] = signals_df[f'signal_{signal}'].shift(1) * df[column_name]
    df[f'{signal}'] = signals_df[f'signal_{signal}']
    daily_return = df[f'ret_{signal}'].dropna()
    sharpe_ratio = (daily_return.mean() / daily_return.std()) * math.sqrt(periods_per_year)

    return sharpe_ratio

def calculate_best_strategy(test_df, signals, ticker, signals_df=None):
    """
    Score every signal with ``calculate_sharpe_ratio`` and return the best one.

    Args:
        test_df: DataFrame the strategy returns are computed on; it receives the
            columns that ``calculate_sharpe_ratio`` adds.
        signals: Iterable of signal names (without the ``signal_`` prefix).
        ticker: Symbol used in the return column name.
        signals_df: DataFrame with one ``signal_<name>`` column per signal.
            The original code silently read a notebook-level ``signals_df``;
            that lookup is kept as the fallback when this argument is omitted
            so existing three-argument calls keep working.

    Returns:
        ``(best_strategy, sharpe_ratios)`` - the name with the highest Sharpe
        ratio and the dict of all ratios keyed by signal name.

    Raises:
        NameError: if ``signals_df`` is omitted and no global of that name exists.
        ValueError: from ``max`` when ``signals`` is empty.
    """
    if signals_df is None:
        try:
            signals_df = globals()['signals_df']
        except KeyError:
            raise NameError(
                "signals_df was not passed and no notebook-level 'signals_df' is defined"
            ) from None

    sharpe_ratios = {
        signal: calculate_sharpe_ratio(test_df, signals_df, signal, ticker)
        for signal in signals
    }

    best_strategy = max(sharpe_ratios, key=sharpe_ratios.get)

    return best_strategy, sharpe_ratios

def merge_data(df, ticker, new_csv, date_column):
    """
    Join the rows of a CSV onto ``df`` by date and keep only rows that have a price.

    The CSV's ``time`` column is renamed to ``date_column`` and parsed as
    datetimes, the two frames are outer-merged on that column, and rows whose
    ``Open_<ticker>`` is missing (dates present only in the CSV) are dropped.

    Returns:
        The merged DataFrame; ``df`` itself is not modified.
    """
    extra = pd.read_csv(new_csv).rename(columns={'time': date_column})
    extra[date_column] = pd.to_datetime(extra[date_column])

    combined = pd.merge(df, extra, how='outer', on=date_column)

    # Rows without an opening price came from the CSV only - discard them.
    return combined.dropna(subset=[f'Open_{ticker}'])


In [None]:
# Symbols the per-ticker loop skips (it `continue`s when the ticker is in this list).
avoid_tickers=['ANET','CDW']

In [None]:
from tabulate import tabulate

# ---- Loop-invariant configuration -------------------------------------------
target_variable = 'positive_return_dummy'
start_date = '2018-01-01'                # training window
end_date = '2022-12-31'
start_date_test = '2023-01-01'           # model-selection (test) window
end_date_test = '2023-12-31'
start_date_out_of_sample = '2024-01-01'  # everything from here on is out-of-sample
threshold = 0.5                          # score above this => long signal

# Only the three ratio features feed the models; the merged sentiment columns
# are carried in the frames but are not used as regressors here.
included_columns = ['volatility_ratio_L01d', 'ema_ratio_L01d', 'sma_ratio_L01d']

model_order = ['logit', 'tree', 'rf', 'xgb', 'ensemble']
table_labels = {'logit': 'LOGIT', 'tree': 'DT', 'rf': 'RF', 'xgb': 'XGB', 'ensemble': 'ENSEMBLE'}
plot_labels = {'logit': 'logit', 'tree': 'Decision Tree', 'rf': 'Random Forest',
               'xgb': 'XGB', 'ensemble': 'ENSEMBLE'}
stat_names = ['total_return', 'calmar', 'daily_sharpe', 'daily_sortino', 'max_drawdown', 'daily_vol']


def _predict_scores(models, features, labels):
    """Score `features` with every fitted model; 'ensemble' is the element-wise max."""
    scores = {
        'logit': np.asarray(models['logit'].predict(features)),
        'tree': np.asarray(models['tree'].predict(features)),
        'rf': np.asarray(models['rf'].predict(features)),
        'xgb': np.asarray(models['xgb'].predict(xgb.DMatrix(features, label=labels))),
    }
    # Max over the four score vectors: the ensemble is long as soon as any
    # single model's score clears the threshold.
    scores['ensemble'] = np.maximum.reduce(
        [scores['logit'], scores['tree'], scores['rf'], scores['xgb']]
    )
    return scores


def _run_backtest(model_key, signal, dates, close):
    """Backtest a long-when-signal-is-True strategy on the close price with bt."""
    column = f'signal_{model_key}'
    # bt matches signal and price by column name, so both frames share it.
    signal_frame = pd.DataFrame({column: signal, 'Date': dates}).set_index('Date')
    price_frame = pd.DataFrame({'Date': dates, column: close}).set_index('Date')
    strategy = bt.Strategy(
        f'Strategy_{model_key}',
        [bt.algos.SelectWhere(signal=signal_frame), bt.algos.WeighEqually(), bt.algos.Rebalance()]
    )
    return bt.run(bt.Backtest(strategy, price_frame))


for ticker in tickers:
    # Skip excluded symbols before touching the filesystem.
    if ticker in avoid_tickers:
        continue

    df = pd.read_csv(f'ticker/data/{ticker}.csv')
    date_column = f'Date_{ticker}'
    df[date_column] = pd.to_datetime(df[date_column])

    # Drop the hit/exit bookkeeping columns that are not used below.
    df = df.drop(columns=[
        f'Hit?_{ticker}', f'Hit?_{ticker}.1', f'Hit?_{ticker}.2',
        f'HitAt_2.2_{ticker}', f'HitAt_2.0_{ticker}', f'HitAt_1.8_{ticker}',
        f'ExitPr_2.2_{ticker}', f'ExitPr_2.0_{ticker}', f'ExitPr_1.8_{ticker}',
        f'ret_2.2_{ticker}', f'Ret2.0_{ticker}', f'Ret_1.8_{ticker}',
    ])

    positive_return(df)
    df = df.rename(columns={f'Daily Return(%)_{ticker}': f'Daily_Return_{ticker}'})

    # ---- Train / test / out-of-sample split ---------------------------------
    # .copy() so the column assignments below write to independent frames
    # instead of to slices of df.
    in_train = (df[date_column] >= start_date) & (df[date_column] <= end_date)
    in_test = (df[date_column] >= start_date_test) & (df[date_column] <= end_date_test)
    in_oos = df[date_column] >= start_date_out_of_sample
    train_df = df[in_train].copy()
    test_df = df[in_test].copy()
    out_of_sample_df = df[in_oos].copy()

    # The target is the NEXT row's up/down flag; the last row of each window
    # becomes NaN and is removed by the dropna() further down.
    for frame in (train_df, test_df, out_of_sample_df):
        frame['positive_return_dummy'] = frame['positive_return_dummy'].shift(-1)
    print(len(test_df))
    print(len(out_of_sample_df))

    # ---- Trend features -----------------------------------------------------
    # The best window pair is chosen once, on the training window only, and the
    # same pair is then applied to all three windows. The SMA search must run
    # first: it (re)creates best_trend_strategy[ticker].
    windows = (train_df, test_df, out_of_sample_df)

    best_sma = calculate_best_sma_ratio(train_df, ticker)
    for frame in windows:
        calculate_sma_ratio(frame, ticker, best_sma)

    best_ema = calculate_best_ema_ratio(train_df, ticker)
    for frame in windows:
        calculate_ema_ratio(frame, ticker, best_ema)

    best_vol = calculate_best_volatility_ratio(train_df, ticker)
    for frame in windows:
        calculate_volitility_ratio(frame, ticker, best_vol)
    print(test_df)

    # ---- Sentiment merge ----------------------------------------------------
    sentiment_csv = f'ticker/sentiment_count_{ticker}.csv'
    train_df = merge_data(train_df.dropna(), ticker, sentiment_csv, date_column).fillna(0)
    test_df = merge_data(test_df.dropna(), ticker, sentiment_csv, date_column).fillna(0)
    out_of_sample_df = merge_data(out_of_sample_df.dropna(), ticker, sentiment_csv, date_column).fillna(0)

    # Tag every column except the target with the '_L01d' suffix. The mapping is
    # derived from train_df and applied to all three frames; names a frame does
    # not have are ignored by rename().
    lagged_names = {
        col: col + '_L01d'
        for col in train_df.columns
        if col != 'positive_return_dummy' and not col.endswith('_L01d')
    }
    train_df = train_df.rename(columns=lagged_names)
    test_df = test_df.rename(columns=lagged_names)
    out_of_sample_df = out_of_sample_df.rename(columns=lagged_names)

    # ---- Fit the four models on the training window -------------------------
    selected_columns_formula = f"{target_variable} ~ {' + '.join(included_columns)}"
    print(selected_columns_formula)
    logit_model = smf.logit(formula=selected_columns_formula, data=train_df).fit()
    print(logit_model.summary())

    y_train = train_df['positive_return_dummy']
    X_train = train_df[included_columns]
    tree_model = DecisionTreeClassifier(random_state=42)
    tree_model.fit(X_train, y_train)
    # Seeded like the tree so a re-run reproduces the same forest.
    rf_model = RandomForestClassifier(random_state=42)
    rf_model.fit(X_train, y_train)

    # 'n_estimators' is a scikit-learn wrapper argument that xgb.train does not
    # use; the number of trees is set by num_boost_round.
    xgb_params = {'eval_metric': 'logloss', 'learning_rate': 0.01}
    xgb_model = xgb.train(xgb_params, xgb.DMatrix(X_train, label=y_train), num_boost_round=100)

    models = {'logit': logit_model, 'tree': tree_model, 'rf': rf_model, 'xgb': xgb_model}

    # ---- Evaluate on the test window ----------------------------------------
    y_true = test_df['positive_return_dummy']
    test_scores = _predict_scores(models, test_df[included_columns], y_true)
    test_signals = {key: test_scores[key] > threshold for key in model_order}

    roc_auc = {key: roc_auc_score(y_true, test_scores[key]) for key in ('logit', 'tree', 'rf', 'xgb')}
    accuracy = {key: accuracy_score(y_true, test_signals[key]) for key in ('logit', 'tree', 'rf', 'xgb')}

    test_dates = test_df[f'Date_{ticker}_L01d']
    test_close = test_df[f'Close_{ticker}_L01d']
    results = {
        key: _run_backtest(key, test_signals[key], test_dates, test_close)
        for key in model_order
    }

    table_data = [
        [table_labels[key]] + [results[key].stats[f'Strategy_{key}'][stat] for stat in stat_names]
        for key in model_order
    ]
    headers = ['Model', 'Total Return', 'Calmar', 'Daily Sharpe', 'Sortino','Drawdown', 'Daily Volatility']
    print(tabulate(table_data, headers=headers, tablefmt="grid"))

    # +1 / -1 positions, indexed like test_df so calculate_sharpe_ratio lines
    # the signal up with the returns row by row.
    signals_df = pd.DataFrame(
        {f'signal_{key}': test_signals[key].astype(int) * 2 - 1 for key in model_order},
        index=test_df.index,
    )

    plt.figure(figsize=(10, 6))
    for key in model_order:
        plt.plot(results[key].prices, label=plot_labels[key])
    plt.title(f'{ticker}: logit vs Decision_tree vs Random forest vs XGB vs ensemble')
    plt.xticks(rotation=45)
    plt.legend()
    plt.savefig(f'backtest/{ticker}_with_news_w_ensemble.png',format='png')
    plt.show()
    plt.close()  # one figure per ticker: release it before the next iteration

    # Evaluation order is kept from the original (it decides ties in max()).
    signals = ['logit', 'tree', 'xgb', 'rf','ensemble']
    best_strategy, sharpe_ratios = calculate_best_strategy(test_df, signals, ticker)
    print(f"Best strategy for {ticker}: {best_strategy}, Sharpe ratio: {sharpe_ratios[best_strategy]}")
    test_df.to_csv(f'ticker/backtest-test/{ticker}_with_news_w_ensemble.csv', index = False)
    ticker_strategies[ticker] = {'Best Strategy': best_strategy, 'Sharpe Ratio': sharpe_ratios[best_strategy]}

    # ---- Apply the winning strategy out-of-sample ---------------------------
    oos_scores = _predict_scores(
        models, out_of_sample_df[included_columns], out_of_sample_df['positive_return_dummy']
    )
    y_pred_best = oos_scores[best_strategy]
    # Index must match out_of_sample_df: merge_data can drop rows, leaving gaps
    # in the index, and a fresh RangeIndex would misalign signal and returns.
    signals_df = pd.DataFrame(
        {'signal_best': (y_pred_best > threshold).astype(int) * 2 - 1},
        index=out_of_sample_df.index,
    )
    sharpe_ratio = calculate_sharpe_ratio(out_of_sample_df, signals_df, 'best', ticker)
    print(sharpe_ratio)
    out_of_sample_df.to_csv(f'ticker/backtest-oos/{ticker}_with_news_w_ensemble.csv', index = False)

    # Best pure trend rule (by training-window Sharpe) plus model accuracies.
    best_tuple = max(best_trend_strategy[ticker], key=lambda x: x[1])
    trend_strategy[ticker] = {
        'strategy': best_tuple[0], 'sharpe': best_tuple[1],
        'logit': accuracy['logit'], 'tree': accuracy['tree'],
        'rf': accuracy['rf'], 'xgb': accuracy['xgb'],
    }

In [None]:
# Display the per-ticker best trend rule and model accuracies collected by the loop.
trend_strategy

In [None]:
# Display the per-ticker best model strategy and its test-window Sharpe ratio.
ticker_strategies

In [None]:
import pandas as pd

# Build the test-window portfolio: one column per ticker holding the daily
# returns of that ticker's best model strategy.
ticker_final = None
return_columns = []

for ticker, strategy_info in ticker_strategies.items():
    ticker_final = ticker
    best_strategy = strategy_info['Best Strategy']

    # Per-ticker test backtest as written by the main loop (test_df.to_csv).
    # The previous path, 'backtest/<ticker>_..._ensemble.csv', is never written
    # by this notebook - only the PNG charts go to 'backtest/'.
    file_name = f'ticker/backtest-test/{ticker}_with_news_w_ensemble.csv'
    df = pd.read_csv(file_name)

    # A missing strategy column becomes an all-NA column instead of raising.
    column_name = f'ret_{best_strategy}'
    df[f'ret_{ticker}'] = df.get(column_name, pd.NA)
    return_columns.append(df[f'ret_{ticker}'])

# Concatenate once, column-wise; the leading empty frame keeps the result an
# empty DataFrame when there are no tickers.
concatenated_df = pd.concat([pd.DataFrame(), *return_columns], axis=1)

print(f"Shape of concatenated_df: {concatenated_df.shape}")
print(f"Shape of test_df: {test_df.shape}")

# Label the rows with the dates of the last processed ticker. test_df is the
# frame left over from the final iteration of the main loop, so this is only
# valid when every ticker covers the same rows.
date_label = f'Date_{ticker_final}_L01d'
if date_label in test_df.columns and len(concatenated_df) == len(test_df):
    concatenated_df.set_index(test_df[date_label], inplace=True)
else:
    print(
        f"Could not attach the date index ({date_label}): "
        f"{len(concatenated_df)} portfolio rows vs {len(test_df)} rows in test_df. "
        "Keeping the positional index."
    )

# Display the resulting concatenated DataFrame
print(concatenated_df.head())


In [None]:
# Persist the test-window model portfolio; index=True also writes the frame's index.
concatenated_df.to_csv('test_portfolio_final_with_news_w_ensemble.csv', index = True)

In [None]:
# Test-window portfolio of each ticker's best pure trend rule.
ticker_final = None
trend_columns = [pd.DataFrame()]  # seed keeps the result a DataFrame even with no tickers

for ticker, strategy_info in trend_strategy.items():
    ticker_final = ticker
    best_strategy = strategy_info['strategy']
    # First three letters identify the rule family: sma / ema / vol / ret (RetMACD).
    strategy = best_strategy[:3].lower()

    # Per-ticker test backtest written by the main loop.
    file_name = f'ticker/backtest-test/{ticker}_with_news_w_ensemble.csv'
    df = pd.read_csv(file_name)

    column_name = 'RetMACD_L01d' if strategy == 'ret' else f'ret_{strategy}_ratio_L01d'
    df[f'ret_{ticker}'] = df[column_name]
    trend_columns.append(df[f'ret_{ticker}'])

# One column-wise concat, then label the rows with the last ticker's test dates.
concatenated_df = pd.concat(trend_columns, axis=1)
concatenated_df.set_index(test_df[f'Date_{ticker_final}_L01d'], inplace=True)

# Display the concatenated DataFrame
print(concatenated_df.head())

In [None]:
# Persist the test-window trend-rule portfolio; index=True also writes the frame's index.
concatenated_df.to_csv('trend_portfolio_final_with_news_w_ensemble.csv', index = True)

In [None]:
# Out-of-sample portfolio of each ticker's best model strategy ('ret_best').
ticker_final = None
oos_columns = [pd.DataFrame()]  # seed keeps the result a DataFrame even with no tickers

for ticker in ticker_strategies:
    ticker_final = ticker

    # Per-ticker out-of-sample backtest written by the main loop.
    file_name = f'ticker/backtest-oos/{ticker}_with_news_w_ensemble.csv'
    df = pd.read_csv(file_name)

    column_name = f'ret_best'
    df[f'ret_{ticker}'] = df[column_name]
    oos_columns.append(df[f'ret_{ticker}'])

# One column-wise concat, then label the rows with the last ticker's OOS dates.
concatenated_df = pd.concat(oos_columns, axis=1)
concatenated_df.set_index(out_of_sample_df[f'Date_{ticker_final}_L01d'], inplace=True)

# Display the concatenated DataFrame
print(concatenated_df.head())

In [None]:
# Persist the out-of-sample model portfolio; index=True also writes the frame's index.
concatenated_df.to_csv('oos_portfolio_final_with_news_w_ensemble.csv', index = True)

In [None]:
# Out-of-sample portfolio of each ticker's best pure trend rule.
ticker_final = None
oos_trend_columns = [pd.DataFrame()]  # seed keeps the result a DataFrame even with no tickers

for ticker, strategy_info in trend_strategy.items():
    ticker_final = ticker
    best_strategy = strategy_info['strategy']
    # First three letters identify the rule family: sma / ema / vol / ret (RetMACD).
    strategy = best_strategy[:3].lower()

    # Per-ticker out-of-sample backtest written by the main loop.
    file_name = f'ticker/backtest-oos/{ticker}_with_news_w_ensemble.csv'
    df = pd.read_csv(file_name)

    column_name = 'RetMACD_L01d' if strategy == 'ret' else f'ret_{strategy}_ratio_L01d'
    df[f'ret_{ticker}'] = df[column_name]
    oos_trend_columns.append(df[f'ret_{ticker}'])

# One column-wise concat, then label the rows with the last ticker's OOS dates.
concatenated_df = pd.concat(oos_trend_columns, axis=1)
concatenated_df.set_index(out_of_sample_df[f'Date_{ticker_final}_L01d'], inplace=True)

# Display the concatenated DataFrame
print(concatenated_df.head())

In [None]:
# Persist the out-of-sample trend-rule portfolio; index=True also writes the frame's index.
concatenated_df.to_csv('oos_trend_portfolio_final_with_news_w_ensemble.csv', index = True)