In [2]:
import duckdb
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


# Feature Selection Process

- IC < 0.01 (pure noise),

- p-value > 0.10 (statistically insignificant),

- Sharpe < 0.3 (non-tradable).

In [4]:
!duckdb -ui

[90m┌[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m┐[0m[90m
[0m[90m│[0m[90m                [0mresult[90m                [0m[90m│[0m[90m
[0m[90m│[0m[90m               [0mvarchar[90m                [0m[90m│[0m[90m
[0m[90m├[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m─[0m[90m┤[0m[90m
[0m[90m│[

# Connect to Local Database

In [7]:

# Create connection to local duckdb database
con = duckdb.connect('../db/duck.db', read_only=True)
%sql con --alias duckdb

# 1. Information Coefficient (IC) Test

In [None]:
def calculate_ic_and_select_factors(con, columns):
    ic_results_list = []

    for feature in columns:
        query = f"""
            SELECT f.date, f.symbol, f.id AS factor_id,
                f.value AS factor_value,
                r.value AS fwd_return_3m
            FROM features_normalized2 f
            JOIN features_normalized2 r ON f.symbol = r.symbol AND f.date = r.date 
            WHERE r.id = 'fwd_return_3m'
                AND f.id = '{feature}'
            """
        df = con.execute(query).fetchdf().dropna()
        if not df.empty:
            ic, p_value = spearmanr(df['factor_value'], df['fwd_return_3m']) #[0]
            ic_results_list.append({'factor_id': feature, 'IC': ic, 'p_value': p_value})
            print(f"{feature}: {ic} || {p_value}")

    ic_results = pd.DataFrame(ic_results_list)
    
    return ic_results

# Example usage:
columns = con.execute("""SELECT distinct id FROM features_normalized2 where id not in ('fwd_return_1m', 'fwd_return_1w', 'fwd_return_3m', 'fwd_return_6m') 
--and id in ('others_cr', 'others_dlr', 'others_dr', 'price_to_52w_close_high', 'price_to_52w_close_low', 'trend_adx', 'trend_adx_neg', 'trend_adx_pos', 'trend_kst_diff', 'volatility_atr', 'volatility_bbhi', 'volatility_bbli', 'volatility_kchi', 'volatility_kcli', 'volume_em', 'volume_nvi') 
order by id desc""").fetchall()
columns = [col[0] for col in columns]
selected_factors = calculate_ic_and_select_factors(con, columns)

wr: -0.013704185821619664 || 0.0
vpt: -0.015463836588293905 || 0.0
volume_vwap: 0.015679184480933157 || 0.0
volume_vpt: -0.03901476126688441 || 0.0
volume_sma_em: 0.015515609874104051 || 0.0
volume_obv: 0.04990422407343124 || 0.0
volume_nvi: 0.06618190254438291 || 0.0
volume_mfi: -0.0008833420947766373 || 0.0001229326502443899
volume_fi: -0.006730858315137649 || 3.11094928274895e-188
volume_em: 0.0069558731525107 || 1.337257417477949e-194
volume_cmf: 0.01795430672987612 || 0.0
volume_adi: 0.062347660358782336 || 0.0
volume: 0.03760456675750141 || 0.0
volatility_ui: -0.07446453167380936 || 0.0
volatility_kcw: -0.08531870821987977 || 0.0
volatility_kcp: 0.003603862575510632 || 9.684947615004907e-56
volatility_kcli: 0.0028732848144845994 || 4.5319135253342036e-36
volatility_kcl: 0.018464276999176295 || 0.0
volatility_kchi: 0.013077117509605708 || 0.0
volatility_kch: 0.014667648094085538 || 0.0
volatility_kcc: 0.016357293148634173 || 0.0
volatility_dcw: -0.0815813453850572 || 0.0
volatilit

In [12]:
# Input data as a string
data = """
wr: -0.013704185821619664 || 0.0 
vpt: -0.015463836588293905 || 0.0 
volume_vwap: 0.015679184480933157 || 0.0 
volume_vpt: -0.03901476126688441 || 0.0 
volume_sma_em: 0.015515609874104051 || 0.0 
volume_obv: 0.04990422407343124 || 0.0 
volume_nvi: 0.06618190254438291 || 0.0 
volume_mfi: -0.0008833420947766373 || 0.0001229326502443899 
volume_fi: -0.006730858315137649 || 3.11094928274895e-188 
volume_em: 0.0069558731525107 || 1.337257417477949e-194 
volume_cmf: 0.01795430672987612 || 0.0 
volume_adi: 0.062347660358782336 || 0.0 
volume: 0.03760456675750141 || 0.0 
volatility_ui: -0.07446453167380936 || 0.0 
volatility_kcw: -0.08531870821987977 || 0.0 
volatility_kcp: 0.003603862575510632 || 9.684947615004907e-56 
volatility_kcli: 0.0028732848144845994 || 4.5319135253342036e-36 
volatility_kcl: 0.018464276999176295 || 0.0 
volatility_kchi: 0.013077117509605708 || 0.0 
volatility_kch: 0.014667648094085538 || 0.0 
volatility_kcc: 0.016357293148634173 || 0.0 
volatility_dcw: -0.0815813453850572 || 0.0 
volatility_dcp: 0.021221466678520474 || 0.0 
volatility_dcm: 0.014699081375571748 || 0.0 
volatility_dcl: 0.0197467625066741 || 0.0 
volatility_dch: 0.011029120702967944 || 0.0 
volatility_bbw: -0.07555063956706681 || 0.0 
volatility_bbp: 0.008795559760768198 || 0.0 
volatility_bbm: 0.016020554336795136 || 0.0 
volatility_bbli: 0.001000076887712773 || 4.391941477216925e-05 
volatility_bbl: 0.02103866949526729 || 0.0 
volatility_bbhi: 0.0027400710348424702 || 3.8458685307598197e-32 
volatility_bbh: 0.012738670580500416 || 0.0 
volatility_atr: -0.03209634073003384 || 0.0 
vol_vo_p_1m: -0.027435433825906932 || 0.0 
vol_vo_p_12m: -0.045007177475853775 || 0.0 
trend_vortex_ind_pos: 0.004444608892231372 || 3.896004172653618e-83 
trend_vortex_ind_neg: -0.022832546005177427 || 0.0 
trend_vortex_ind_diff: 0.014497667565401589 || 0.0 
trend_visual_ichimoku_b: 0.012611506516853947 || 0.0 
trend_visual_ichimoku_a: 0.016574193857840255 || 0.0 
trend_trix: 0.023019283055012725 || 0.0 
trend_stc: -0.01007761088320349 || 0.0 
trend_sma_slow: 0.01573283903172571 || 0.0 
trend_sma_fast: 0.016338733898129253 || 0.0 
trend_psar_up_indicator: -0.0009472373404150161 || 3.560439765768314e-05 
trend_psar_up: 0.016644519016483037 || 0.0 
trend_psar_down_indicator: 0.0029563193994538526 || 4.3219407884071475e-38 
trend_psar_down: 0.009189434488290777 || 2.9224339535097933e-172 
trend_mass_index: 0.004224653825104116 || 3.7101318673542075e-74 
trend_macd_signal: 0.022330067926982382 || 0.0 
trend_macd_diff: -0.011197893924044909 || 0.0 
trend_macd: 0.019648460204411335 || 0.0 
trend_kst_sig: 0.00631633637558849 || 6.560474080643416e-166 
trend_kst_diff: 0.0007027261741205204 || 0.002258939291105518 
trend_kst: 0.006036699482150455 || 1.013721727491223e-151 
trend_ichimoku_conv: 0.01580893741036325 || 0.0 
trend_ichimoku_base: 0.014047370331981978 || 0.0 
trend_ichimoku_b: 0.012122463944134819 || 0.0 
trend_ichimoku_a: 0.014728673207215312 || 0.0 
trend_ema_slow: 0.0154649402020573 || 0.0 
trend_ema_fast: 0.016202751091176196 || 0.0 
trend_dpo: 0.002459670883199798 || 1.3460833155634874e-26 
trend_cci: 0.01045339748368245 || 0.0 
trend_aroon_up: 0.02084458275500858 || 0.0 
trend_aroon_ind: 0.019577265778204594 || 0.0 
trend_aroon_down: -0.01332559728721294 || 0.0 
trend_adx_pos: 0.018812997293452646 || 0.0 
trend_adx_neg: 0.017802846233634335 || 0.0 
trend_adx: -0.009480384058223304 || 0.0 
price_to_52w_low: -0.0071831601892307275 || 9.166368776888275e-216 
price_to_52w_high: 0.09062431297497182 || 0.0 
price_to_52w_close_low: 5.5742510423340316e-05 || 0.8077951143918478 
price_to_52w_close_high: 0.08639519618831074 || 0.0 
others_dr: -0.0007462073924690402 || 0.0014109681793518887 
others_dlr: -0.0008485932433137617 || 0.00028295870302123 
others_cr: 0.07549875561541965 || 0.0 
open: 0.01645454330107275 || 0.0 
month: -0.013024773073061137 || 0.0 
momentum_wr: 0.016123968586806165 || 0.0 
momentum_uo: 0.013240603965065599 || 0.0 
momentum_tsi: 0.029500362888584954 || 0.0 
momentum_stoch_signal: 0.020738264511975146 || 0.0 
momentum_stoch_rsi_k: -0.0004507354377256869 || 0.051094256857018884 
momentum_stoch_rsi_d: 0.0018301757829815002 || 2.4560542551369117e-15 
momentum_stoch_rsi: -0.0030598112730510327 || 4.497182471175203e-40 
momentum_stoch: 0.016123969552303278 || 0.0 
momentum_rsi: 0.020190028473859607 || 0.0 
momentum_roc: 0.0052059537783874735 || 1.7629190561304304e-113 
momentum_pvo_signal: 0.015420050400160342 || 0.0 
momentum_pvo_hist: 0.008947678824389347 || 0.0 
momentum_pvo: 0.01776698424965543 || 0.0 
momentum_ppo_signal: 0.025074146945049745 || 0.0 
momentum_ppo_hist: -0.010133910240197011 || 0.0 
momentum_ppo: 0.021433837499414018 || 0.0 
momentum_kama: 0.016113904639991655 || 0.0 
momentum_ao: 0.015158425054211791 || 0.0 
momentum_9m: 0.06504777148310881 || 0.0 
momentum_6m: 0.0565090706453264 || 0.0 
momentum_5y: 0.057018375057656764 || 0.0 
momentum_3m: 0.02933544781368769 || 0.0 
momentum_1m: 0.00916486541815098 || 0.0 
momentum_12m_1m: 0.06565879151941563 || 0.0 
momentum_12m: 0.06241806860025787 || 0.0 
momentum_10y: 0.051676275314291094 || 0.0 
low: 0.017358440928260114 || 0.0 
lottery: -0.0779334243568043 || 0.0 
log_volume: 0.04152397632125238 || 0.0 
log_price: 0.014332100116892203 || 0.0 
high: 0.015501005703785615 || 0.0 
dayofyear: -0.01312024016683221 || 0.0 
dayofweek: 0.0002156179581929017 || 0.3466626694196152 
dayofmonth: -0.0002701186616405776 || 0.2384160368723561 
close: 0.01639495140282547 || 0.0 
amihud: -0.05476204432367255 || 0.0 
adtv_30d: 0.03436676341493021 || 0.0
"""

# Split the data into lines
lines = data.strip().split('\n')

# Initialize lists to store the data
features = []
ics = []
p_values = []

# Process each line
for line in lines:
    parts = line.split(' || ')
    feature_part = parts[0].split(': ')
    feature = feature_part[0]
    ic = float(feature_part[1])
    p_value = float(parts[1])
    
    features.append(feature)
    ics.append(ic)
    p_values.append(p_value)

# Create a DataFrame
ic_results_df = pd.DataFrame({
    'feature': features,
    'ic': ics,
    'p_value': p_values
})

# Display the DataFrame
ic_results_df

Unnamed: 0,feature,ic,p_value
0,wr,-0.013704,0.000000
1,vpt,-0.015464,0.000000
2,volume_vwap,0.015679,0.000000
3,volume_vpt,-0.039015,0.000000
4,volume_sma_em,0.015516,0.000000
...,...,...,...
111,dayofweek,0.000216,0.346663
112,dayofmonth,-0.000270,0.238416
113,close,0.016395,0.000000
114,amihud,-0.054762,0.000000


In [17]:
selected_factors_ic = ic_results_df[(ic_results_df['ic'] >= 0.01) | (ic_results_df['ic'] <= -0.01)]
print(f"Selected Factors After IC Test: {len(selected_factors_ic)}")

Selected Factors After IC Test: 86


In [15]:
selected_factors_ic

Unnamed: 0,feature,ic,p_value
0,wr,-0.013704,0.0
1,vpt,-0.015464,0.0
2,volume_vwap,0.015679,0.0
3,volume_vpt,-0.039015,0.0
4,volume_sma_em,0.015516,0.0
...,...,...,...
109,high,0.015501,0.0
110,dayofyear,-0.013120,0.0
113,close,0.016395,0.0
114,amihud,-0.054762,0.0


# 2. Sharpe Ratio Test

In [41]:
fs = con.sql("select * from features_selected").fetchdf()
fs

Unnamed: 0,id
0,volume_vpt
1,volume_obv
2,volume_nvi
3,volume_adi
4,volume
5,volatility_ui
6,volatility_kcw
7,volatility_dcw
8,volatility_bbw
9,volatility_atr


Quarterly

In [57]:
def annualized_sharpe_ratio(returns, periods_per_year=252):
    """
    Compute annualized Sharpe ratio from time series of returns
    periods_per_year: 12 for monthly, 252 for daily
    """
    if len(returns) < 2:
        return 0.0  # Not enough data
    
    excess_returns = returns  # Assuming returns are already excess returns (over risk-free rate)
    mean_return = excess_returns.mean()
    std_return = excess_returns.std()
    
    if std_return == 0:
        return 0.0
    
    # Annualize
    annualized_mean = mean_return * periods_per_year
    annualized_std = std_return * np.sqrt(periods_per_year)
    
    return annualized_mean / annualized_std

factor_sharpe = []

# selected_factors_ic = list(selected_factors['factor_id'].values)
# selected_factors_ic = fs['id'].to_list()
selected_factors_ic = selected_factors['factor_id'].to_list() 

for feature in selected_factors_ic:

    query = f"""
    SELECT f.date, f.symbol, f.id AS factor_id,
        f.value AS factor_value,
        r.value AS fwd_return_3m
    FROM features_normalized2 f 
    JOIN features_cleaned r on f.symbol = r.symbol and f.date = r.date
    --JOIN features_winsorized_id_date r on f.symbol = r.symbol and f.date = r.date
        WHERE r.id = 'fwd_return_3m'
        AND f.id = '{feature}'
    """
    df = con.execute(query).fetchdf().dropna()

    if feature in [
        "volume_nvi", "volume_adi", "volatility_atr", "price_to_52w_high", "price_to_52w_close_high", "others_cr", 
        "momentum_9m", "momentum_6m", "momentum_5y", "momentum_12m_1m", "momentum_12m", "momentum_6m", "momentum_10y"
        ]:
        df['factor_value'] = -df['factor_value']

    # df_factor = df.copy()

    # Step 3: Convert dates to quarterly periods
    df['date'] = pd.to_datetime(df['date'])
    df['quarter'] = df['date'].dt.to_period('Q')  # Convert to quarters
    # Step 4: Get all quarterly rebalancing dates (start of each quarter) and sort them
    quarterly_rebalancing_dates = sorted(df['date'].drop_duplicates().dt.to_period('Q').unique().tolist())

    # Step 5: Backtest loop
    quarterly_returns = []

    for quarter in quarterly_rebalancing_dates:
        # Step 5.1: Get data available **at the start of the quarter**
        rebalance_date = df[df['quarter'] == quarter]['date'].min()
        df_rebalance = df[df['date'] == rebalance_date].copy()
        
        # Rank stocks into quintiles
        df_rebalance['rank'] = df_rebalance['factor_value'].rank(pct=True)
        df_rebalance['long'] = df_rebalance['rank'] >= 0.9  # Top 10%
        df_rebalance['short'] = df_rebalance['rank'] <= 0.1 # Bottom 10%

        # Step 5.4: Compute portfolio return using fwd_return_3m
        long_returns = df_rebalance[df_rebalance['long']]['fwd_return_3m'].mean()
        short_returns = df_rebalance[df_rebalance['short']]['fwd_return_3m'].mean()
        portfolio_return = long_returns #- short_returns

        # Store the return for this quarter
        quarterly_returns.append({'quarter': quarter, 'return': portfolio_return})

    # Step 6: Convert to DataFrame and **SORT BY QUARTER**
    portfolio_returns = pd.DataFrame(quarterly_returns).sort_values(by='quarter')
    sr = annualized_sharpe_ratio(portfolio_returns['return'], periods_per_year=4)
    print(f"{feature}: {sr}")
    factor_sharpe.append((feature, sr))

# Convert to DataFrame
sharpe_results = pd.DataFrame(factor_sharpe, columns=['factor_id', 'Sharpe'])
selected_factors_sharpe = sharpe_results[sharpe_results['Sharpe'] > 0.5]
print(f"Selected Factors After Sharpe Test: {len(selected_factors_sharpe)}")

wr: 0.6914656895983681
vpt: 0.5798856552626505
volume_vwap: 0.04718843804874026
volume_vpt: 0.7904577372793072
volume_sma_em: 0.08154716386248921
volume_obv: 0.6833164395064665
volume_nvi: 0.9465698995620585
volume_mfi: 0.40217439515702313
volume_fi: 0.32393011131194616
volume_em: 0.045453383539026206
volume_cmf: 0.37725795735910955
volume_adi: 0.9445942774369116
volume: 0.6784181400815164
volatility_ui: 0.8076284154255422
volatility_kcw: 0.8060217803284663
volatility_kcp: 0.41699134243001396
volatility_kcli: 0.7414790222949034
volatility_kcl: 0.039804408160433
volatility_kchi: 0.5003141112631149
volatility_kch: 0.02232773491911085
volatility_kcc: 0.045420884706388324
volatility_dcw: 0.7735333869739308
volatility_dcp: 0.399975894998412
volatility_dcm: 0.05993320007770001
volatility_dcl: 0.09226287645588237
volatility_dch: 0.04397024131961053
volatility_bbw: 0.706422968784976
volatility_bbp: 0.48310456903801213
volatility_bbm: 0.06501616155082574
volatility_bbli: 0.46019542329253393
vol

In [19]:
import pandas as pd
import numpy as np

# Input data as a string
data = """
wr: 0.6914656895983681 
vpt: 0.5798856552626505 
volume_vwap: 0.04718843804874026 
volume_vpt: 0.7904577372793072 
volume_sma_em: 0.08154716386248921 
volume_obv: 0.6833164395064665 
volume_nvi: 0.9465698995620585 
volume_mfi: 0.40217439515702313 
volume_fi: 0.32393011131194616 
volume_em: 0.045453383539026206 
volume_cmf: 0.37725795735910955 
volume_adi: 0.9445942774369116 
volume: 0.6784181400815164 
volatility_ui: 0.8076284154255422 
volatility_kcw: 0.8060217803284663 
volatility_kcp: 0.41699134243001396 
volatility_kcli: 0.7414790222949034 
volatility_kcl: 0.039804408160433 
volatility_kchi: 0.5003141112631149 
volatility_kch: 0.02232773491911085 
volatility_kcc: 0.045420884706388324 
volatility_dcw: 0.7735333869739308 
volatility_dcp: 0.399975894998412 
volatility_dcm: 0.05993320007770001 
volatility_dcl: 0.09226287645588237 
volatility_dch: 0.04397024131961053 
volatility_bbw: 0.706422968784976 
volatility_bbp: 0.48310456903801213 
volatility_bbm: 0.06501616155082574 
volatility_bbli: 0.46019542329253393 
volatility_bbl: 0.10454631850014047 
volatility_bbhi: 0.7121185393791359 
volatility_bbh: 0.05184894116220881 
volatility_atr: 1.1658554437382633 
vol_vo_p_1m: 0.7657329757722727 
vol_vo_p_12m: 1.123260590638699 
trend_vortex_ind_pos: 0.26077786688774734 
trend_vortex_ind_neg: 0.7575455070360182 
trend_vortex_ind_diff: 0.25609502855887484 
trend_visual_ichimoku_b: 0.08409442229553689 
trend_visual_ichimoku_a: 0.11795883504350385 
trend_trix: 0.3789362236891252 
trend_stc: 0.4519129722724283 
trend_sma_slow: 0.05678458549191371 
trend_sma_fast: 0.054829447260416386 
trend_psar_up_indicator: 0.5377606690547154 
trend_psar_up: -0.13464875419110264 
trend_psar_down_indicator: 0.4038599226903656 
trend_psar_down: 0.06275530409166093 
trend_mass_index: 0.46199108115677673 
trend_macd_signal: 0.038601267882122606 
trend_macd_diff: 0.0859804922129032 
trend_macd: -0.019013785381982735 
trend_kst_sig: 0.37288531112089074 
trend_kst_diff: 0.5694625359360648 
trend_kst: 0.29677728257697944 
trend_ichimoku_conv: 0.036220374931293486 
trend_ichimoku_base: 0.05416640457041727 
trend_ichimoku_b: 0.02342886958048618 
trend_ichimoku_a: 0.043401531493190974 
trend_ema_slow: 0.0458957553651171 
trend_ema_fast: 0.051509881297954974 
trend_dpo: 0.24321190403988174 
trend_cci: 0.5231805488581475 
trend_aroon_up: 0.5003232348308396 
trend_aroon_ind: 0.26073309031077607 
trend_aroon_down: 0.7241410376891882 
trend_adx_pos: 0.3691090115692973 
trend_adx_neg: 0.6192211997398799 
trend_adx: 0.5253992270577335 
price_to_52w_low: 0.6237090216013766 
price_to_52w_high: 0.8263238114583569 
price_to_52w_close_low: 0.5913655427125828 
price_to_52w_close_high: 0.8245604141798798 
others_dr: 0.5244681073161144 
others_dlr: 0.5245507582340635 
others_cr: 0.9705753366326166 
open: 0.027995770388266346 
month: nan 
momentum_wr: 0.3348914519432627 
momentum_uo: 0.3642951075841561 
momentum_tsi: 0.25165968291185514 
momentum_stoch_signal: 0.34084475771088 
momentum_stoch_rsi_k: 0.5441117928074249 
momentum_stoch_rsi_d: 0.41325055010619083 
momentum_stoch_rsi: 0.4418875138392069 
momentum_stoch: 0.3349623068754032 
momentum_rsi: 0.3022611977146704 
momentum_roc: 0.4710732049530271 
momentum_pvo_signal: 0.649717857390625 
momentum_pvo_hist: 0.7022728137411196 
momentum_pvo: 0.6540879148550868 
momentum_ppo_signal: 0.3622732230325548 
momentum_ppo_hist: 0.5082536342106206 
momentum_ppo: 0.39088632136245943 
momentum_kama: 0.035887968605636644 
momentum_ao: -0.01943648325954666 
momentum_9m: 0.9012328259311869 
momentum_6m: 0.7296183113247744 
momentum_5y: 0.846206246227162 
momentum_3m: 0.4648219572050348 
momentum_1m: 0.4131442082537275 
momentum_12m_1m: 0.9994217686239861 
momentum_12m: 1.0094112928257137 
momentum_10y: 0.6770891598780809 
low: 0.03550677963317369 
lottery: 0.7885534076344668 
log_volume: 0.6784181400815164 
log_price: 0.03539842997527862 
high: 0.025860972540473228 
dayofyear: nan 
dayofweek: nan 
dayofmonth: nan 
close: 0.03539842997527861 
amihud: 0.9527217513282975 
adtv_30d: 0.6997727087403415
"""

# Split the data into lines
lines = data.strip().split('\n')

# Initialize lists to store the data
features = []
sharpes = []

# Process each line
for line in lines:
    parts = line.split(': ')
    feature = parts[0]
    sharpe = float(parts[1]) if parts[1] != 'nan' else np.nan
    
    features.append(feature)
    sharpes.append(sharpe)

# Create a DataFrame
sharpe_df = pd.DataFrame({
    'feature': features,
    'sharpe': sharpes
})

# Display the DataFrame
sharpe_df

Unnamed: 0,feature,sharpe
0,wr,0.691466
1,vpt,0.579886
2,volume_vwap,0.047188
3,volume_vpt,0.790458
4,volume_sma_em,0.081547
...,...,...
111,dayofweek,
112,dayofmonth,
113,close,0.035398
114,amihud,0.952722


# Combine the results

In [20]:
# Assuming ic_results_df is already defined from the previous script
# Join the two DataFrames on the 'feature' column
ic_and_sharpe_results = pd.merge(ic_results_df, sharpe_df, on='feature', how='inner')

# Display the merged DataFrame
ic_and_sharpe_results

Unnamed: 0,feature,ic,p_value,sharpe
0,wr,-0.013704,0.000000,0.691466
1,vpt,-0.015464,0.000000,0.579886
2,volume_vwap,0.015679,0.000000,0.047188
3,volume_vpt,-0.039015,0.000000,0.790458
4,volume_sma_em,0.015516,0.000000,0.081547
...,...,...,...,...
111,dayofweek,0.000216,0.346663,
112,dayofmonth,-0.000270,0.238416,
113,close,0.016395,0.000000,0.035398
114,amihud,-0.054762,0.000000,0.952722


# Filter the results

In [23]:
# Select factors with IC > 0.01 or IC < -0.01 and Sharpe >= 0.3 or Sharpe <= -0.3
selected_factors_ic_sharpe = ic_and_sharpe_results[
    ((ic_and_sharpe_results['ic'] >= 0.01) | (ic_and_sharpe_results['ic'] <= -0.01)) &
    ((ic_and_sharpe_results['sharpe'] >= 0.3) | (ic_and_sharpe_results['sharpe'] <= -0.3))
]

print(len(selected_factors_ic_sharpe))
selected_factors_ic_sharpe

49


Unnamed: 0,feature,ic,p_value,sharpe
0,wr,-0.013704,0.0,0.691466
1,vpt,-0.015464,0.0,0.579886
3,volume_vpt,-0.039015,0.0,0.790458
5,volume_obv,0.049904,0.0,0.683316
6,volume_nvi,0.066182,0.0,0.94657
10,volume_cmf,0.017954,0.0,0.377258
11,volume_adi,0.062348,0.0,0.944594
12,volume,0.037605,0.0,0.678418
13,volatility_ui,-0.074465,0.0,0.807628
14,volatility_kcw,-0.085319,0.0,0.806022


# Save the results

In [25]:
# create duckdb table features_selected from selected_factors_mse['id']
con.sql("""
    CREATE OR REPLACE TABLE features_selected AS (
        SELECT * FROM selected_factors_ic_sharpe
    );
""")

InvalidInputException: Invalid Input Error: Cannot execute statement of type "CREATE" on database "duck" which is attached in read-only mode!

In [58]:
sharpe_results[(sharpe_results['Sharpe'] >= 0.3) | (sharpe_results['Sharpe'] <= -0.3)]

Unnamed: 0,factor_id,Sharpe
0,wr,0.691466
1,vpt,0.579886
3,volume_vpt,0.790458
5,volume_obv,0.683316
6,volume_nvi,0.946570
...,...,...
104,momentum_10y,0.677089
106,lottery,0.788553
107,log_volume,0.678418
114,amihud,0.952722


In [None]:
volume_vpt: 0.4283482150582938
volume_obv: 0.45215747218403923
volume_nvi: 0.8806182740887712
volume_adi: 0.8367229714348144
volume: 0.6240775643034828
volatility_ui: 0.7913383045258772
volatility_kcw: 0.7829298888505547
volatility_dcw: 0.7503936146761273
volatility_bbw: 0.669564776345932
volatility_atr: 1.471320992121905
vol_vo_p_12m: 1.0672966192748692
price_to_52w_high: 0.7842053152614247
price_to_52w_close_high: 0.8349328374714025
others_cr: 0.8939839735891502
momentum_9m: 0.33235338026447314
momentum_6m: 0.2404488305171433
momentum_5y: 0.885210428915087
momentum_12m_1m: 0.8982885509921844
momentum_12m: 0.47420104366739946
momentum_10y: 0.2433768649299933
lottery: 0.7412467769490553
log_volume: 0.6240775643034827
amihud: 0.7973601672830561
adtv_30d: 0.6571481495796523
Selected Factors After Sharpe Test: 18

In [None]:
volume_vpt: -0.43174584418166345
volume_obv: 0.4521574721840393
volume_nvi: -0.9046005154461747
volume_adi: -0.8363436471159194
volume: 0.6240775643034827
volatility_ui: -0.7922537649356824
volatility_kcw: -0.7841656095699453
volatility_dcw: -0.7512738532179984
volatility_bbw: -0.6692123046456694
volatility_atr: -1.448104820828147
vol_vo_p_12m: -1.0698405825112938
price_to_52w_high: -0.78584140373609
price_to_52w_close_high: -0.8367387051781012
others_cr: -0.9240561249662492
momentum_9m: -0.33452192898923183
momentum_6m: -0.24238395451081687
momentum_5y: -0.8863393511238598
momentum_12m_1m: -0.8987546322937401
momentum_12m: -0.4788102187677107
momentum_10y: -0.2465000545020666
lottery: -0.7418558541694853
log_volume: 0.6240775643034826
amihud: -0.7975303107150515
adtv_30d: 0.6571481495796522
Selected Factors After Sharpe Test: 3

In [15]:
sharpe_results

Unnamed: 0,factor_id,Sharpe
0,adtv_30d,0.657148
1,amihud,0.79736
2,log_volume,0.624078
3,lottery,0.734549
4,momentum_10y,0.243377
5,momentum_12m,0.474201
6,momentum_12m_1m,0.898289
7,momentum_5y,0.88521
8,momentum_6m,0.240449
9,momentum_9m,0.332353


adtv_30d: 0.6571481495796522
amihud: 0.797360167283056
log_volume: 0.6240775643034827
lottery: 0.734548822487241
momentum_10y: 0.2433768649299933
momentum_12m: 0.4742010436673994
momentum_12m_1m: 0.8982885509921844
momentum_5y: 0.8852104289150869
momentum_6m: 0.24044883051714336
momentum_9m: 0.3323533802644732
others_cr: 0.9225915596046939
price_to_52w_close_high: 0.8349328374714027
price_to_52w_high: 0.7842053152614247
vol_vo_p_12m: 1.0436174845080621
volatility_bbw: 0.665556525249067
volatility_dcw: 0.7488451569693538
volatility_kcw: 0.7807023900729306
volatility_ui: 0.738110934905194
volume_adi: 0.8367229714348146
volume_nvi: 0.9021893851634966
volume_obv: 0.4521574721840394
volume_vpt: 0.42834821505829385
Selected Factors After Sharpe Test: 3

daily

In [8]:
def annualized_sharpe_ratio(returns, periods_per_year=252):
    """
    Compute annualized Sharpe ratio from time series of returns
    periods_per_year: 12 for monthly, 252 for daily
    """
    if len(returns) < 2:
        return 0.0  # Not enough data
    
    excess_returns = returns  # Assuming returns are already excess returns (over risk-free rate)
    mean_return = excess_returns.mean()
    std_return = excess_returns.std()
    
    if std_return == 0:
        return 0.0
    
    # Annualize
    annualized_mean = mean_return * periods_per_year
    annualized_std = std_return * np.sqrt(periods_per_year)
    
    return annualized_mean / annualized_std

factor_sharpe = []

# selected_factors_ic = list(selected_factors['factor_id'].values)
selected_factors_ic = fs['id'].to_list()

for feature in selected_factors_ic:

    query = f"""
    SELECT f.date, f.symbol, f.id AS factor_id,
        f.value AS factor_value,
        r.value AS fwd_return_3m
    FROM features_normalized f 
    JOIN features_cleaned r on f.symbol = r.symbol and f.date = r.date
    -- JOIN features_winsorized_id r on f.symbol = r.symbol and f.date = r.date
    -- JOIN features_normalized r ON f.symbol = r.symbol AND f.date = r.date
        WHERE r.id = 'fwd_return_3m'
        AND f.id = '{feature}'
    """
    df = con.execute(query).fetchdf()

    if feature in [
        "momentum_10y", "momentum_12m", "momentum_12m_1m", "momentum_5y", "momentum_6m", "momentum_9m", 
        "others_cr", "price_to_52w_close_high", "price_to_52w_high", "volume_adi", "volume_nvi", 
        ]:
        df['factor_value'] = -df['factor_value']

    df_factor = df.copy()
        
    # Rank stocks into quintiles
    df_factor['rank'] = df_factor.groupby('date')['factor_value'].rank(pct=True)

    # Compute long-short portfolio returns
    long_returns = df_factor[df_factor['rank'] >= 0.8].groupby('date')['fwd_return_3m'].mean()
    short_returns = df_factor[df_factor['rank'] <= 0.2].groupby('date')['fwd_return_3m'].mean()

    portfolio_returns = long_returns - short_returns
    portfolio_returns = portfolio_returns.dropna()

    if len(portfolio_returns) >= 12:  # Require at least 1 year of data
        sr = annualized_sharpe_ratio(portfolio_returns, periods_per_year=252)
    else:
        sr = 0.0

    print(f"{feature}: {sr}")
    factor_sharpe.append((feature, sr))

# Convert to DataFrame
sharpe_results = pd.DataFrame(factor_sharpe, columns=['factor_id', 'Sharpe'])
selected_factors_sharpe = sharpe_results[sharpe_results['Sharpe'] > 0.9]
print(f"Selected Factors After Sharpe Test: {len(selected_factors_sharpe)}")

adtv_30d: 3.829383181880025
amihud: 4.643805446102614
log_volume: 3.242434085899013
lottery: 4.551935437243916
momentum_10y: 1.0619837058609183
momentum_12m: 2.7035997477963396
momentum_12m_1m: 2.809460488281006
momentum_5y: 5.3210767968635455
momentum_6m: 0.5135276836058025
momentum_9m: 1.4542806525201903
others_cr: 4.325573551303645
price_to_52w_close_high: 4.125160318312619
price_to_52w_high: 4.251907929399377
vol_vo_p_12m: 4.7235601825098055
volatility_bbw: 4.332896368642562
volatility_dcw: 4.588787474657784
volatility_kcw: 4.406444748346734
volatility_ui: 4.2749175637649195
volume_adi: 4.973459791687381
volume_nvi: 4.191483412901032
volume_obv: 3.5387368314552923
volume_vpt: 3.012379725131735
Selected Factors After Sharpe Test: 21


adtv_30d: 3.829383181880025
amihud: 4.643805446102614
log_volume: 3.242434085899013
lottery: 4.551935437243916
momentum_10y: 1.0619837058609183
momentum_12m: 2.7035997477963396
momentum_12m_1m: 2.809460488281006
momentum_5y: 5.3210767968635455
momentum_6m: 0.5135276836058025
momentum_9m: 1.4542806525201903
others_cr: 4.325573551303645
price_to_52w_close_high: 4.125160318312619
price_to_52w_high: 4.251907929399377
vol_vo_p_12m: 4.7235601825098055
volatility_bbw: 4.332896368642562
volatility_dcw: 4.588787474657784
volatility_kcw: 4.406444748346734
volatility_ui: 4.2749175637649195
volume_adi: 4.973459791687381
volume_nvi: 4.191483412901032
volume_obv: 3.5387368314552923
volume_vpt: 3.012379725131735
Selected Factors After Sharpe Test: 21

adtv_30d: 3.829383181880025
amihud: -4.642960117234034
log_volume: 3.242434085899013
lottery: -4.551977446809072
momentum_10y: -1.068197280266565
momentum_12m: -2.7073435225159184
momentum_12m_1m: -2.815377528444662
momentum_5y: -5.325560975738531
momentum_6m: -0.5187246987606463
momentum_9m: -1.4593848458655485
others_cr: -4.3272677631780425
price_to_52w_close_high: -4.12629320021686
price_to_52w_high: -4.253299834777007
vol_vo_p_12m: -4.722890381363356
volatility_bbw: -4.332518598955527
volatility_dcw: -4.5872113244894255
volatility_kcw: -4.406794228635602
volatility_ui: -4.275294619680599
volume_adi: -4.973861135193764
volume_nvi: -4.191511140627841
volume_obv: 3.5387368314552923
volume_vpt: -3.0153246459758365
Selected Factors After Sharpe Test: 3

adtv_30d: 10.37182884426742
amihud: 2.4352028316782874
log_volume: 10.218618897541933
lottery: 1.4906566462712518
momentum_10y: 1.559238029733736
momentum_12m: 3.8950311000504496
momentum_12m_1m: 4.542539164280306
momentum_5y: 1.326376868598897
momentum_6m: 4.04481813113957
momentum_9m: 4.27046700812115
others_cr: 3.5404282404636045
price_to_52w_close_high: 3.087844777541484
price_to_52w_high: 3.102783701952952
vol_vo_p_12m: 2.3799487625734033
volatility_bbw: 1.6069349662052366
volatility_dcw: 1.5561629602581188
volatility_kcw: 1.7163875272386167
volatility_ui: 1.9611448489951764
volume_adi: 0.9854862227354166
volume_nvi: 2.852956949401475
volume_obv: 12.474200987315733
volume_vpt: 1.403474821808464
Selected Factors After Sharpe Test: 21

In [142]:
selected_factors_sharpe

Unnamed: 0,factor_id,Sharpe
0,adtv_30d,10.371829
1,amihud,2.435203
2,log_volume,10.218619
3,lottery,1.490657
4,momentum_10y,1.559238
5,momentum_12m,3.895031
6,momentum_12m_1m,4.542539
7,momentum_5y,1.326377
8,momentum_6m,4.044818
9,momentum_9m,4.270467


# 3. MSE Test

In [155]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

factor_mse = []

selected_factors_sharpe_list = list(selected_factors_sharpe['factor_id'].values)

for feature in selected_factors_sharpe_list:

    query = f"""
    SELECT f.date, f.symbol, f.id AS factor_id,
        f.value AS factor_value,
        r.value AS fwd_return_3m
    FROM features_normalized f
    JOIN features_normalized r ON f.symbol = r.symbol AND f.date = r.date
        WHERE r.id = 'fwd_return_3m'
        AND f.id = '{feature}'
    """
    df = con.execute(query).fetchdf()

    if feature in ["amihud", "lottery", "vol_vo_p_12m", "volatility_bbw", "volatility_dcw", "volatility_kcw", "volatility_ui", "volume_vpt"]:
        df['factor_value'] = -df['factor_value']

    df_factor = df.copy()

    # Train baseline model (using only intercept)
    X_baseline = np.ones((len(df_factor), 1))  # Only intercept
    y = df_factor['fwd_return_3m']

    model_baseline = LinearRegression()
    model_baseline.fit(X_baseline, y)
    y_pred_baseline = model_baseline.predict(X_baseline)
    mse_baseline = mean_squared_error(y, y_pred_baseline)
    
    # Train simple regression model
    # Train model with factor
    X = df_factor[['factor_value']]
    y = df_factor['fwd_return_3m']
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    mse = mean_squared_error(y, y_pred)
    mse_reduction = mse_baseline - mse  # Positive means the factor improved prediction

    factor_mse.append((feature, mse, mse_baseline, mse_reduction))
    print(f"feature: {feature}, mse: {mse}, baseline_mse: {mse_baseline}, reduction: {mse_reduction}")


feature: adtv_30d, mse: 0.9994233322591375, baseline_mse: 0.9999999999999994, reduction: 0.000576667740861958
feature: amihud, mse: 0.9996389654283869, baseline_mse: 0.9996781302062757, reduction: 3.916477788878936e-05
feature: log_volume, mse: 0.9980309133240602, baseline_mse: 1.0000000000000016, reduction: 0.0019690866759413117
feature: lottery, mse: 0.9976451821910038, baseline_mse: 0.9996781302062729, reduction: 0.002032948015269098
feature: momentum_10y, mse: 0.6764857894080095, baseline_mse: 0.6767017296460431, reduction: 0.0002159402380336184
feature: momentum_12m, mse: 0.9515341634491962, baseline_mse: 0.9522638402472975, reduction: 0.0007296767981013197
feature: momentum_12m_1m, mse: 0.951255627940419, baseline_mse: 0.9522638402472986, reduction: 0.0010082123068796411
feature: momentum_5y, mse: 0.8003485267244346, baseline_mse: 0.8004425834950323, reduction: 9.405677059770579e-05
feature: momentum_6m, mse: 0.9720021852093087, baseline_mse: 0.9727598529493919, reduction: 0.0007

In [161]:
# Convert to DataFrame
mse_results = pd.DataFrame(factor_mse, columns=['id', 'MSE', 'Baseline_MSE', 'MSE_Reduction'])

# Keep features that reduce MSE
selected_factors_mse = mse_results[mse_results['MSE_Reduction'] > 0]

print(f"Selected Factors After MSE Test: {len(selected_factors_mse)}")

selected_factors_mse

Selected Factors After MSE Test: 22


Unnamed: 0,id,MSE,Baseline_MSE,MSE_Reduction
0,adtv_30d,0.999423,1.0,0.000577
1,amihud,0.999639,0.999678,3.9e-05
2,log_volume,0.998031,1.0,0.001969
3,lottery,0.997645,0.999678,0.002033
4,momentum_10y,0.676486,0.676702,0.000216
5,momentum_12m,0.951534,0.952264,0.00073
6,momentum_12m_1m,0.951256,0.952264,0.001008
7,momentum_5y,0.800349,0.800443,9.4e-05
8,momentum_6m,0.972002,0.97276,0.000758
9,momentum_9m,0.961818,0.962807,0.000988


# 4. IC Decay Test

In [None]:
ic_decay_results = []

for feature in selected_factors_mse['id']:
    
    for period in [1, 3, 6]:  # Check IC at different forward returns

        query = f"""
        SELECT f.date, f.symbol, f.id AS factor_id,
            f.value AS factor_value,
            r.value AS fwd_return_{period}m
        FROM features_normalized f
        JOIN features_normalized r ON f.symbol = r.symbol AND f.date = r.date
            WHERE r.id = 'fwd_return_{period}m'
            AND f.id = '{feature}'
        """
        df = con.execute(query).fetchdf()

        if feature in ["amihud", "lottery", "vol_vo_p_12m", "volatility_bbw", "volatility_dcw", "volatility_kcw", "volatility_ui", "volume_vpt"]:
            df['factor_value'] = -df['factor_value']

        df_factor = df.copy()

        # df_factor[f'future_return_{period}M'] = df_factor.groupby('symbol')[f'fwd_return_{period}m'].shift(-period)
        ic = spearmanr(df_factor['factor_value'], df_factor[f'fwd_return_{period}m'])[0]
        ic_decay_results.append((feature, period, ic))


ic_decay_df = pd.DataFrame(ic_decay_results, columns=['id', 'Horizon', 'IC'])
ic_decay_df = ic_decay_df.pivot(index='id', columns='Horizon', values='IC')

# # Keep only stable IC factors
selected_factors_stable = ic_decay_df[ic_decay_df.min(axis=1) > 0.03]
print(f"Selected Factors After IC Decay Test: {len(selected_factors_stable)}")

ic_decay_df

Horizon,1,3,6
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
adtv_30d,0.029277,0.041813,0.050335
amihud,0.040395,0.04641,0.053241
log_volume,0.0309,0.04276,0.051972
lottery,0.05544,0.074092,0.08797
momentum_10y,0.035931,0.049338,0.074285
momentum_12m,0.046525,0.060259,0.067135
momentum_12m_1m,0.05109,0.063805,0.065859
momentum_5y,0.040141,0.055525,0.070639
momentum_6m,0.035586,0.055855,0.078449
momentum_9m,0.039442,0.063617,0.080041


# Save Features

In [165]:
# create duckdb table features_selected from selected_factors_mse['id']
con.sql("""
    CREATE OR REPLACE TABLE features_selected AS (
        SELECT id FROM selected_factors_mse
    );
""")

In [167]:
%%sql
select * from features_selected

id
adtv_30d
amihud
log_volume
lottery
momentum_10y
momentum_12m
momentum_12m_1m
momentum_5y
momentum_6m
momentum_9m


Save selected features in horizontal format

In [47]:
con.sql("""
    CREATE OR REPLACE TEMP TABLE features_finala AS (
        with features as (
            select
                a.*
            from features_normalized a 
            where a.id in (
                'adtv_30d','amihud','log_volume','lottery','momentum_10y'
                ,'momentum_12m','momentum_12m_1m','momentum_5y','momentum_6m'
                ,'momentum_9m','others_cr','price_to_52w_close_high','price_to_52w_high'
                ,'vol_vo_p_12m','volatility_bbw','volatility_dcw','volatility_kcw'
                ,'volatility_ui','volume_adi','volume_nvi','volume_obv','volume_vpt'
                ,'fwd_return_3m'
            )
            and a.date <= '2022-01-01'
        )
        pivot features on id using first(value)
    );
""")



In [48]:
con.sql("""
    CREATE OR REPLACE TEMP TABLE features_finalb AS (
        with features as (
            select
                a.*
            from features_normalized a 
            where a.id in (
                'adtv_30d','amihud','log_volume','lottery','momentum_10y'
                ,'momentum_12m','momentum_12m_1m','momentum_5y','momentum_6m'
                ,'momentum_9m','others_cr','price_to_52w_close_high','price_to_52w_high'
                ,'vol_vo_p_12m','volatility_bbw','volatility_dcw','volatility_kcw'
                ,'volatility_ui','volume_adi','volume_nvi','volume_obv','volume_vpt'
                ,'fwd_return_3m'
            )
            and a.date > '2022-01-01'
        )
        pivot features on id using first(value)
    );
""")


In [49]:
con.sql("""
    CREATE OR REPLACE TABLE features_final AS (
        SELECT * FROM features_finala
        union all
        SELECT * FROM features_finalb
    );
""")

In [57]:
%%sql
select * from features_final where date <= '2024-06-01' order by date desc

date,symbol,adtv_30d,amihud,fwd_return_3m,log_volume,lottery,momentum_10y,momentum_12m,momentum_12m_1m,momentum_5y,momentum_6m,momentum_9m,others_cr,price_to_52w_close_high,price_to_52w_high,vol_vo_p_12m,volatility_bbw,volatility_dcw,volatility_kcw,volatility_ui,volume_adi,volume_nvi,volume_obv,volume_vpt
2024-05-31,ENTG,-0.0200619001961991,-0.3747293553199437,-0.5261318096443015,1.0034795505003269,-0.2380375252916757,3.841820185876516,0.1718873266027267,0.2247150721019618,1.516551042274648,0.4016747097924485,0.6860770569774889,4.690695572064845,0.3042794802402138,0.3163160212864881,-0.8273733727172772,-0.5510741629529992,-0.3972509482016376,0.004682548138694,-0.3817992735537041,0.6354843582347891,1.6171808618917145,0.5004297612069035,-0.082318757375534
2024-05-31,CCLD,-0.304150299912981,0.646466223445163,0.3231696877906925,-0.282635104496324,4.403002139714292,,-0.4825697748858431,-2.2117192940043524,,4.123454315929814,2.279074034486412,-0.4270772822183894,-0.2342575817577669,-0.240141126444013,0.742096723716063,4.30958533768406,4.158924359528461,2.632760758398918,0.2726616526821353,-0.2326186567966805,-0.462177409962264,-0.4250969122199116,-0.2548839949490534
2024-05-31,ASTL,-0.3125334749826244,-0.3649575801972102,1.4310743264335148,0.2971581135581543,-0.3446737022880942,,-0.1195402476747909,-0.0794399519480836,,-0.4132184103162089,0.0599226976591197,-0.4472529757342336,-0.1838057806341062,-0.1337575788774071,0.0209970959671371,-0.4140287726312047,-0.3784413643551793,-0.0360344240463243,-0.299072842054946,-0.1417186315220596,-0.5559482930302814,-0.3235063095918817,-0.2369073151691932
2024-05-31,MPAA,-0.3377012106645266,-0.3133153345611728,1.5554157451666328,-0.6744787961087805,-0.061659791561304,-0.7156365622220224,-0.4683636287825626,-0.2339928514777771,-0.902473694238326,-1.713134228125392,-0.9866543048955478,-0.5547448248968853,-1.2016229710244182,-1.1633671229264382,-0.0104009343505931,-0.1174285966270733,0.1624402768756972,0.5235363784865453,0.7061826318069561,-0.1841146024673543,-0.579739486418554,-0.372193049654032,-0.2327829315040744
2024-05-31,AVDL,-0.1912403676864034,-0.3727752173913752,-0.340658902336697,0.4235396779253699,-0.2003448251328218,-0.3070348396827234,0.0421029391786499,0.393480271096999,5.248499347262581,0.3244462670648104,0.4652997597719098,0.2477730072228326,0.215927811936136,0.2183655198701617,-0.264953010523376,0.3285700926548748,0.5793573399665766,0.0042129740304702,1.180416621075224,0.9578370082813806,-0.5186837441604072,0.7000224642235344,4.956571968863724
2024-05-31,ROAM,-0.3506496543294681,-0.3296681754706776,0.136771414833221,-1.2611983097954056,-0.6274059492170928,,0.1459696974493383,0.1553885451495368,-0.2174995596296925,0.0703603151302825,0.1421730625061937,-0.3252152881470235,0.7234451113129342,0.7550491480345823,1.6157376134795298,-0.62543253322721,-0.6939401783529782,-0.8257766618501557,-0.7017549701261415,,-0.2429120304081792,-0.4235674138327763,-0.2819792332998512
2024-05-31,GDC,-0.3477027687854429,3.6179372989268046,4.277028667539435,-0.8808192441204941,0.9786845316835084,,-1.8232537706521068,-2.2117192940043524,,-2.085120353170004,-2.1347932437186543,-0.6235172488994243,-2.4320720857875404,-2.39212088583128,1.078985511926385,2.135777511946485,2.097851154202711,3.030675326392271,-0.1874539165446259,-0.7012956380010048,-0.6744281015492406,-0.0446040677225688,4.956571968863724
2024-05-31,DIHP,-0.1805550009176644,-0.3745214249161632,0.0945964343472204,0.7405608584222362,-0.679009830737177,,0.1127290777547588,0.0650737082420633,,0.115588991520157,0.1593182560599201,-0.2644761328121522,0.8154738458075161,0.8427917296110135,-0.9224053082190716,-0.6965577501341598,-0.7378952428902101,-0.7652511358117465,-0.7168208259747667,-0.1397446261945965,-0.2500642144023866,-0.4133369602303045,-0.2769923417848327
2024-05-31,CIG,0.5562885892152601,-0.3709207849379564,0.5046664783190088,1.1880428596748864,4.403002139714292,-0.4148925865005972,0.384782323441232,0.6064063607588898,0.1244050694644232,1.1303931494658517,0.55628653451469,-0.5876174506773048,0.4668829635382266,0.4580660326819362,-0.3957024496783321,-0.3294598936215935,-0.271588708285869,-0.0355291418627509,-0.1386217128857143,-1.9298708340789732,-0.6175797224132493,5.308770677422082,0.0479375754843491
2024-05-31,PL,0.1114062065067141,-0.3663313988919732,1.6675680997504354,1.0218075063016747,0.0187595962169452,,-1.1606695469843078,-1.2454012145641598,,-1.058167297133529,-1.0386482767610188,-0.6321180227039771,-1.6927981686727176,-1.6321640800122623,-0.8465835704781782,0.0780295098247396,-0.0543559322664494,0.4968466133196133,0.4487013262445647,-0.7798950171217929,-0.589673744417943,-0.2664335569185699,-0.5420503976001877


# Close Database Connection

In [5]:
con.close()