In [4]:
import pandas as pd
import os
from scipy.signal import argrelextrema
import numpy as np

In [3]:
#The aim of this code is to process the raw data into DataFrames

In [28]:
#If no argument is given, the 'raw' directory is used
def load_all_data(raw_dir='raw'):
    """Load every raw CSV file into a nested ``{category: {ticker: DataFrame}}`` dict.

    Parameters
    ----------
    raw_dir : str
        Directory containing the sub-folders ``stocks``, ``indices`` and
        ``commodities``. Each ``<ticker>.csv`` inside them becomes one DataFrame.

    Returns
    -------
    dict
        ``{'stocks': {...}, 'indices': {...}, 'commodities': {...}}`` where each
        inner dict maps the file name without extension to its DataFrame. The
        first CSV column is used as the index and rows containing any NaN are
        dropped.

    Raises
    ------
    FileNotFoundError
        If one of the three category folders does not exist under ``raw_dir``.
    """
    data = {'stocks': {}, 'indices': {}, 'commodities': {}}
    for category in data:
        folder_path = os.path.join(raw_dir, category)

        # sorted() makes the load order (and therefore the dict order) deterministic;
        # os.listdir() returns entries in arbitrary, platform-dependent order.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            ticker, extension = os.path.splitext(filename)

            # Only real CSV files are data: skip checkpoint folders, hidden files,
            # notes and similar entries instead of crashing or loading them as tickers.
            if extension.lower() != '.csv' or not os.path.isfile(file_path):
                continue

            # index_col=0 makes the date column the index; parse_dates/date_format
            # turn it into datetime objects.
            df = pd.read_csv(file_path, index_col=0, parse_dates=True, date_format='%Y-%m-%d')
            data[category][ticker] = df.dropna()
    return data

In [5]:
#Adding the indicators
def _days_since_last_flag(flags):
    """Return, per row, how many rows have passed since the latest True in ``flags``."""
    # cumsum() labels every row with the running count of flags seen so far
    # (0 before the first flag); cumcount() then numbers the rows inside each
    # of those runs starting at 0, so a flagged row itself gets 0.
    run_id = flags.cumsum()
    return run_id.groupby(run_id.to_numpy()).cumcount()


def _extrema_flags(values, comparator, order):
    """Return a boolean array marking the relative extrema found by ``argrelextrema``."""
    flags = np.zeros(len(values), dtype=bool)
    flags[argrelextrema(values, comparator, order=order)[0]] = True
    return flags


def add_indicators(data, rsi_span=14, extrema_order=10):
    """Add technical indicators to every DataFrame in ``data``.

    Columns added to each frame: ``MA_30w`` / ``MA_40w`` (150- and 200-row
    rolling means of ``Close``), ``RSI``, ``MACD``, ``MACD_signal``,
    ``DaysSinceBottom`` and ``DaysSinceTop``. Rows containing any NaN (such as
    the warm-up rows of the rolling means) are dropped afterwards.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}``; every frame needs a ``Close`` column.
        The input frames are copied, but the dict entries are replaced in place.
    rsi_span : int, default 14
        ``span`` of the exponential averages of gains and losses used for RSI.
    extrema_order : int, default 10
        Number of rows on each side a point is compared with when looking for
        local bottoms/tops.

    Returns
    -------
    dict
        The same ``data`` dict, with every frame replaced by its enriched copy.

    Notes
    -----
    NOTE(review): ``argrelextrema`` compares each point with ``extrema_order``
    rows on BOTH sides, so a bottom/top at row t is only identified using rows
    after t. ``DaysSinceBottom`` / ``DaysSinceTop`` therefore contain look-ahead
    information; confirm this is acceptable before using them as model features.

    NOTE(review): the RSI smoothing uses ``ewm(span=...)`` (alpha = 2/(span+1));
    Wilder's classic RSI uses alpha = 1/period. Kept as is to preserve the
    existing feature values.
    """
    for category in data:
        for ticker, df in data[category].items():
            df = df.copy()

            # Make Close numeric; unparsable values become NaN.
            df['Close'] = pd.to_numeric(df['Close'], errors='coerce')
            close = df['Close']

            # Moving averages (30 and 40 weeks of 5 trading days).
            df['MA_30w'] = close.rolling(window=150).mean()
            df['MA_40w'] = close.rolling(window=200).mean()

            # RSI: ratio of smoothed gains to smoothed losses, mapped to 0-100.
            delta = close.diff()
            gain = delta.where(delta > 0, 0).ewm(span=rsi_span, adjust=False).mean()
            loss = (-delta.where(delta < 0, 0)).ewm(span=rsi_span, adjust=False).mean()
            df['RSI'] = 100 - (100 / (1 + gain / loss))

            # MACD (12/26 EMA difference) and its 9-period signal line.
            ema_12 = close.ewm(span=12, adjust=False).mean()
            ema_26 = close.ewm(span=26, adjust=False).mean()
            df['MACD'] = ema_12 - ema_26
            df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

            # Local bottoms/tops within +-extrema_order rows, then the number of
            # rows since the most recent one. Before the first extremum the
            # counter simply counts rows from the start of the series.
            close_values = close.values
            is_bottom = pd.Series(
                _extrema_flags(close_values, np.less_equal, extrema_order), index=df.index
            )
            is_top = pd.Series(
                _extrema_flags(close_values, np.greater_equal, extrema_order), index=df.index
            )
            df['DaysSinceBottom'] = _days_since_last_flag(is_bottom).to_numpy()
            df['DaysSinceTop'] = _days_since_last_flag(is_top).to_numpy()

            data[category][ticker] = df.dropna()

    return data

In [19]:
#Saving the processed data in csv files
def save_processed_data_train(data, save_dir='processed/train'):
    """Write every training DataFrame to ``<save_dir>/<category>/<ticker>.csv``.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}``.
    save_dir : str
        Root folder for the training files. The category sub-folders are
        created if they do not exist, so any target directory can be used.
    """
    for category, frames in data.items():
        category_dir = os.path.join(save_dir, category)
        # to_csv() does not create missing folders, so make sure the target exists.
        os.makedirs(category_dir, exist_ok=True)
        for ticker, df in frames.items():
            df.to_csv(os.path.join(category_dir, f'{ticker}.csv'))

In [16]:
#Saving the processed data in csv files
def save_processed_data_test(data, save_dir='processed/test'):
    """Write every test DataFrame to ``<save_dir>/<category>/<ticker>.csv``.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}``.
    save_dir : str
        Root folder for the test files. The category sub-folders are created
        if they do not exist, so any target directory can be used.
    """
    for category, frames in data.items():
        category_dir = os.path.join(save_dir, category)
        # to_csv() does not create missing folders, so make sure the target exists.
        os.makedirs(category_dir, exist_ok=True)
        for ticker, df in frames.items():
            df.to_csv(os.path.join(category_dir, f'{ticker}.csv'))

In [7]:
def add_market_regime(data, index_name='OMX'):
    """Attach a bull/bear ``Regime`` column, derived from a market index, to every stock.

    The regime is 1 when the index ``Close`` is above its 252-row moving
    average and 0 otherwise. Dates for which the moving average cannot be
    computed yet (the first 251 index rows) have no regime; stock rows on those
    dates, or on dates missing from the index, are dropped instead of being
    labelled 0.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}`` with an ``'indices'`` and a
        ``'stocks'`` entry. Stock entries are replaced in place; the index
        frame is not modified.
    index_name : str, default 'OMX'
        Key of the index in ``data['indices']`` used to define the regime.

    Returns
    -------
    dict
        The same ``data`` dict with a ``Regime`` column (int) on every stock frame.

    Raises
    ------
    KeyError
        If ``index_name`` is not present in ``data['indices']``.
    """
    if index_name not in data['indices']:
        raise KeyError(f"Index '{index_name}' not found in data['indices']")

    index_close = pd.to_numeric(data['indices'][index_name]['Close'], errors='coerce')

    # 12-month moving average (252 trading days); NaN until a full window exists.
    ma_12m = index_close.rolling(window=252).mean()

    # A comparison against NaN is False, which would silently mark the warm-up
    # period as a bear market. Keep the regime undefined (NaN) there instead.
    regime = (index_close > ma_12m).astype(float).where(ma_12m.notna()).rename('Regime')

    # Add the regime to every stock frame.
    for ticker, df in data['stocks'].items():
        # Drop a Regime column left by an earlier run so the function can be
        # re-run without producing a duplicate, suffixed column.
        df = df.drop(columns='Regime', errors='ignore')
        df = df.join(regime, how='left').dropna()
        df['Regime'] = df['Regime'].astype(int)
        data['stocks'][ticker] = df

    return data
    

In [8]:
#Making targets for the model
def make_targets(data, horizons=(10, 15, 20)):
    """Create binary up/down targets for each look-ahead horizon.

    For every horizon ``h`` two columns are added: ``Target_{h}d_up`` is 1 when
    ``Close`` ``h`` rows ahead is higher than the current ``Close``, and
    ``Target_{h}d_down`` is 1 when it is lower. Rows at the end of a frame that
    have no ``Close`` ``h`` rows ahead cannot be labelled and are dropped, along
    with any other row containing NaN.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}``; every frame needs a ``Close`` column.
        Entries are replaced in place with labelled copies.
    horizons : iterable of int, default (10, 15, 20)
        Look-ahead distances in rows.

    Returns
    -------
    dict
        The same ``data`` dict with integer target columns on every frame.
    """
    horizons = tuple(horizons)

    for category in data:
        for ticker, df in data[category].items():
            df = df.copy()
            target_cols = []

            for h in horizons:
                future_close = df['Close'].shift(-h)
                # Comparing against a missing future price yields False, which
                # would label the last h rows as 0. Mark them NaN so the
                # dropna() below removes them.
                has_future = future_close.notna()

                up_col = f'Target_{h}d_up'
                down_col = f'Target_{h}d_down'
                df[up_col] = (future_close > df['Close']).astype(float).where(has_future)
                df[down_col] = (future_close < df['Close']).astype(float).where(has_future)
                target_cols.extend([up_col, down_col])

            df = df.dropna()
            if target_cols:
                # The NaN markers forced a float dtype; restore integer labels.
                df = df.astype({col: int for col in target_cols})
            data[category][ticker] = df

    return data

In [1]:
def add_target_action(data, horizons=(10, 15, 20)):
    """Combine regime and horizon targets into a single ``Target_Action`` label per stock row.

    ``Target_Action`` is
    * ``1`` (long) when ``Regime == 1`` and any ``Target_{h}d_up`` is 1,
    * ``-1`` (short) when ``Regime == 0`` and any ``Target_{h}d_down`` is 1,
    * ``0`` (wait) otherwise.

    Parameters
    ----------
    data : dict
        Must contain ``data['stocks']`` mapping ticker to a DataFrame with a
        ``Regime`` column and the ``Target_{h}d_up`` / ``Target_{h}d_down``
        columns for every horizon. Entries are replaced in place.
    horizons : iterable of int, default (10, 15, 20)
        Horizons whose target columns are combined; use the same values that
        were used to create the target columns.

    Returns
    -------
    dict
        The same ``data`` dict with ``Target_Action`` added to every stock frame.

    Raises
    ------
    KeyError
        If a required ``Regime`` or target column is missing from a frame.
    """
    horizons = tuple(horizons)
    up_cols = [f'Target_{h}d_up' for h in horizons]
    down_cols = [f'Target_{h}d_down' for h in horizons]

    for ticker, df in data['stocks'].items():
        df = df.copy()

        any_up = (df[up_cols] == 1).any(axis=1)
        any_down = (df[down_cols] == 1).any(axis=1)

        df['Target_Action'] = 0  # default: wait
        # Bull market + rise on any horizon = long
        df.loc[(df['Regime'] == 1) & any_up, 'Target_Action'] = 1
        # Bear market + decline on any horizon = short
        df.loc[(df['Regime'] == 0) & any_down, 'Target_Action'] = -1

        data['stocks'][ticker] = df

    return data

In [2]:
#Splitting the data into train and test
def split_train_test(data, cutoff_date='2020-01-01'):
    """Split every DataFrame into a training part and a test part by date.

    Parameters
    ----------
    data : dict
        ``{category: {ticker: DataFrame}}``; each frame's index is compared
        against ``cutoff_date``.
    cutoff_date : str, default '2020-01-01'
        Rows with an index strictly before this value go to the training set,
        rows on or after it go to the test set.

    Returns
    -------
    tuple of dict
        ``(train, test)``, both shaped like ``data``. The keys ``'stocks'``,
        ``'indices'`` and ``'commodities'`` are always present; any other
        category found in ``data`` is carried over as well.
    """
    default_categories = ('stocks', 'indices', 'commodities')
    train = {category: {} for category in default_categories}
    test = {category: {} for category in default_categories}

    for category, frames in data.items():
        # setdefault() lets categories beyond the three defaults pass through
        # instead of raising KeyError.
        train_frames = train.setdefault(category, {})
        test_frames = test.setdefault(category, {})

        for ticker, df in frames.items():
            # .copy() keeps the two parts independent of the input frame.
            train_frames[ticker] = df[df.index < cutoff_date].copy()
            test_frames[ticker] = df[df.index >= cutoff_date].copy()

    return train, test


In [5]:
#Create the folder tree for the processed data: processed/<split>/<category>
processed_dir = 'processed'
os.makedirs(processed_dir, exist_ok=True)

for _split in ('train', 'test'):
    os.makedirs(f"{processed_dir}/{_split}", exist_ok=True)
    for _category in ('stocks', 'indices', 'commodities'):
        os.makedirs(f"{processed_dir}/{_split}/{_category}", exist_ok=True)

In [36]:
# Full processing pipeline: load raw CSVs, add indicators, then attach the
# market regime to the stocks. Each step returns the (updated) data dict.
data = load_all_data()
data = add_indicators(data)
data = add_market_regime(data)

# Split by date after the indicators are computed, so rolling features near
# the cutoff can draw on the rows before it.
data_train, data_test = split_train_test(data)

# NOTE(review): targets and Target_Action are generated for the training split
# only; data_test is passed to the save step without going through
# make_targets/add_target_action. Confirm the test split is labelled elsewhere.
data_train = make_targets(data_train)
data_train = add_target_action(data_train)

# Write both splits to CSV under their default output folders.
save_processed_data_train(data_train)
save_processed_data_test(data_test)