In [6]:
import os
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [7]:
def train_model_for_category(category, data_path='processed/train', features=None,
                             target='Target_Action', save_dir='models'):
#-- 
    # Training a RandomForest-model on all data gathered for training within a chosen category.
    # Saving the trained model as a .pkl file
#--    

    if features is None:
        # If no features are specified these standard features will be selected
        features = ['Close', 'Volume', 'MA_30w', 'MA_40w',
                    'RSI', 'MACD', 'MACD_signal',
                    'Regime', 'DaysSinceBottom', 'DaysSinceTop']

    # Path to the map for the chosen category
    category_path = os.path.join(data_path, category)
    # The trainingdata is stored
    X_train, y_train = [], []

    # Looping trough the every file (one file per stock) in the map for the category
    for filename in os.listdir(category_path):
        filepath = os.path.join(category_path, filename)
        try:
            # Reading the file as a DataFrame
            df = pd.read_csv(filepath, index_col=0, parse_dates=True)
        except UnicodeDecodeError:
            # If there is a file with encoding error, skip
            print(f"Skipping file due to encoding error: {filename}")
            continue
        except Exception as e:
            # If there is a file with unexpected error, skip
            print(f"Skipping file '{filename}' due to unexpected error: {e}")
            continue

        # Controlling that all the features and targets are in the data
        if set(features).issubset(df.columns) and target in df.columns:
            # Removing rows without data
            df = df.dropna(subset=features + [target])
            # Saving features and targets for current file
            X_train.append(df[features])
            y_train.append(df[target])
        else:
            print(f"Skipping '{filename}' — missing required columns.")

    # If no data for training was found, cancel
    if not X_train:
        raise ValueError("No valid training data found!")

    # Merges all data into one big DataFrame, otherwise the model can't train on it
    X_train = pd.concat(X_train)
    y_train = pd.concat(y_train)

    # Creating and training a RandomForest-model
    model = RandomForestClassifier(n_estimators=500, class_weight='balanced', random_state=42)
    model.fit(X_train, y_train)

    # Creating a map if there is none and saving the model as pickle-file
    os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, f'{category}_model.pkl')
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    print(f"Modell för '{category}' sparad som '{model_path}'")
    return model # Returning the model so that it can be used directly in Python

In [18]:
def test_model_on_asset(model, filepath, features=None):
    if features is None:
        # If no features are specified these standard features will be selected
        features = ['Close', 'Volume', 'MA_30w', 'MA_40w',
                    'RSI', 'MACD', 'MACD_signal',
                    'Regime', 'DaysSinceBottom', 'DaysSinceTop']

    # Extract ticker name from file path
    ticker = os.path.splitext(os.path.basename(filepath))[0]
    df = pd.read_csv(filepath, index_col=0, parse_dates=True)

    # Ensure required 'Open' column exists for trade simulation
    if 'Open' not in df.columns:
        raise ValueError("Kolumnen 'Open' krävs för att simulera realistisk handel.")

    # Drop rows with missing values in features
    df = df.dropna(subset=features)

     # Prepare input features and make predictions
    X_test = df[features]
    y_pred = model.predict(X_test)

    # Store predictions as trading signals
    df['Signal'] = y_pred

    # Shift signals by one day to simulate delayed execution
    df['Position'] = df['Signal'].shift(1).fillna(0).astype(int)

    # Calculate daily returns and cumulative returns (Buy & Hold)
    df['Return'] = df['Close'].pct_change().fillna(0)
    df['CumulativeReturn'] = (1 + df['Return']).cumprod()

    # Prepare strategy return column
    df['StrategyReturn'] = 0.0

    # Initialize trading state variables
    trades = []
    in_position = False
    entry_price = None
    entry_date = None
    entry_pos = None
    pending_new_entry = None

    # Loop through each row to simulate trading decisions
    for i in range(1, len(df) - 1):
        today = df.index[i]
        today_signal = df['Signal'].iloc[i]
        prev_signal = df['Signal'].iloc[i - 1]

        # Enter pending position (e.g., after reversal)
        if pending_new_entry is not None:
            entry_price = df['Open'].iloc[i]
            entry_date = today
            entry_pos = pending_new_entry
            in_position = True
            pending_new_entry = None

        # Enter new position
        elif not in_position and today_signal in [-1, 1]:
            entry_price = df['Open'].iloc[i]
            entry_date = today
            entry_pos = today_signal
            in_position = True

        # Exit if signal turns neutral
        elif in_position and today_signal == 0 and prev_signal != 0:
            exit_price = df['Open'].iloc[i]
            exit_date = today
            trade_return = (exit_price / entry_price - 1) if entry_pos == 1 else (entry_price / exit_price - 1)
            df.at[exit_date, 'StrategyReturn'] = trade_return
            
            # Save trade
            trades.append({
                'entry_date': entry_date,
                'exit_date': exit_date,
                'position': entry_pos,
                'entry_price': entry_price,
                'exit_price': exit_price,
                'return': trade_return
            })
            # Reset position
            in_position = False
            entry_price = entry_date = entry_pos = None

        # Exit and reverse position if signal changes direction
        elif in_position and (today_signal != 0 and np.sign(today_signal) != np.sign(prev_signal)):
            exit_price = df['Open'].iloc[i]
            exit_date = today
            trade_return = (exit_price / entry_price - 1) if entry_pos == 1 else (entry_price / exit_price - 1)
            df.at[exit_date, 'StrategyReturn'] = trade_return
            trades.append({
                'entry_date': entry_date,
                'exit_date': exit_date,
                'position': entry_pos,
                'entry_price': entry_price,
                'exit_price': exit_price,
                'return': trade_return
            })
            # Schedule new entry for tomorrow
            in_position = False
            pending_new_entry = today_signal if today_signal in [-1, 1] else None
            entry_price = entry_date = entry_pos = None

    # Force exit if still in position on the last day
    if in_position and df['Signal'].iloc[-1] == 0:
        exit_price = df['Open'].iloc[-1]
        exit_date = df.index[-1]
        trade_return = (exit_price / entry_price - 1) if entry_pos == 1 else (entry_price / exit_price - 1)
        df.at[exit_date, 'StrategyReturn'] = trade_return
        trades.append({
            'entry_date': entry_date,
            'exit_date': exit_date,
            'position': entry_pos,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'return': trade_return
        })

    # Calculate cumulative strategy return
    df['CumulativeStrategyReturn'] = (1 + df['StrategyReturn']).cumprod()

    # Risk metrics
    roll_max = df['CumulativeStrategyReturn'].cummax()
    drawdown = df['CumulativeStrategyReturn'] / roll_max - 1
    max_drawdown = drawdown.min()
    sharpe = df['StrategyReturn'].mean() / df['StrategyReturn'].std(ddof=0) * np.sqrt(252) if df['StrategyReturn'].std() > 0 else np.nan

    # Compile trade and performance stats
    trade_df = pd.DataFrame(trades)
    stats = pd.DataFrame([{
        'ticker': ticker,
        'total_return_strategy': df['CumulativeStrategyReturn'].iloc[-1],
        'total_return_bh': df['CumulativeReturn'].iloc[-1],
        'sharpe_ratio': sharpe,
        'max_drawdown': max_drawdown,
        'num_trades': len(trade_df),
        'win_rate': (trade_df['return'] > 0).mean() if not trade_df.empty else 0.0
    }])

    # Save results
    os.makedirs("outputs/signals", exist_ok=True)
    os.makedirs("outputs/trades", exist_ok=True)
    os.makedirs("outputs/stats", exist_ok=True)

    df.to_csv(f"outputs/signals/{ticker}_signals.csv")
    trade_df.to_csv(f"outputs/trades/{ticker}_trades.csv", index=False)
    stats.to_csv(f"outputs/stats/{ticker}_stats.csv", index=False)

    # Print summary to console
    print(f"\nSaved: {ticker}")
    print(f"Model Return: {df['CumulativeStrategyReturn'].iloc[-1]:.2f}×")
    print(f"Buy & Hold:        {df['CumulativeReturn'].iloc[-1]:.2f}×")
    print(f"Sharpe Ratio: {sharpe:.2f} | Max DD: {max_drawdown:.2%}")
    print(f"Trades: {len(trade_df)} | Win Rate: {(trade_df['return'] > 0).mean():.1%}")

    return df, trade_df, stats

In [14]:
def load_model_for_category(category, model_dir='models'):
    
    # Construct the path to the saved model file based on category name
    path = os.path.join(model_dir, f'{category}_model.pkl')

    # If the file does not exist, raise an error
    if not os.path.exists(path):
        raise FileNotFoundError(f"Ingen modell hittades för '{category}'")

    # Load the model from the pickle file
    with open(path, 'rb') as f:
        model = pickle.load(f)

    # Print confirmation
    print(f"Modell för '{category}' laddad.")
    return model

In [206]:
train_model_for_category('stocks')

Modell för 'stocks' sparad som 'models/stocks_model.pkl'


In [20]:
# Loading model
model_stocks = load_model_for_category('stocks')

Modell för 'stocks' laddad.


In [19]:
folder = 'processed/test/stocks'

#Looping trough all .csv-files in the map
for file in os.listdir(folder):
    if file.endswith('.csv'):
        filepath = os.path.join(folder, file)
        print(f"Running test for: {file}")
        try:
            test_model_on_asset(model_stocks, filepath)
        except Exception as e:
            print(f"Error with {file}: {e}")

Running test for: NDA-SE.ST.csv

Saved: NDA-SE.ST
Model Return: 3.82×
Buy & Hold:        1.86×
Sharpe Ratio: 0.98 | Max DD: -17.54%
Trades: 108 | Win Rate: 50.0%
Running test for: SCA-B.ST.csv

Saved: SCA-B.ST
Model Return: 2.53×
Buy & Hold:        1.41×
Sharpe Ratio: 0.73 | Max DD: -20.28%
Trades: 92 | Win Rate: 45.7%
Running test for: ALFA.ST.csv

Saved: ALFA.ST
Model Return: 2.50×
Buy & Hold:        1.86×
Sharpe Ratio: 0.78 | Max DD: -27.66%
Trades: 110 | Win Rate: 49.1%
Running test for: GETI-B.ST.csv

Saved: GETI-B.ST
Model Return: 4.17×
Buy & Hold:        1.16×
Sharpe Ratio: 0.86 | Max DD: -32.44%
Trades: 119 | Win Rate: 55.5%
Running test for: NIBE-B.ST.csv

Saved: NIBE-B.ST
Model Return: 1.67×
Buy & Hold:        0.98×
Sharpe Ratio: 0.43 | Max DD: -66.14%
Trades: 97 | Win Rate: 49.5%
Running test for: SWED-A.ST.csv

Saved: SWED-A.ST
Model Return: 4.80×
Buy & Hold:        2.50×
Sharpe Ratio: 1.08 | Max DD: -25.57%
Trades: 103 | Win Rate: 59.2%
Running test for: SHB-A.ST.csv

Save