In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
import joblib
from pathlib import Path


def load_stock_data(file_path):
    """Read a price CSV and return it as a date-indexed OHLCV DataFrame.

    The 'date' column is parsed as datetimes and becomes the index, the
    optional 'complete' column is discarded, and the lowercase price/volume
    columns are renamed to their capitalised forms.
    """
    column_names = {
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'volume': 'Volume',
    }
    raw = pd.read_csv(file_path, parse_dates=['date'])
    # A missing 'complete' column is tolerated rather than treated as an error.
    trimmed = raw.drop(columns=['complete'], errors='ignore')
    return trimmed.set_index('date').rename(columns=column_names)

def plot_records_distribution(df):
    """Draw a bar chart of how many records fall on each calendar day.

    The frame is resampled by day on its index, and every bar is annotated
    with its record count.
    """
    daily_counts = df.resample('D').size().reset_index()
    daily_counts.columns = ['date', 'count']

    plt.figure(figsize=(12, 6))
    day_bars = plt.bar(daily_counts['date'], daily_counts['count'], color='blue')
    plt.xlabel('Ημερομηνία')
    plt.ylabel('Πλήθος Εγγραφών')
    plt.title('Κατανομή Εγγραφών')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Write the record count just above each bar, in a larger font.
    for rect in day_bars:
        count = rect.get_height()
        x_center = rect.get_x() + rect.get_width() / 2
        plt.text(x_center, count, f'{int(count)}', ha='center', va='bottom', fontsize=12)

    plt.tight_layout()
    plt.show()

def plot_stock_prices(df):
    """Plot the closing prices of the whole DataFrame against its index.

    Every other calendar day between the first and last timestamp is
    shaded grey so that day boundaries are easy to see.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df.index.to_numpy(), df['Close'].to_numpy(), color='green', linestyle='-', marker='o')

    # Shade alternating one-day bands, starting with the first day.
    one_day = pd.Timedelta(days=1)
    all_days = pd.date_range(start=df.index.min(), end=df.index.max())
    for day in all_days[::2]:
        ax.axvspan(day, day + one_day, color='gray', alpha=0.3)

    ax.set_xlabel('Ημερομηνία')
    ax.set_ylabel('Τιμή Κλεισίματος')
    ax.set_title('Τιμές Κλεισίματος')
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.xticks(rotation=45)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.show()
        
def _aggregate_bars(group):
    """Collapse a non-empty list of bar rows into a single row.

    The first row supplies the timestamp and every column except Volume
    (sum over the group), Low (minimum) and High (maximum).
    """
    merged = group[0].copy()
    merged['Volume'] = sum(bar['Volume'] for bar in group)
    merged['Low'] = min(bar['Low'] for bar in group)
    merged['High'] = max(bar['High'] for bar in group)
    return merged


def get_stock_data_for_period(df, start_date, end_date, frequency):
    """Return the bars between two dates, down-sampled to `frequency` minutes.

    Args:
        df: Price DataFrame indexed by timestamp with at least the columns
            'Volume', 'Low' and 'High'.
        start_date: First label of the period (inclusive `.loc` slice).
        end_date: Last label of the period (inclusive `.loc` slice).
        frequency: Minimum spacing in minutes between two sampled rows.
            0 returns the period unchanged.

    Returns:
        A DataFrame with one row per sampling group. A group starts at a
        bar and absorbs every following bar that is less than `frequency`
        minutes after that first bar, so gaps in the data simply start a
        new group at the next available bar. Volume is summed, Low/High
        are the extremes of the group, and all other columns (including
        Close) keep the value of the group's first bar.
        An empty period yields an empty DataFrame that still carries the
        columns of `df`.
    """
    period_data = df.loc[start_date:end_date].copy()

    # Nothing to resample: keep the schema instead of building a
    # column-less frame from an empty list.
    if frequency == 0 or period_data.empty:
        return period_data

    sampled_rows = []
    group = []
    group_start = None

    for time, row in period_data.iterrows():
        # Close the current group once this bar is far enough from its start.
        if group and (time - group_start).total_seconds() / 60 >= frequency:
            sampled_rows.append(_aggregate_bars(group))
            group = []
        if not group:
            group_start = time
        group.append(row)

    # The loop always leaves the final group open.
    sampled_rows.append(_aggregate_bars(group))

    return pd.DataFrame(sampled_rows)

def monday_two_weeks_ago(date=None):
    """Return the Monday 14 days before the Monday of the week of `date`.

    When `date` is omitted, today's local date is used.
    """
    reference = datetime.now().date() if date is None else date
    # weekday() is 0 on Monday, so this steps back to the week's Monday
    # and then a further two weeks in one subtraction.
    return reference - timedelta(days=reference.weekday() + 14)

def get_train_test_weeks(date=None):
    """Return the train and test date windows as 'YYYY-MM-DD' strings.

    The result is the tuple (start_train, end_train, start_test, end_test).
    The training window starts on the Monday given by
    `monday_two_weeks_ago(date)` and ends 4 days later; the test window
    starts 7 days after the training start and also ends 4 days later.
    """
    train_start = monday_two_weeks_ago(date)
    # Day offsets from the training start for the four boundaries.
    day_offsets = (0, 4, 7, 11)
    return tuple(
        (train_start + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in day_offsets
    )


In [None]:
### Imports
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs import ForexEnv, Positions 

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback

import torch
import quantstats as qs
from ta import add_all_ta_features
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc, accuracy_score
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

def _find_trade_ticks(position_history, n_ticks):
    """Locate every tick at which the position switches to Short or to Long.

    Returns three ascending lists of tick indices: all trades, the short
    entries and the long entries. Tick 0 has no predecessor, so it is
    compared against "no position" rather than wrapping around to the
    last element of the history.
    """
    trade_ticks, short_ticks, long_ticks = [], [], []
    previous = None
    for tick in range(n_ticks):
        position = position_history[tick]
        if position == Positions.Short and previous != Positions.Short:
            trade_ticks.append(tick)
            short_ticks.append(tick)
        elif position == Positions.Long and previous != Positions.Long:
            trade_ticks.append(tick)
            long_ticks.append(tick)
        previous = position
    return trade_ticks, short_ticks, long_ticks


def _score_trades(prices, trade_ticks, position_history):
    """Judge every trade by the price move until the next trade.

    A trade runs from its own tick to the next trade tick (the last trade
    runs to the final price). Returns (labels, preds, tally):
    labels[i] is 0 when the price fell over the trade and 1 otherwise,
    preds[i] is 0 for a short and 1 for a long, and tally counts
    good/bad shorts and longs.
    """
    labels, preds = [], []
    tally = {'good_shorts': 0, 'bad_shorts': 0, 'good_longs': 0, 'bad_longs': 0}
    exit_ticks = trade_ticks[1:] + [len(prices) - 1]
    for tick, exit_tick in zip(trade_ticks, exit_ticks):
        entry_price = prices[tick]
        exit_price = prices[exit_tick]
        price_fell = exit_price < entry_price
        labels.append(0 if price_fell else 1)
        if position_history[tick] == Positions.Short:
            preds.append(0)
            tally['good_shorts' if price_fell else 'bad_shorts'] += 1
        else:
            # Trade ticks are only ever Short or Long entries.
            preds.append(1)
            tally['good_longs' if exit_price > entry_price else 'bad_longs'] += 1
    return labels, preds, tally


def _compound_profits(prices, trade_ticks, position_history, trade_fee):
    """Return the compounded profit factor at every tick.

    The factor starts at 1.0 and is updated only at trade ticks, where the
    position opened at the previous trade is closed at the current price
    minus `trade_fee`. The position opened by the last trade is never
    closed, so it does not contribute to the result.
    """
    profits = np.ones(len(prices))
    total = 1.0
    entry_price = None
    current_position = None
    for tick in trade_ticks:
        price = prices[tick]
        if entry_price is not None:
            if current_position == Positions.Long:
                profit = price - entry_price - trade_fee
            else:  # Short
                profit = entry_price - price - trade_fee
            total *= 1 + profit / entry_price
        entry_price = price
        current_position = position_history[tick]
        # The factor holds from this trade until the next one overwrites it.
        profits[tick:] = total
    return profits


def my_render(env, df2=None, start_index=None, end_index=None, title=None):
    """Plot and report how the agent traded during the finished episode.

    Draws the price series against the agent's step-wise net worth with
    short/long entry markers, prints a good/bad trade summary, produces the
    quantstats metrics and snapshot, and finally shows the classification
    report, confusion matrix and ROC curve of the trade directions.

    Args:
        env: Finished environment; `_position_history`, `window_size`,
            `_total_reward`, `_total_profit` and (optionally) `trade_fee`
            are read from it.
        df2: Price DataFrame with a 'Close' column. Required.
        start_index: Unused; kept for interface compatibility.
        end_index: Number of leading rows of `df2` to use (None = all).
        title: Optional plot title.

    Raises:
        ValueError: If `df2` is not supplied.
    """
    if df2 is None:
        raise ValueError("my_render requires df2, the price DataFrame of the episode")

    # Extract the price series for the episode
    prices = df2['Close'].iloc[0:end_index].values
    window_ticks = np.arange(len(prices))
    position_history = env._position_history
    window_size = env.window_size
    trade_fee = getattr(env, 'trade_fee', 0.0)

    # Any switch into Short or Long counts as a trade.
    trade_ticks, short_ticks, long_ticks = _find_trade_ticks(position_history, len(prices))
    shorts, longs = len(short_ticks), len(long_ticks)
    trade_labels, trade_preds, tally = _score_trades(prices, trade_ticks, position_history)
    manual_profits = _compound_profits(prices, trade_ticks, position_history, trade_fee)

    # Scale the profit factor by the initial price so both curves share an axis.
    rl_net_worth = manual_profits * prices[0]

    # The net-worth curve starts at the first trade, or at window_size when
    # there is no trade at or after it.
    start_idx = window_size
    if trade_ticks and trade_ticks[0] >= window_size:
        start_idx = trade_ticks[0]

    # Step-like curve: the net worth only changes at trade points.
    step_ticks = [start_idx] + [tick for tick in trade_ticks if tick > start_idx]
    step_ends = step_ticks[1:] + [len(prices)]
    rl_steps_x = []
    rl_steps_y = []
    for current_tick, next_tick in zip(step_ticks, step_ends):
        rl_steps_x.extend([current_tick, next_tick])
        rl_steps_y.extend([rl_net_worth[current_tick], rl_net_worth[current_tick]])

    # Total returns shown in the legend
    buy_hold_return = prices[-1] / prices[0]
    rl_return = manual_profits[-1]

    # Plot the price series (Buy & Hold) and RL model's net worth
    plt.figure(figsize=(16, 6))
    plt.plot(window_ticks, prices, label=f"Buy & Hold (Return: {buy_hold_return:.2f})", color='blue')
    plt.plot(rl_steps_x, rl_steps_y, label=f"RL Model (Return: {rl_return:.2f})", color='orange', drawstyle='steps-post')

    # Mark the short and long entries on the price curve
    plt.plot(short_ticks, prices[short_ticks], 'rv', label='Short Trades')
    plt.plot(long_ticks, prices[long_ticks], 'g^', label='Long Trades')

    plt.xlabel('Step')
    plt.ylabel('Price / Net Worth')
    if title:
        plt.title(title)
    plt.suptitle(
        f"Total Reward: {env._total_reward:.6f} ~ " +
        f"Total Profit: {env._total_profit:.6f} ~ " +
        f"Total Trades: {shorts + longs} ~ " +
        f"Shorts: {shorts} ~ " +
        f"Longs: {longs}"
    )
    plt.legend()
    plt.grid(True)
    plt.show()

    # Print the trade performance report
    print("Actual Decisions:")
    print(f"Good shorts: {tally['good_shorts']}, Bad shorts: {tally['bad_shorts']}")
    print(f"Good longs: {tally['good_longs']}, Bad longs: {tally['bad_longs']}")
    print(f"Total Actual Trades: {shorts + longs} ~ Shorts: {shorts} ~ Longs: {longs}")

    print("Calculated Total Profit:", manual_profits[-1])
    print("Environment Total Profit:", env._total_profit)

    # Generate quantstats reports from the manually compounded profit curve
    qs.extend_pandas()
    net_worth = pd.Series(manual_profits, index=df2.index[0:end_index])
    # Returns as percentage change, skipping the leading NaN
    returns = net_worth.pct_change().iloc[1:]
    qs.reports.metrics(returns)
    qs.plots.snapshot(returns, mode='full', title='My Forex Trading Strategy', plot_all=False)

    if not trade_labels:
        # The sklearn reports below are undefined without any sample.
        print("No trades were made; classification metrics skipped.")
        return

    # -- Classification Metrics Report --
    print("Classification Report: Predicting Trade Direction (Short vs Long)")
    # labels=[0, 1] keeps the report valid when only one class occurs;
    # without it the two target_names no longer match the classes found.
    print(classification_report(
        trade_labels, trade_preds,
        labels=[0, 1], target_names=['Short', 'Long'], zero_division=0,
    ))

    # -- Confusion Matrix: Short vs Long predictions --
    cm = confusion_matrix(trade_labels, trade_preds, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Short', 'Long'])
    disp.plot(cmap='Blues', colorbar=False)
    plt.title("Confusion Matrix: Model Prediction vs Optimal Trade Direction")
    plt.grid(False)
    plt.show()

    # -- ROC Curve --
    if len(set(trade_labels)) > 1:  # ROC needs both classes
        fpr, tpr, _ = roc_curve(trade_labels, trade_preds)
        roc_auc = auc(fpr, tpr)

        plt.figure(figsize=(6, 5))
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
        plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve: Trade Direction Prediction (Short vs Long)')
        plt.legend(loc="lower right")
        plt.grid(True)
        plt.show()
    else:
        print("ROC Curve could not be plotted (only one class present).")


def my_profit(env, df2=None, title=None):
    """Summarise the finished episode without plotting anything.

    Args:
        env: Finished environment; `_position_history` and (optionally)
            `trade_fee` are read from it.
        df2: Price DataFrame with a 'Close' column. Required.
        title: Unused; kept for interface compatibility.

    Returns:
        Tuple (total_profit, accuracy, shorts, longs): the final compounded
        profit factor, the share of trades whose direction matched the
        price move until the next trade (NaN when no trade was made), and
        the number of short and long entries.

    Raises:
        ValueError: If `df2` is not supplied.
    """
    if df2 is None:
        raise ValueError("my_profit requires df2, the price DataFrame of the episode")

    prices = df2['Close'].values
    position_history = env._position_history
    trade_fee = getattr(env, 'trade_fee', 0.0)

    trade_ticks, short_ticks, long_ticks = _find_trade_ticks(position_history, len(prices))
    trade_labels, trade_preds, _ = _score_trades(prices, trade_ticks, position_history)
    manual_profits = _compound_profits(prices, trade_ticks, position_history, trade_fee)

    # Accuracy is undefined without trades; report NaN instead of handing
    # empty arrays to sklearn.
    rl_accuracy = accuracy_score(trade_labels, trade_preds) if trade_labels else float('nan')

    return (manual_profits[-1], rl_accuracy, len(short_ticks), len(long_ticks))

In [None]:
# ProgressBarCallback for model.learn()
class ProgressBarCallback(BaseCallback):
    """Show a tqdm progress bar while `model.learn()` is running.

    The bar is refreshed every `check_freq` callback calls and advanced by
    the number of environment timesteps actually collected since the last
    refresh, so it stays correct with several parallel environments and
    when the total is not a multiple of `check_freq`.

    Args:
        check_freq: Number of `_on_step` calls between two bar refreshes.
        verbose: Verbosity level forwarded to `BaseCallback`.
    """
    # NOTE(review): `tqdm` is not imported in the visible part of this file;
    # confirm that `from tqdm.auto import tqdm` (or similar) has been run
    # before training, otherwise `_on_training_start` raises NameError.

    def __init__(self, check_freq: int, verbose: int = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.progress_bar = None
        self._reported_timesteps = 0

    def _on_training_start(self) -> None:
        """
        This method is called before the first rollout starts.
        """
        self._reported_timesteps = 0
        self.progress_bar = tqdm(total=self.model._total_timesteps, desc="model.learn()")

    def _sync_progress_bar(self) -> None:
        """Advance the bar by the timesteps collected since the last refresh."""
        if self.progress_bar is None:
            return
        pending = self.num_timesteps - self._reported_timesteps
        if pending > 0:
            self.progress_bar.update(pending)
            self._reported_timesteps = self.num_timesteps

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            self._sync_progress_bar()
        # Returning True lets training continue.
        return True

    def _on_training_end(self) -> None:
        """
        This event is triggered before exiting the `learn()` method.
        """
        # Flush the steps taken since the last refresh before closing.
        self._sync_progress_bar()
        if self.progress_bar is not None:
            self.progress_bar.close()
            self.progress_bar = None

# TRAINING
def train_model(model, my_test_env, seed, total_learning_timesteps=10_000):
    """Seed everything, then train `model`; with model=None only announce random actions.

    The test environment is reset with `seed`, and torch, `random` and
    NumPy are seeded with the same value. When a model is given it learns
    for `total_learning_timesteps` steps with a progress bar, after which
    its own environment is reset. Nothing is returned.
    """
    print('-' * 80)

    # Make training and testing reproducible.
    my_test_env.reset(seed=seed)
    for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
        seed_fn(seed)

    if model is None:
        print("RANDOM actions")
        return

    model.learn(total_timesteps=total_learning_timesteps, progress_bar=True)
    model.get_env().reset()


def my_process_data(env):
    """Slice the price series and the feature matrix an environment works on.

    The rows start `window_size` rows before `frame_bound[0]` and stop
    before `frame_bound[1]`. Prices come from the 'Close' column; features
    come from `env.signal_features_columns`, falling back to the five
    OHLCV columns when that attribute is None.

    Returns:
        Tuple (prices, signal_features) of NumPy arrays.
    """
    lower, upper = env.frame_bound[0], env.frame_bound[1]
    rows = slice(lower - env.window_size, upper)

    columns = env.signal_features_columns
    if columns is None:
        # Default feature set when nothing was configured.
        columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    prices = env.df.loc[:, 'Close'].to_numpy()[rows]
    signal_features = env.df.loc[:, columns].to_numpy()[rows]
    return prices, signal_features

class MyForexEnv(ForexEnv):
    """ForexEnv whose signal features are taken from a configurable column list.

    `signal_features_columns` names the DataFrame columns used as features;
    None selects the default columns chosen by `my_process_data`.
    """

    def __init__(self, df, window_size, frame_bound, signal_features_columns=None):
        # Stored before the parent constructor runs; presumably the parent
        # calls _process_data during construction, which reads this attribute.
        self.signal_features_columns = signal_features_columns
        super().__init__(df=df, window_size=window_size, frame_bound=frame_bound)

    def _process_data(self):
        """Delegate price/feature extraction to `my_process_data`."""
        return my_process_data(self)

def evaluate(model, df, window_size=10, trade_fee=0.005, features=None, seed=42):
    """Run `model` deterministically over `df` for one episode and score it.

    A fresh `MyForexEnv` is built over a re-indexed copy of `df`, stepped
    with the model's deterministic actions until the episode terminates or
    is truncated, and then summarised with `my_profit`.

    Returns:
        Tuple (env, shorts, longs, total_profit, accuracy).
    """
    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume'] if features is None else features

    frame = df.copy().reset_index(drop=True)
    env = MyForexEnv(
        df=frame,
        window_size=window_size,
        frame_bound=(window_size, len(frame)),
        signal_features_columns=feature_columns,
    )
    env.trade_fee = trade_fee
    observation, _ = env.reset(seed=seed)

    finished = False
    while not finished:
        action, _ = model.predict(observation, deterministic=True)
        observation, _, terminated, truncated, _ = env.step(action)
        finished = terminated or truncated

    total_profit, accuracy, shorts, longs = my_profit(env, frame)
    return env, shorts, longs, total_profit, accuracy

def train(stock_df, start_date, end_date, frequency, timesteps,
          seed=42, window_size=10, trade_fee=0.005,
          features=None, ppo_params=None):
    """Train a PPO agent on one period of `stock_df`, or reload a cached one.

    The model and its environment are cached under
    `models/model_{window_size}_{frequency}_{timesteps}.zip` and the
    matching `env_...pkl`. When both files exist they are loaded and no
    training takes place; note that the cache key does not include the
    dates, features or hyperparameters.

    Args:
        stock_df: Price DataFrame indexed by timestamp.
        start_date: First label of the training period (inclusive).
        end_date: Last label of the training period (inclusive).
        frequency: Sampling frequency; only used in the cache file names.
        timesteps: Training length in thousands of timesteps.
        seed: Seed for the environment and for PPO.
        window_size: Observation window of the environment.
        trade_fee: Fee assigned to the environment.
        features: Feature columns; defaults to the five OHLCV columns.
        ppo_params: Optional PPO keyword overrides merged over the built-in
            defaults. A 'policy_kwargs' entry replaces the default network
            architecture.

    Returns:
        Tuple (model, env).
    """
    if features is None:
        features = ['Open', 'High', 'Low', 'Close', 'Volume']

    model_dir = Path("models")
    model_dir.mkdir(exist_ok=True)

    # --- File names keyed by window, frequency and timesteps ---
    model_filename = f"model_{window_size}_{frequency}_{timesteps}.zip"
    env_filename = f"env_{window_size}_{frequency}_{timesteps}.pkl"
    model_path = model_dir / model_filename
    env_path = model_dir / env_filename

    # --- Restrict the data to the requested period ---
    df = stock_df.loc[start_date:end_date].reset_index(drop=True)

    start_index = window_size
    end_index = len(df)

    # Built-in hyperparameters; caller-supplied values take precedence.
    hyperparams = {
        "learning_rate": 0.000265,
        "n_steps": 256,
        "batch_size": 32,
        "gamma": 0.973832,
        "gae_lambda": 0.818521,
        "clip_range": 0.144761,
        "ent_coef": 0.000979
    }
    hyperparams.update(ppo_params or {})
    # policy_kwargs is a separate PPO argument, so it is taken out of the dict.
    policy_kwargs = hyperparams.pop("policy_kwargs", None) or dict(net_arch=[128, 128])

    print(f"[Info] Χρήση policy_kwargs: {policy_kwargs}")
    print(f"[Info] Εκπαίδευση μοντέλου με {hyperparams}")

    # --- Reuse a previously trained model when one is cached ---
    if model_path.exists() and env_path.exists():
        print(f"[Info] Βρέθηκε προηγούμενο μοντέλο: {model_filename}")
        model = PPO.load(model_path)
        # joblib.load unpickles the file: only load files written by this function.
        env = joblib.load(env_path)
        model.set_env(env)
    else:
        print(f"[Info] Νέα εκπαίδευση για {timesteps}k timesteps")
        env = MyForexEnv(
            df=df,
            window_size=window_size,
            frame_bound=(start_index, end_index),
            signal_features_columns=features
        )
        env.trade_fee = trade_fee
        env.reset(seed=seed)

        model = PPO("MlpPolicy", env, policy_kwargs=policy_kwargs, seed=seed, verbose=0, **hyperparams)

        # `timesteps` is expressed in thousands.
        model.learn(total_timesteps=timesteps * 1000, callback=ProgressBarCallback(100))
        model.save(model_path)
        joblib.dump(env, env_path)

    return model, env


### Ζωντανή ροή τιμών, λήψη αποφάσεων, πραγματοποίηση & αναφορά συναλλαγών

Πραγματοποίηση σύνδεσης και καθορισμός παραμέτρων

In [None]:
import datetime as dt
from datetime import datetime, timezone # new
# What time is it right now for the platform? (current time of day in UTC)
datetime.now(timezone.utc).time()

In [None]:
from ib_async import * # The ib_async package must be installed
from IPython.display import display, clear_output
util.startLoop()
ib = IB()
ib.connect() # A connection to IB Gateway must already be available

##### Trading-robot parameters #####
freq = "1 min" # Data frequency: 1-minute bars
units = 1000 # Trade size: 1000 (euros, per the original note)
end_time = dt.time(0, 55, 0) # set before the robot starts; compared with the UTC time of day by the stop loop
contract = Forex('EURUSD') # First contract: FOREX EUR/USD, used to fetch the price history
ib.qualifyContracts(contract)
cfd = CFD("EUR", currency = "USD") # Second contract: CFD EUR/USD
ib.qualifyContracts(cfd)
conID = cfd.conId # Contract id of the EUR/USD CFD, used to find its position when trading
session_start = pd.to_datetime(datetime.now(timezone.utc)) # Current time in UTC



Καθορισμός βοηθητικών συναρτήσεων & συνάρτησης συμβάντος

In [None]:
def onBarUpdate(bars, hasNewBar):
    """React to an update of the live bar list.

    When the newest bar carries a later timestamp than the last one seen,
    the price DataFrame is rebuilt, the saved PPO model is replayed over it
    and the position is moved to +units (long) or -units (short) according
    to the model's final action. On any other update only the trade report
    is refreshed.

    Args:
        bars: Live bar list delivered by the update event.
        hasNewBar: Flag supplied by the event; not used here, the bar
            timestamp is compared instead.
    """
    global df, last_bar

    # --- Settings (must match the files written during training) ---
    frequency = 1
    window_size = 20
    timesteps = 30
    selected_features = ['momentum_pvo_signal', 'momentum_ppo_signal', 'momentum_ppo']

    if not bars[-1].date > last_bar:
        # No new bar: refresh the report on a best-effort basis.
        # Presumably reporting fails while no fills exist yet, so ordinary
        # errors are ignored here; KeyboardInterrupt still propagates.
        try:
            trade_reporting()
        except Exception:
            pass
        return

    last_bar = bars[-1].date

    # Build the price frame from all bars except the newest one.
    df = pd.DataFrame(bars)[["date", "open", "high", "low", "close", "volume"]].iloc[:-1].set_index("date")
    df.columns = ["Open", "High", "Low", "Close", "Volume"]

    if len(df) <= window_size:
        # Not enough history to form a single observation window yet.
        return

    ####################### Ask the agent ###########################
    test_df = add_all_ta_features(df.copy(), open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=True)

    # Only the policy is needed for prediction, so the pickled training
    # environment is not loaded.
    # NOTE(review): the model is re-read from disk on every new bar.
    model_path = Path("models") / f"model_{window_size}_{frequency}_{timesteps}.zip"
    model = PPO.load(model_path)

    # Replay the model over the whole history; the last action decides.
    df_mk = test_df.copy().reset_index(drop=True)
    env = MyForexEnv(df=df_mk, window_size=window_size, frame_bound=(window_size, len(df_mk)),
                     signal_features_columns=selected_features)
    observation, info = env.reset()
    while True:
        action, _states = model.predict(observation, deterministic=True)  # BUY / Long = 1 or SELL / Short = 0
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            break
    ####################################################################

    # Trading: long the full size on action 1, short it otherwise.
    target = units if action == 1 else -units
    execute_trade(target=target)

    # Display
    clear_output(wait=True)
    trade_reporting()

def execute_trade(target):
    """Move the CFD position to `target` units with one market order.

    Args:
        target: Desired signed position (positive = long, negative = short,
            0 = flat).

    The current position is looked up by contract id and stored in the
    global `current_pos`; no order is placed when it already equals
    `target`. Errors raised while reading the positions are no longer
    swallowed, so a failed lookup cannot be mistaken for a flat position.
    """
    global current_pos

    # 1. Current position for this contract; 0 when none is held.
    current_pos = next(
        (pos.position for pos in ib.positions() if pos.contract.conId == conID),
        0,
    )

    # 2. Size and direction of the order needed to reach the target.
    delta = target - current_pos
    if delta == 0:
        return

    # 3. Place the order.
    side = "BUY" if delta > 0 else "SELL"
    ib.placeOrder(cfd, MarketOrder(side, abs(delta)))

def trade_reporting():
    """Rebuild the session trade report and display it with the latest prices.

    The report joins executions and commission reports of all fills on
    `execId`, keeps the rows from `session_start` on, aggregates them per
    fill time and adds the cumulative realised PnL. It is stored in the
    global `report`. When there are no fills yet an empty report with the
    same columns is shown instead of failing.
    """
    global report

    fills = ib.fills()
    if fills:
        fill_df = util.df([fs.execution for fs in fills])[["execId", "time", "side", "cumQty", "avgPrice"]].set_index("execId")
        profit_df = util.df([fs.commissionReport for fs in fills])[["execId", "realizedPNL"]].set_index("execId")
        report = pd.concat([fill_df, profit_df], axis=1).set_index("time").loc[session_start:]
        report = report.groupby("time").agg({"side": "first", "cumQty": "max", "avgPrice": "mean", "realizedPNL": "sum"})
        report["cumPNL"] = report.realizedPNL.cumsum()
    else:
        # Nothing has been executed yet in this session.
        report = pd.DataFrame(columns=["side", "cumQty", "avgPrice", "realizedPNL", "cumPNL"])

    clear_output(wait=True)
    display(df.tail(5), report)  # last 5 price rows and the report


Πρόσφατο ιστορικό & εκκίνηση ροής τρεχουσών τιμών

In [None]:
# Historical data for the last 24 hours at 1-minute frequency
bars = ib.reqHistoricalData(
        contract,
        endDateTime='',
        durationStr='1 D', # Duration: 1 day
        barSizeSetting=freq, # Bar size: 1 minute
        whatToShow='MIDPOINT',
        useRTH=True,
        formatDate=2,
        keepUpToDate=True)
last_bar = bars[-1].date # Timestamp of the most recent bar
bars.updateEvent += onBarUpdate # call onBarUpdate whenever the bar list is updated


Έλεγχος για τερματισμό του ρομπότ συναλλαγών

In [None]:
# Poll the clock and shut the robot down once the stop time is reached.
# NOTE(review): only the UTC time of day is compared, so the loop stops at
# once when the current time of day is already past `end_time`.
while True:
    ib.sleep(5) # check every 5 seconds
    if datetime.now(timezone.utc).time() < end_time:
        continue
    execute_trade(target = 0) # close open position
    ib.cancelHistoricalData(bars) # stop stream
    ib.sleep(10)
    try:
        trade_reporting() # final reporting
    except Exception as exc:
        # The final report is best effort, but say why it is missing.
        print(f"Final report unavailable: {exc}")
    print("Session Stopped.")
    ib.disconnect()
    break

In [None]:
# Run this if we want to stop the robot after an "interrupt" or cancellation
ib.cancelHistoricalData(bars) # cancel trading

In [None]:
# Close the connection opened with ib.connect()
ib.disconnect()