In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder 
import boto3
import plotly.express as px
from sklearn.model_selection import train_test_split
import requests
from io import StringIO
from datetime import datetime, timedelta
import time
import sys
import logging
from tqdm.auto import tqdm
import warnings
from pandas.errors import SettingWithCopyWarning

warnings.filterwarnings("ignore", category=SettingWithCopyWarning)
 

In [None]:
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
logger = logging.getLogger(__name__)

In [None]:



 

def fetch_and_concat_data(api_key):
    """Download 5-minute ^NDX candles one calendar day at a time and merge them.

    One request is sent for every day from 2019-10-30 up to the current time.
    A day that fails (network error, non-200 status, undecodable body or a
    payload that is not a list of records) is logged and skipped, so a single
    bad day does not abort the whole download.

    Args:
        api_key: Key sent as the ``apikey`` query parameter.

    Returns:
        pandas.DataFrame holding the rows of all fetched days. Each day's rows
        are reversed relative to the order the API delivered them in (the API
        is expected to return newest first -- TODO confirm), and rows with a
        duplicate ``date`` are removed, keeping the first occurrence. When no
        day produced data, an empty frame with the columns
        ``date, open, high, low, close`` is returned.
    """
    base_url = "https://financialmodelingprep.com/api/v3/historical-chart/5min/^NDX"
    start_date = datetime(year=2019, month=10, day=30)
    end_date = datetime.now()  # Or any other end date you want

    requests_per_minute = 200
    sleep_time = 60 / requests_per_minute  # Pause between calls to stay under the rate limit
    request_timeout = 30  # Seconds; without a timeout a stalled connection blocks forever

    all_data_frames = []
    total_days = (end_date - start_date).days

    with tqdm(total=total_days, desc="Fetching data") as pbar:
        while start_date < end_date:
            day = start_date.strftime("%Y-%m-%d")
            params = {"apikey": api_key, "from": day, "to": day}

            data = None
            try:
                response = requests.get(base_url, params=params, timeout=request_timeout)
                if response.status_code == 200:
                    data = response.json()
                else:
                    logger.warning("Failed to fetch data for %s (HTTP %s)", day, response.status_code)
            except (requests.RequestException, ValueError) as exc:
                # Only the exception type is logged: the message of a requests
                # error can contain the full URL, including the API key.
                logger.warning("Failed to fetch data for %s (%s)", day, type(exc).__name__)

            if isinstance(data, list) and data:
                df = pd.DataFrame(data)
                df['date'] = pd.to_datetime(df['date'])  # Ensure the 'date' column is in datetime format
                df = df.iloc[::-1]  # Reverse the dataframe to maintain chronological order
                all_data_frames.append(df)
            elif data:
                # e.g. an error object returned with status 200
                logger.warning("Unexpected payload type for %s: %s", day, type(data).__name__)

            # Sleep to maintain the rate limit
            time.sleep(sleep_time)

            start_date += timedelta(days=1)
            pbar.update(1)

    if not all_data_frames:
        # pd.concat raises ValueError on an empty list; hand back an empty frame instead
        return pd.DataFrame(columns=["date", "open", "high", "low", "close"])

    # Concatenate all data frames and remove duplicates based on the datetime column
    final_df = pd.concat(all_data_frames, ignore_index=True)
    return final_df.drop_duplicates(subset='date', keep='first')



# Usage
import os

# Prefer a key supplied through the FMP_API_KEY environment variable so the
# secret does not have to be stored in the notebook; the literal is only a
# fallback for existing setups and should be removed once the variable is set.
api_key = os.environ.get("FMP_API_KEY", "REDACTED_API_KEY")
data_frame = fetch_and_concat_data(api_key)

In [None]:
data_frame[["date","open","high","low","close"]]

In [None]:
intradayndx = data_frame[["date","open","high","low","close"]]
intradayndx = intradayndx.rename(columns={"date":"datetime"})
intradayndx.head()

In [None]:
intradayndx['datetime'] = pd.to_datetime(intradayndx['datetime'])
intradayndx.set_index('datetime', inplace=True)

In [None]:
# Function to calculate daily candles updated at every 5-minute interval
def calculate_updated_daily_candles(df):
    """Build the "daily candle so far" for every intraday bar.

    For each row the result holds the state of that calendar day's candle as
    of the row's timestamp: the day's first open, the running high and low up
    to and including the row, and the row's own close.

    The computation is done with grouped cumulative operations instead of
    re-filtering the whole frame for every row, so it is linear in the number
    of rows and the result columns are numeric rather than object dtype.

    Args:
        df: Frame indexed by a DatetimeIndex with the columns
            ``open``, ``high``, ``low`` and ``close``. Timestamps are assumed
            to be unique.

    Returns:
        pandas.DataFrame with the same index (same row order) as ``df`` and
        the columns ``open, high, low, close``. NaN inputs are skipped when
        picking the day's first open.
    """
    columns = ['open', 'high', 'low', 'close']
    if len(df) == 0:
        return pd.DataFrame(columns=columns, index=df.index)

    # Compute on a chronologically ordered view so the running max/min really
    # mean "up to this timestamp" even if the caller's rows are not sorted.
    order = np.argsort(df.index.values, kind="stable")
    ordered = df.iloc[order]
    per_day = ordered.groupby(ordered.index.normalize())

    updated = pd.DataFrame(index=ordered.index)
    # .to_numpy() assigns positionally and avoids any index re-alignment
    updated['open'] = per_day['open'].transform('first').to_numpy()
    updated['high'] = per_day['high'].cummax().to_numpy()
    updated['low'] = per_day['low'].cummin().to_numpy()
    updated['close'] = ordered['close'].to_numpy()

    # Put the rows back into the caller's original order
    restore = np.empty_like(order)
    restore[order] = np.arange(len(order))
    return updated.iloc[restore]

# Calculate the updated daily candles
intradayndx_agg = calculate_updated_daily_candles(intradayndx)

# Display the first few rows of the updated daily candles dataframe
intradayndx_agg.head()

In [None]:
intradayndx_agg = intradayndx_agg.reset_index()

intradayndx_agg['Date'] = intradayndx_agg['datetime'].dt.date
intradayndx_agg['Date'] = pd.to_datetime(intradayndx_agg['Date'], errors='coerce')
intradayndx_agg =intradayndx_agg.rename(columns={"high": "High", "low": "Low","open": "Open","close": "Close"})

In [None]:
# Collapse the intraday bars into one OHLC row per calendar day and discard
# the days for which the resample produced NaN values (days without data)
daily_df = (
    intradayndx
    .resample('D')
    .agg({'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'})
    .dropna()
)

In [None]:
intradayndx_agg[intradayndx_agg["Date"] == "2024-01-18"].iloc[16]

In [None]:
daily_df.head()

In [None]:
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={"high": "High", "low": "Low","open": "Open","close": "Close","datetime": "Date"})

In [None]:
daily_df.tail()

In [None]:
csv_buffer = StringIO()
daily_df = daily_df.iloc[:]
daily_df.to_csv(csv_buffer, index=False)
# Create a boto3 client
s3_client = boto3.client('s3')

# Specify the bucket name and file name in S3
bucket_name = 'REDACTED_BUCKET'
object_name = 'ndx.csv'

# Upload the CSV to S3
#s3_client.put_object(Bucket=bucket_name, Key=object_name, Body=csv_buffer.getvalue())


In [None]:


df = daily_df

In [None]:
print(df.isnull().sum())
df = df.dropna()

In [None]:
df.tail()

In [None]:
len(df.values)

In [None]:
def one_hot_encode_column(data: pd.DataFrame, col, max_classes=None):
    """Append one-hot indicator columns for ``col`` to a copy of ``data``.

    Args:
        data: Input frame; it is not modified.
        col: Name of the column holding integer class ids.
        max_classes: Optional highest class id. When given, indicator columns
            ``{col}_0`` .. ``{col}_{max_classes}`` are guaranteed to exist
            (all-zero for ids that do not occur), the indicator columns are
            ordered by class id, and the original column is kept. When
            ``None``, only the ids present in the data get a column and the
            original column is dropped.

    Returns:
        pandas.DataFrame: the copy with the indicator columns appended.
    """
    df = data.copy()
    dummies = pd.get_dummies(df[col], prefix=col, dtype=int)

    if max_classes is not None:
        # Add an all-zero indicator for every class id that is absent
        for class_id in range(max_classes + 1):
            indicator = f"{col}_{class_id}"
            if indicator not in dummies.columns:
                dummies[indicator] = 0

        # Order by the numeric suffix. rsplit takes the part after the LAST
        # underscore, so column names that contain underscores themselves
        # (e.g. "month_of_quarter") are handled correctly.
        dummies = dummies[sorted(dummies.columns, key=lambda name: int(name.rsplit("_", 1)[1]))]

    df = pd.concat([df, dummies], axis=1)
    if max_classes is None:
        df = df.drop(columns=[col])

    return df

In [None]:
# Rolling means of the daily OHLC columns over three horizons.
# NOTE(review): the column suffixes do not match the window lengths -- the
# "*_200_MA" columns use a 30-row window, "*_100_MA" a 14-row window and
# "*_10_MA" a 7-row window. The names are referenced by later cells, so they
# are left unchanged here; confirm which of the two (name or window) is intended.
df["Open_200_MA"] = df["Open"].rolling(window=30).mean()
df["High_200_MA"] = df["High"].rolling(window=30).mean()
df["Low_200_MA"] = df["Low"].rolling(window=30).mean()
df["Close_200_MA"] = df["Close"].rolling(window=30).mean()
df["Open_100_MA"] = df["Open"].rolling(window=14).mean()
df["High_100_MA"] = df["High"].rolling(window=14).mean()
df["Low_100_MA"] = df["Low"].rolling(window=14).mean()
df["Close_100_MA"] = df["Close"].rolling(window=14).mean()
df["Open_10_MA"] = df["Open"].rolling(window=7).mean()
df["High_10_MA"] = df["High"].rolling(window=7).mean()
df["Low_10_MA"] = df["Low"].rolling(window=7).mean()
df["Close_10_MA"] = df["Close"].rolling(window=7).mean()
# Drop the leading rows for which the longest (30-row) window is not yet full
df = df.dropna()

In [None]:
df.tail()

In [None]:
fig = px.line(df, x="Date", y=["Open","Open_200_MA","Open_100_MA","Open_10_MA"])
fig.show()

In [None]:
import ta


# Calculate RSI
df['RSI_open'] = ta.momentum.rsi(df['Open'])
df['RSI_open'] = (df['RSI_open'] - 50) / 50
df['RSI_diff_open'] = df['RSI_open'].diff()

In [None]:
fig = px.line(df, x="Date", y=['RSI_open'])
fig.show()

In [None]:
df['Close_lag1'] = df['Close'].shift(1)
df['Open_lag1'] = df['Open'].shift(1)
df['Low_lag1'] = df['Low'].shift(1)
df['High_lag1'] = df['High'].shift(1)

df['Close_lag1_200_MA'] = df['Close_200_MA'].shift(1)
df['Open_lag1_200_MA'] = df['Open_200_MA'].shift(1)
df['Low_lag1_200_MA'] = df['Low_200_MA'].shift(1)
df['High_lag1_200_MA'] = df['High_200_MA'].shift(1)

df['Close_lag1_100_MA'] = df['Close_100_MA'].shift(1)
df['Open_lag1_100_MA'] = df['Open_100_MA'].shift(1)
df['Low_lag1_100_MA'] = df['Low_100_MA'].shift(1)
df['High_lag1_100_MA'] = df['High_100_MA'].shift(1)

df['Close_lag1_10_MA'] = df['Close_10_MA'].shift(1)
df['Open_lag1_10_MA'] = df['Open_10_MA'].shift(1)
df['Low_lag1_10_MA'] = df['Low_10_MA'].shift(1)
df['High_lag1_10_MA'] = df['High_10_MA'].shift(1)


In [None]:
df["Open_Close"] = (df["Open"]- df["Close_lag1"]) / df["Open"]
df["Open_Open"] = (df["Open"]- df["Open_lag1"]) / df["Open"]
df["Open_Low"] = (df["Open"]- df["Low_lag1"]) / df["Open"]
df["Open_High"] = (df["Open"]- df["High_lag1"]) / df["Open"]

df["Open_Close_200_MA"] = (df["Open"]- df["Close_lag1_200_MA"]) / df["Open"]
df["Open_Open_200_MA"] = (df["Open"]- df["Open_lag1_200_MA"]) / df["Open"]
df["Open_Low_200_MA"] = (df["Open"]- df["Low_lag1_200_MA"]) / df["Open"]
df["Open_High_200_MA"] = (df["Open"]- df["High_lag1_200_MA"]) / df["Open"]

df["Open_Close_100_MA"] = (df["Open"]- df["Close_lag1_100_MA"]) / df["Open"]
df["Open_Open_100_MA"] = (df["Open"]- df["Open_lag1_100_MA"]) / df["Open"]
df["Open_Low_100_MA"] = (df["Open"]- df["Low_lag1_100_MA"]) / df["Open"]
df["Open_High_100_MA"] = (df["Open"]- df["High_lag1_100_MA"]) / df["Open"]

df["Open_Close_10_MA"] = (df["Open"]- df["Close_lag1_10_MA"]) / df["Open"]
df["Open_Open_10_MA"] = (df["Open"]- df["Open_lag1_10_MA"]) / df["Open"]
df["Open_Low_10_MA"] = (df["Open"]- df["Low_lag1_10_MA"]) / df["Open"]
df["Open_High_10_MA"] = (df["Open"]- df["High_lag1_10_MA"]) / df["Open"]

In [None]:
# Close-based indicator features, all computed with the `ta` defaults.
# Relative distance of the close from its EMA
df['EMA_Close'] = ta.trend.ema_indicator(df['Close'])
df['EMA_Ratio_Close'] = (df['Close'] - df['EMA_Close']) / df['Close']
bollinger = ta.volatility.BollingerBands(close=df['Close'])
# Band width relative to the middle band
df['BB_Bandwidth'] = (bollinger.bollinger_hband() - bollinger.bollinger_lband()) / bollinger.bollinger_mavg()
# Position of the close inside the band, then halved.
# NOTE(review): the trailing "/2" divides the whole ratio by two, so a close on
# the upper band gives 0.5 rather than 1 -- confirm this scaling is intended.
df['BB_Percent'] = (df['Close'] - bollinger.bollinger_lband()) / (bollinger.bollinger_hband() - bollinger.bollinger_lband())/2
stochastic = ta.momentum.stoch(df['High'], df['Low'], df['Close'])
# Divide the stochastic oscillator by 100
df['Stochastic_Scaled'] = stochastic / 100.0
# Shift every feature by one row so each row carries the previous row's value
df['EMA_Ratio_Close'] = df['EMA_Ratio_Close'].shift(1)
df['BB_Bandwidth'] = df['BB_Bandwidth'].shift(1)
df['BB_Percent'] = df['BB_Percent'].shift(1)
df['Stochastic_Scaled']  = df['Stochastic_Scaled'].shift(1)




In [None]:
df['SMA_5'] = ta.trend.SMAIndicator(close=df['Close'], window=5).sma_indicator()
df['SMA_10'] = ta.trend.SMAIndicator(close=df['Close'], window=10).sma_indicator()
df['MA_Crossover_Signal'] = 0
df.loc[df['SMA_5'] > df['SMA_10'], 'MA_Crossover_Signal'] = 1  # Bullish crossover
df.loc[df['SMA_5'] < df['SMA_10'], 'MA_Crossover_Signal'] = -1  # Bearish crossover
df['MA_Crossover'] = (df['SMA_5']-df['SMA_10'])/df['SMA_10']
df['MA_Crossover'] = df['MA_Crossover'].shift(1)
df['MA_Crossover_Signal'] = df['MA_Crossover_Signal'].shift(1)
df = df.drop(columns=['EMA_Close','SMA_5','SMA_10',"MA_Crossover"])

In [None]:
fig = px.line(df, x="Date", y=["Stochastic_Scaled"])
fig.show()

In [None]:
intradayndx_agg['Date'] = intradayndx_agg['datetime'].dt.date
intradayndx_agg['Date'] = pd.to_datetime(intradayndx_agg['Date'], errors='coerce')
df["Date"] = pd.to_datetime(df['Date'], errors='coerce')

In [None]:
df['weekday'] = df["Date"].dt.weekday.astype(np.int8) 
df['month'] = df["Date"].dt.month.astype(np.int8) - 1
df['monthday'] = df["Date"].dt.day.astype(np.int8) -1
df['month_of_quarter'] = ((df["Date"].dt.month - 1) % 3) 
df = one_hot_encode_column(df,'weekday')
df = one_hot_encode_column(df,'month') 
df = one_hot_encode_column(df,'monthday')
df = one_hot_encode_column(df,'month_of_quarter')

In [None]:
columns = ['Open', 'High', 'Low',"Close",'Open_lag1', 'High_lag1', 'Low_lag1',"Close_lag1"]
columns_delete = []

for elem in columns:
    columns_delete.append(f"{elem}")
    columns_delete.append(f"{elem}_200_MA")
    columns_delete.append(f"{elem}_100_MA")
    columns_delete.append(f"{elem}_10_MA")
dataset= df.drop(columns_delete,axis=1)
dataset = dataset.iloc[:]
dataset = dataset.dropna()


In [None]:
dataset.head()

In [None]:
intradayndx_agg.head()

In [None]:
intradayndx_agg[intradayndx_agg["Date"]== "2023-12-22"].tail()

In [None]:
def pricelong(indexvalue, knockoutprice) :
    """Price of a long knock-out product: 1% of the distance of the index
    above the knock-out level, floored at zero."""
    distance_above_barrier = (indexvalue - knockoutprice) * 0.01
    return np.maximum(distance_above_barrier, 0)

def priceshort(indexvalue, knockoutprice) :
    """Price of a short knock-out product: 1% of the distance of the index
    below the knock-out level, floored at zero."""
    distance_below_barrier = (knockoutprice - indexvalue) * 0.01
    return np.maximum(distance_below_barrier, 0)

In [None]:
 
def prepare_intra_day_data(date):
    """Build the intraday feature rows for one trading day.

    The day's open (from the global ``df``) defines a long knock-out at 96%
    and a short knock-out at 104% of the open. For each of the first 17 rows
    of the global ``intradayndx_agg`` for that date, the long and short
    product prices at the row's High, Low and Close are expressed as a
    relative change against the product price at the open.

    Returns:
        list of rows, one per intraday bar (at most 17), each
        ``[high_long, low_long, close_long, high_short, low_short, close_short]``.
    """
    day_open = df[df["Date"] == date]["Open"]

    long_barrier = 0.96 * day_open
    short_barrier = 1.04 * day_open
    long_entry = pricelong(day_open, long_barrier)
    short_entry = priceshort(day_open, short_barrier)

    intraday_rows = intradayndx_agg[intradayndx_agg["Date"] == date]

    # (pricing function, knock-out level, entry price) for the long and short leg
    legs = (
        (pricelong, long_barrier, long_entry),
        (priceshort, short_barrier, short_entry),
    )

    features = []
    for _, candle in intraday_rows.iloc[0:17].iterrows():
        changes = [
            (price_fn(candle[field], barrier) - entry) / entry
            for price_fn, barrier, entry in legs
            for field in ("High", "Low", "Close")
        ]
        # Every change is a one-element Series (it inherits day_open's shape)
        features.append([change.iloc[0] for change in changes])

    return features

In [None]:
dataset[dataset["Date"] < "2024-01-1"]

In [None]:
current_date = datetime.now()

# Rolling cut-off 150 days (5 * 30) before now. It is overwritten by the fixed
# cut-off on the next statement and only kept so `current_date` stays defined.
six_months_ago = current_date - timedelta(days=5*30)

# Fixed train/validation cut-off: 10 days before 2024-06-11 10:21:17.817147.
# Built directly from the literal instead of going through eval().
six_months_ago = datetime(2024, 6, 11, 10, 21, 17, 817147) - timedelta(days=10)

# Intraday feature rows for every training date (strictly before the cut-off).
# Each date comes from dataset["Date"] itself, so no extra existence check is needed.
all_daily_data = [
    prepare_intra_day_data(elem)
    for elem in dataset[dataset["Date"] < six_months_ago]["Date"].unique()
]

all_daily_data = np.array(all_daily_data)


In [None]:
all_daily_data_val = []

for elem in dataset[dataset["Date"] >= six_months_ago]["Date"].unique():
    if len(dataset[dataset["Date"] ==  elem]) > 0:
        all_daily_data_val.append(prepare_intra_day_data(elem))
       
all_daily_data_val = np.array(all_daily_data_val)

In [None]:
def findtarget(date):
    """Classify the outcome of a long knock-out position for one day.

    The position is opened at the Close of the intraday row at position 17 of
    the global ``intradayndx_agg`` for ``date``, with the knock-out at 93% of
    that value, and valued again at the day's Close from the global ``df``.

    Returns:
        int class of the percentage change of the product price:
        0 (<= -20), 1 (-20, -10], 2 (-10, 0), 3 [0, 10), 4 [10, 20], 5 (> 20).
    """
    day_close = df[df["Date"] == date]["Close"].values[0]
    intraday_rows = intradayndx_agg[intradayndx_agg["Date"] == date]

    entry_index = intraday_rows.iloc[17]["Close"]

    barrier = 0.93 * entry_index
    entry_price = pricelong(entry_index, barrier)
    exit_price = pricelong(day_close, barrier)

    change_pct = ((exit_price - entry_price) / entry_price) * 100

    # Guard clauses in ascending order; each one only sees values that passed
    # all the thresholds before it.
    if change_pct <= -20:
        return 0  # Loss > 20%
    if change_pct <= -10:
        return 1  # Loss 10% to 20%
    if change_pct < 0:
        return 2  # Loss 0% to 10%
    if change_pct < 10:
        return 3  # Win 0% to 10%
    if change_pct <= 20:
        return 4  # Win 10% to 20%
    return 5  # Win > 20%



    

In [None]:
#datetime.datetime(2024, 1, 24, 11, 53, 43, 403145)

In [None]:
six_months_ago

In [None]:
all_targets = []
for elem in dataset[dataset["Date"] < six_months_ago]["Date"].unique():
    all_targets.append(findtarget(elem))
all_targets = np.array(all_targets)

In [None]:
all_targets_val = []
for elem in dataset[dataset["Date"] >= six_months_ago]["Date"].unique():
    all_targets_val.append(findtarget(elem))
all_targets_val = np.array(all_targets_val)

In [None]:
all_targets.shape

In [None]:
all_targets_val.shape

In [None]:
all_targets[9]

In [None]:
# Class distribution of the training targets (classes 0..5).
# minlength=6 keeps all six slots even when the highest classes never occur,
# so the unpacking below cannot fail on a too-short count array.
counts = np.bincount(all_targets, minlength=6)
num_zeros, num_ones, num_twos, num_threes, num_fours, num_fives = counts[:6]

# Calculate proportions
total_elements = all_targets.size
proportion_zeros = num_zeros / total_elements
proportion_ones = num_ones / total_elements
proportion_twos = num_twos / total_elements
proportion_threes = num_threes / total_elements
proportion_fours = num_fours / total_elements
proportion_fives = num_fives / total_elements

# Print the results
for label, count in enumerate(counts[:6]):
    print(f"Number of {label}s:", count)

for label, count in enumerate(counts[:6]):
    print(f"Proportion of {label}s:", count / total_elements)

In [None]:
# Class distribution of the validation targets (classes 0..5).
# minlength=6 keeps all six slots even when some classes never occur in the
# validation window. num_fives is recomputed here, so the printed count is the
# validation count and not a value left over from the training cell.
counts = np.bincount(all_targets_val, minlength=6)
num_zeros, num_ones, num_twos, num_threes, num_fours, num_fives = counts[:6]

# Calculate proportions
total_elements = all_targets_val.size
proportion_zeros = num_zeros / total_elements
proportion_ones = num_ones / total_elements
proportion_twos = num_twos / total_elements
proportion_threes = num_threes / total_elements
proportion_fours = num_fours / total_elements
proportion_fives = num_fives / total_elements

# Print the results
for label, count in enumerate(counts[:6]):
    print(f"Number of {label}s:", count)

for label, count in enumerate(counts[:6]):
    print(f"Proportion of {label}s:", count / total_elements)
 

In [None]:
dataset.head()

In [None]:



data = dataset[dataset["Date"] < six_months_ago].values[:,1:]
data.shape

In [None]:
data_val = dataset[dataset["Date"] >= six_months_ago].values[:,1:]
data_val.shape

In [None]:
X1 = data
X2 = all_daily_data[:,:]
X2.shape
num_rows, dim1, dim2 = X2.shape
X2 = X2.reshape(num_rows, dim1 * dim2)
X2.shape

In [None]:
X1_val = data_val
X2_val = all_daily_data_val[:,:]

num_rows, dim1, dim2 = X2_val.shape
X2_val = X2_val.reshape(num_rows, dim1 * dim2)
X2_val.shape

In [None]:
X = np.hstack((X1, X2))

print(X.shape)  # This should output (num, 176)

In [None]:
X_val = np.hstack((X1_val, X2_val))

print(X_val.shape)  # This should output (num, 176)

In [None]:
y = all_targets[:]
y.shape

In [None]:
y_val = all_targets_val[:]
y_val.shape

In [None]:
dataset.shape

In [None]:

import json


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
y.shape

In [None]:
X.shape

In [None]:
# Split each array individually while keeping the same random_state to ensure matching indices
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y,random_state=42)
data_train, data_test, _ , _ = train_test_split(dataset[dataset["Date"] < six_months_ago], y, test_size=0.2, stratify=y,random_state=42)  # y is just to keep the split consistent

In [None]:
X_train = X_train.astype(np.float32)
X = X.astype(np.float32)
y = y.astype(np.float32)
y_train = y_train.astype(np.float32)
X_test = X_test.astype(np.float32)

y_test = y_test.astype(np.float32)

In [None]:
# Convert all losses to 0 and all wins to 1 in y_train
y_train = np.where(y_train < 3, 0, 1)
y = np.where(y < 3, 0, 1)
# Convert all losses to 0 and all wins to 1 in y_test
y_test = np.where(y_test < 3, 0, 1)

In [None]:
y_train = y_train.astype(np.int64)

y = y.astype(np.int64)

y_test = y_test.astype(np.int64)

In [None]:
y_train.sum() / len(y_train)

In [None]:
y_test.sum() / len(y_test)

In [None]:
#y_train = y_train.reshape(-1, 1)
#y_test = y_test.reshape(-1, 1)

In [None]:
from kan import *
import torch
import random
import numpy as np

In [None]:
# Tensor bundle handed to model.train(): inputs and labels for training and test.
# NOTE(review): the training tensors are built from the full X / y, not from
# X_train / y_train. X_test / y_test were split out of the same X / y by
# train_test_split above, so the test rows are also part of the training
# tensors -- confirm this is intended, since it makes test accuracy optimistic.
dataset_kan = {}
dataset_kan['train_input'] = torch.from_numpy(X)
dataset_kan['test_input'] = torch.from_numpy(X_test)
dataset_kan['train_label'] = torch.from_numpy(y)
dataset_kan['test_label'] = torch.from_numpy(y_test)



In [None]:
model = KAN(width=[176,16,2],  grid=3, k=3)

def train_acc():
    """Accuracy of the global ``model`` on the training tensors of ``dataset_kan``."""
    predicted = torch.argmax(model(dataset_kan['train_input']), dim=1)
    hits = (predicted == dataset_kan['train_label']).float()
    return torch.mean(hits)

def test_acc():
    """Accuracy of the global ``model`` on the test tensors of ``dataset_kan``."""
    predicted = torch.argmax(model(dataset_kan['test_input']), dim=1)
    hits = (predicted == dataset_kan['test_label']).float()
    return torch.mean(hits)

results = model.train(dataset_kan, opt="LBFGS",  steps=1, metrics=(train_acc,test_acc), loss_fn=torch.nn.CrossEntropyLoss())
print(test_acc())
print(train_acc())

In [None]:
y_pred_class = torch.argmax(model(torch.from_numpy(X_val.astype(np.float32))), dim=1)

In [None]:
def evaluate_strategy(date, direction):
    """Return the growth factor of one day's trade, stop applied at the close only.

    The position is opened at the Close of the intraday row at position 17 of
    the global ``intradayndx_agg`` for ``date`` and valued at the day's Close
    from the global ``df``. Knock-outs are at 96% (long) and 104% (short) of
    the entry value. The closing product price is floored at 71% of the entry
    price, i.e. the loss per trade is capped at 29%.

    Note: a later cell of this notebook defines ``evaluate_strategy`` again,
    and that later definition replaces this one when the cells run in order.

    Args:
        date: Trading day to evaluate.
        direction: 1 selects the long product, anything else the short one.

    Returns:
        1 + relative change of the selected product's price.
    """
    close_day = df[df["Date"] == date]["Close"].values[0]
    df_intra_day = intradayndx_agg[intradayndx_agg["Date"]==date]

    index_value = df_intra_day.iloc[17]["Close"]

    knockout_long = 0.96 * index_value
    knockout_short = 1.04 * index_value

    open_price_long = pricelong(index_value,knockout_long)
    open_price_short = priceshort(index_value,knockout_short)

    # Floor the closing prices at 71% of the entry price (stop-loss)
    close_price_long = max(pricelong(close_day,knockout_long), 0.71 * open_price_long)
    close_price_short = max(priceshort(close_day,knockout_short), 0.71 * open_price_short)

    if direction == 1:
        return 1 + (close_price_long - open_price_long) / open_price_long
    return 1 + (close_price_short - open_price_short) / open_price_short

In [None]:
def evaluate_strategy(date, direction):
    """Return the growth factor of one day's trade with a 71% intraday stop.

    The position is opened at the Close of the intraday row at position 17 of
    the global ``intradayndx_agg`` for ``date`` and valued at the day's Close
    from the global ``df``. Knock-outs are at 96% (long) and 104% (short) of
    the entry value. If the product price at the day's close, or at the Close
    of any intraday row after position 17, falls below 71% of the entry
    price, the trade is settled at exactly 71% of the entry price.

    Args:
        date: Trading day to evaluate.
        direction: 1 selects the long product, anything else the short one.

    Returns:
        1 + relative change of the selected product's price.
    """
    stop = 0.71

    close_day = df[df["Date"] == date]["Close"].values[0]
    df_intra_day = intradayndx_agg[intradayndx_agg["Date"]==date]

    index_value = df_intra_day.iloc[17]["Close"]

    knockout_long = 0.96 * index_value
    knockout_short = 1.04 * index_value

    open_price_long = pricelong(index_value,knockout_long)
    open_price_short = priceshort(index_value,knockout_short)

    close_price_long = pricelong(close_day,knockout_long)
    close_price_short = priceshort(close_day,knockout_short)

    later_closes = df_intra_day.iloc[18:]["Close"]
    # Lowest price each product reaches after entry. default= covers a day
    # with no bars after the entry bar (min() of an empty sequence raises).
    lowest_long = min((pricelong(elem, knockout_long) for elem in later_closes), default=close_price_long)
    # The short product loses when the index rises, so its stop must look at
    # the MINIMUM short price and compare it with the SHORT entry price.
    lowest_short = min((priceshort(elem, knockout_short) for elem in later_closes), default=close_price_short)

    if close_price_long < stop * open_price_long or lowest_long < stop * open_price_long:
        close_price_long = stop * open_price_long
    if close_price_short < stop * open_price_short or lowest_short < stop * open_price_short:
        close_price_short = stop * open_price_short

    if direction == 1:
        return 1 + (close_price_long - open_price_long) / open_price_long
    return 1 + (close_price_short - open_price_short) / open_price_short

In [None]:
dataset_val = dataset[dataset["Date"] >=six_months_ago].values

In [None]:
strat_performance = 1
for i, elem in enumerate(dataset_val[:,0]):
    direction = y_pred_class[i]
    temp_performance = evaluate_strategy(elem, direction)
    strat_performance *= temp_performance
strat_performance

In [None]:
def evaluate_strategy_stop(date, direction,stop):
    """Return the growth factor of one day's trade with a configurable stop.

    The position is opened at the Close of the intraday row at position 17 of
    the global ``intradayndx_agg`` for ``date`` and valued at the day's Close
    from the global ``df``. Knock-outs are at 96% (long) and 104% (short) of
    the entry value. If the product price at the day's close, or at the Close
    of any intraday row after position 17, falls below ``stop`` times the
    entry price, the trade is settled at exactly ``stop`` times the entry price.

    Args:
        date: Trading day to evaluate.
        direction: 1 selects the long product, anything else the short one.
        stop: Fraction of the entry price at which the position is stopped
            out (e.g. 0.93 caps the loss at 7%).

    Returns:
        1 + relative change of the selected product's price.
    """
    close_day = df[df["Date"] == date]["Close"].values[0]
    df_intra_day = intradayndx_agg[intradayndx_agg["Date"]==date]

    index_value = df_intra_day.iloc[17]["Close"]

    knockout_long = 0.96 * index_value
    knockout_short = 1.04 * index_value

    open_price_long = pricelong(index_value,knockout_long)
    open_price_short = priceshort(index_value,knockout_short)

    close_price_long = pricelong(close_day,knockout_long)
    close_price_short = priceshort(close_day,knockout_short)

    later_closes = df_intra_day.iloc[18:]["Close"]
    # Lowest price each product reaches after entry. default= covers a day
    # with no bars after the entry bar (min() of an empty sequence raises).
    lowest_long = min((pricelong(elem, knockout_long) for elem in later_closes), default=close_price_long)
    # The short product loses when the index rises, so its stop must look at
    # the MINIMUM short price and compare it with the SHORT entry price.
    lowest_short = min((priceshort(elem, knockout_short) for elem in later_closes), default=close_price_short)

    if close_price_long < stop * open_price_long or lowest_long < stop * open_price_long:
        close_price_long = stop * open_price_long
    if close_price_short < stop * open_price_short or lowest_short < stop * open_price_short:
        close_price_short = stop * open_price_short

    if direction == 1:
        return 1 + (close_price_long - open_price_long) / open_price_long
    return 1 + (close_price_short - open_price_short) / open_price_short

In [None]:
best_val = 7.160002074325383

 
    


In [None]:
def simple_kan(**params):
    """Objective function: train a KAN and score it by validation strategy return.

    Builds a KAN with ``round(layers)`` hidden layers whose widths are taken
    from ``x1``..``x4``, trains it on ``dataset_kan`` for ``epoch`` LBFGS
    steps, predicts a direction for every validation day and compounds the
    per-day results of ``evaluate_strategy_stop`` (stop 0.93).

    When the compounded result beats the global ``best_val``, ``best_val`` is
    updated, the checkpoint and the layer structure are written to disk and
    the performance curve is plotted.

    Note: a later cell of this notebook defines ``simple_kan`` again, and that
    later definition replaces this one when the cells run in order.

    Args:
        **params: ``layers``, ``x1``, ``x2``, ``x3``, ``x4`` and ``epoch``.

    Returns:
        The compounded strategy performance, or 0 when training failed.
    """
    global best_val
    # plotly.graph_objects is only imported in a later cell of the notebook
    import plotly.graph_objects as go

    layers = round(params["layers"])
    hidden_layers = [int(params["x1"]), int(params["x2"]), int(params["x3"]), int(params["x4"])]
    epochs = int(params["epoch"])
    kan_struc = [176] + hidden_layers[0:layers] + [2]

    model = KAN(width=kan_struc, grid=3, k=3)

    # The metrics must evaluate the model trained HERE; the module-level
    # train_acc/test_acc read the global `model` instead.
    def train_acc():
        predicted = torch.argmax(model(dataset_kan['train_input']), dim=1)
        return torch.mean((predicted == dataset_kan['train_label']).float())

    def test_acc():
        predicted = torch.argmax(model(dataset_kan['test_input']), dim=1)
        return torch.mean((predicted == dataset_kan['test_label']).float())

    try:
        model.train(dataset_kan, opt="LBFGS", steps=epochs, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss())
    except Exception:
        # A failed configuration scores 0. Catching Exception (not a bare
        # except) keeps Ctrl-C / kernel interrupt working.
        logger.exception("KAN training failed for structure %s", kan_struc)
        return 0

    y_pred_class = torch.argmax(model(torch.from_numpy(X_val.astype(np.float32))), dim=1)

    strat_performance = 1
    stats = []
    for i, elem in enumerate(dataset_val[:, 0]):
        direction = int(y_pred_class[i])  # plain int instead of a 0-d tensor
        temp_performance = evaluate_strategy_stop(elem, direction, 0.93)
        strat_performance *= temp_performance
        stats.append([elem, temp_performance, strat_performance, direction])

    if strat_performance > best_val:
        best_val = strat_performance
        model.save_ckpt('bestmodel_kan')
        file_path = 'kan_struc_best.json'

        df_pic = pd.DataFrame(stats, columns=['Date', 'Day_Performance', 'Current_Performance','Direction'])

        # Create the plot with one trace per value column
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic['Day_Performance'], mode='lines+markers', name='Day_Performance'))
        fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic['Current_Performance'], mode='lines+markers', name='Current_Performance'))

        fig.update_layout(
            title='Time Series Data',
            xaxis_title='Date',
            yaxis_title='Performance',
            template='plotly'
        )

        # Show the plot
        fig.show()

        # Persist the layer structure next to the checkpoint
        with open(file_path, 'w') as json_file:
            json.dump(kan_struc, json_file)

    return strat_performance


In [None]:
import json
import os
import torch
import boto3
import plotly.graph_objects as go
import pandas as pd
from tqdm.auto import tqdm
from bayes_opt import BayesianOptimization
from bayes_opt.util import UtilityFunction

# Define the S3 bucket and paths
BUCKET_NAME = 'REDACTED_BUCKET'
s3_client = boto3.client('s3')

# Early stopping variables
no_improvement_count = 0
best_val = 7.160002074325383
wait = 50  # Early stopping wait iterations
def simple_kan(**params):
    """Objective function: train a KAN and score it by validation strategy return.

    Builds a KAN with ``round(layers)`` hidden layers whose widths are taken
    from ``x1``..``x4``, trains it on ``dataset_kan`` for ``epoch`` LBFGS
    steps, predicts a direction for every validation day and compounds the
    per-day results of ``evaluate_strategy_stop`` (stop 0.93).

    When the compounded result beats the global ``best_val``, the checkpoint
    and the layer structure are written to disk, both are uploaded to the S3
    bucket ``BUCKET_NAME`` and the performance curve is plotted. This function
    does not update ``best_val`` itself; the caller is expected to do that.

    Args:
        **params: ``layers``, ``x1``, ``x2``, ``x3``, ``x4`` and ``epoch``.

    Returns:
        The compounded strategy performance, or 0 when training failed.
    """
    global best_val
    layers = round(params["layers"])
    hidden_layers = [int(params["x1"]), int(params["x2"]), int(params["x3"]), int(params["x4"])]
    epochs = int(params["epoch"])
    kan_struc = [176] + hidden_layers[0:layers] + [2]

    model = KAN(width=kan_struc, grid=3, k=3)

    # The metrics must evaluate the model trained HERE; the module-level
    # train_acc/test_acc read the global `model` instead.
    def train_acc():
        predicted = torch.argmax(model(dataset_kan['train_input']), dim=1)
        return torch.mean((predicted == dataset_kan['train_label']).float())

    def test_acc():
        predicted = torch.argmax(model(dataset_kan['test_input']), dim=1)
        return torch.mean((predicted == dataset_kan['test_label']).float())

    try:
        model.train(dataset_kan, opt="LBFGS", steps=epochs, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss())
    except Exception:
        # A failed configuration scores 0. Catching Exception (not a bare
        # except) keeps Ctrl-C / kernel interrupt working.
        logger.exception("KAN training failed for structure %s", kan_struc)
        return 0

    y_pred_class = torch.argmax(model(torch.from_numpy(X_val.astype(np.float32))), dim=1)

    strat_performance = 1
    stats = []
    for i, elem in enumerate(dataset_val[:, 0]):
        direction = int(y_pred_class[i])  # plain int instead of a 0-d tensor
        temp_performance = evaluate_strategy_stop(elem, direction, 0.93)
        strat_performance *= temp_performance
        stats.append([elem, temp_performance, strat_performance, direction])

    if strat_performance > best_val:

        model.save_ckpt('bestmodel_kan')

        # Save KAN structure as JSON
        file_path = 'kan_struc_best.json'
        with open(file_path, 'w') as json_file:
            json.dump(kan_struc, json_file)

        # Upload KAN structure to S3 bucket
        s3_client.upload_file(file_path, BUCKET_NAME, file_path)

        # Upload model weights to S3 bucket
        model_weight_path = 'model_ckpt/bestmodel_kan'
        s3_client.upload_file(model_weight_path, BUCKET_NAME,"bestmodel_kan")

        # Prepare data for Plotly
        df_pic = pd.DataFrame(stats, columns=['Date', 'Day_Performance', 'Current_Performance', 'Direction'])

        # Create the plot with dark mode
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic['Day_Performance'], mode='lines+markers', name='Day_Performance'))
        fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic['Current_Performance'], mode='lines+markers', name='Current_Performance'))

        fig.update_layout(
            title='Time Series Data',
            xaxis_title='Date',
            yaxis_title='Performance',
            template='plotly_dark'  # Dark mode enabled
        )

        # Show the plot
        fig.show()

    return strat_performance

# Initialize Bayesian optimization with utility function
utility = UtilityFunction(kind="ucb", kappa=2.576, xi=0.0)
optimizer = BayesianOptimization(
    f=simple_kan,
    pbounds={'x1': (1, 32), 'x2': (1, 32), 'x3': (1, 32), 'x4': (1, 32),
             'epoch': (2, 50), 'layers': (0.5001, 4.5)},
    verbose=2,
    random_state=42,
)

# Set Gaussian process parameters
optimizer.set_gp_params(alpha=1e-6)

# Run the Bayesian optimization
TOTAL_ITERATIONS = 150
#optimizer.maximize(init_points=5, n_iter=0)  # Run initial exploration for 5 points
"""
for i in tqdm(range(TOTAL_ITERATIONS - 5)):  # Subtract the initial 5 points
    # Perform the next optimization step
    suggested_params = optimizer.suggest(utility)
    target = simple_kan(**suggested_params)  # Get target for the suggested parameters
    optimizer.register(params=suggested_params, target=target)  # Register new observation
    
    # Check for early stopping
    if target > best_val:
        best_val = target
        no_improvement_count = 0
    else:
        no_improvement_count += 1

    if no_improvement_count >= wait:
        print(f"Early stopping triggered after {i + 1 + 5} iterations")  # +5 accounts for the initial points
        break"""

In [None]:
TOTAL_ITERATIONS = 150
optimizer = BayesianOptimization(
    f=simple_kan,
     pbounds={'x1': (1, 32),'x2': (1, 32),'x3': (1, 32),'x4': (1, 32),
 
 'epoch': (2, 50),
 'layers': (0.5001, 4.5)},
    verbose=2,
    random_state=42,
    #bounds_transformer=bounds_transformer
)

#optimizer.maximize(
 #  init_points=5,
 ##   n_iter=TOTAL_ITERATIONS-5,
#)


In [None]:
def load_kan():
    """Rebuild the saved KAN: read the layer widths from
    'kan_struc_best.json', construct a model of that shape, print the widths
    and load the 'bestmodel_kan' checkpoint into it."""
    with open('kan_struc_best.json', 'r') as structure_file:
        layer_widths = json.load(structure_file)

    restored = KAN(width=layer_widths,  grid=3, k=3)
    print(layer_widths)
    restored.load_ckpt('bestmodel_kan')
    return restored


In [None]:
import json

In [None]:
model = load_kan()

In [None]:
y_pred_class = torch.argmax(model(torch.from_numpy(X_val.astype(np.float32))), dim=1)

In [None]:
strat_performance = 1
stats = []
for i, elem in enumerate(dataset_val[:,0]):
    direction = y_pred_class[i]
    temp_performance = evaluate_strategy_stop(elem, direction, 0.93)
    
    
    strat_performance *= temp_performance
    stats.append([elem,temp_performance,strat_performance,direction])
strat_performance

In [None]:
y_pred_class[-2:]

In [None]:
X2_val[-1]

In [None]:
import plotly.graph_objects as go

In [None]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np


# Tabulate the per-row records collected in `stats`.
df_pic = pd.DataFrame(stats, columns=['Date', 'Day_Performance', 'Current_Performance', 'Direction'])
df_pic['Date'] = pd.to_datetime(df_pic['Date'])  # datetime dtype is needed for the day arithmetic below

# Date for which the linear trend is extrapolated ('YYYY-MM-DD').
future_date_str = '2025-01-31'
future_date = pd.to_datetime(future_date_str)

# Express every date as whole days since the first observation.
base_date = df_pic['Date'].min()
days_since_start = (df_pic['Date'] - base_date).dt.days
days_to_future = (future_date - base_date).days

# Degree-1 least-squares fit of cumulative performance against elapsed days.
z = np.polyfit(days_since_start, df_pic['Current_Performance'], 1)
p = np.poly1d(z)
trendline_values = p(days_since_start)
future_value = p(days_to_future)

fig = go.Figure()

# One line per performance column, named after the column.
for perf_column in ('Day_Performance', 'Current_Performance'):
    fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic[perf_column], mode='lines+markers', name=perf_column))

# Fitted trend over the observed dates.
trend_trace = go.Scatter(
    x=df_pic['Date'],
    y=trendline_values,
    mode='lines',
    name='Trendline (Current Performance)',
    line={'dash': 'dot'},
)
fig.add_trace(trend_trace)

# Single extrapolated point at the future date.
prediction_trace = go.Scatter(
    x=[future_date],
    y=[future_value],
    mode='markers+text',
    name='Predicted Value',
    text=[f'Predicted: {future_value:.2f}'],
    textposition='top center',
    marker={'color': 'red', 'size': 10},
)
fig.add_trace(prediction_trace)

fig.update_layout(
    title='Time Series Data with Future Prediction',
    xaxis_title='Date',
    yaxis_title='Performance',
    template='plotly_dark',
)

fig.show()

In [None]:
40.78/31.55

In [None]:

# First, let's add the streak column
def calculate_streak(df):
    """Return the running count of up-days minus down-days.

    Walks ``df['Day_Performance']`` in order. A value above 1 adds one to the
    tally, a value below 1 subtracts one, and anything else (exactly 1, or a
    value that compares false both ways such as NaN) leaves it unchanged.

    Returns:
        A list with the tally after each row, same length as the column.
    """
    tally = 0
    history = []

    for day_value in df['Day_Performance']:
        # +1 for a gain, -1 for a loss, 0 otherwise.
        tally += int(day_value > 1) - int(day_value < 1)
        history.append(tally)

    return history

# Attach the running up/down tally to the performance frame.
df_pic['streak'] = calculate_streak(df_pic)

# Both axes use the same faint grey grid and zero line on a black background.
axis_style = dict(
    showgrid=True,
    gridwidth=1,
    gridcolor='rgba(128,128,128,0.2)',
    zeroline=True,
    zerolinewidth=1,
    zerolinecolor='rgba(128,128,128,0.2)',
)

streak_trace = go.Scatter(
    x=df_pic['Date'],
    y=df_pic['streak'],
    mode='lines+markers',
    name='streak',
    line=dict(width=2),
    marker=dict(size=6),
)

fig = go.Figure()
fig.add_trace(streak_trace)

# Dark theme: black paper and plot area, white text.
fig.update_layout(
    title='Time Series Data for ML Performance',
    xaxis_title='Date',
    yaxis_title='Performance',
    template='plotly_dark',
    paper_bgcolor='rgba(0,0,0,1)',
    plot_bgcolor='rgba(0,0,0,1)',
    font=dict(color='white'),
    xaxis=dict(axis_style),
    yaxis=dict(axis_style),
)

fig.show()


In [None]:
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

In [None]:
# Stratified 80/20 split of the feature matrix X and labels y (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
# Second split over the raw rows of `dataset` dated before `six_months_ago`,
# using the same seed and test size; only the feature halves are kept.
# NOTE(review): this reuses `y` for labels/stratification, so the filtered
# rows must line up one-to-one with `y` -- confirm against where X/y are built.
X_train_final, X_val_final, _, _ = train_test_split(
    dataset[dataset["Date"] < six_months_ago].values, y, test_size=0.2, stratify=y, random_state=42
)

# Convert numpy arrays to PyTorch tensors
# (float32 features, int64 class labels as required by CrossEntropyLoss)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Create DataLoaders
# Training batches are reshuffled each epoch; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Print to verify shapes
print(f"Train data shape: {X_train_tensor.shape}, Labels shape: {y_train_tensor.shape}")
print(f"Validation data shape: {X_val_tensor.shape}, Labels shape: {y_val_tensor.shape}")

In [None]:
type(X_val_final) 


In [None]:
X_train_tensor

In [None]:
# Define model and data parameters
# Input width = number of feature columns in the training matrix.
num_features = X_train.shape[1]
# Output width = number of distinct labels seen in the training split.
num_classes = len(np.unique(y_train))

In [None]:
def get_validation_predictions(model, val_loader):
    """Return the predicted class index for every sample yielded by ``val_loader``.

    Args:
        model: A ``torch.nn.Module`` producing one score per class; it is
            switched to evaluation mode as a side effect.
        val_loader: Iterable of ``(inputs, targets)`` batches. Targets are ignored.

    Returns:
        A 1-D tensor of class indices in loader order. An empty loader yields
        an empty ``torch.long`` tensor instead of failing inside ``torch.cat``.
    """
    model.eval()  # Set the model to evaluation mode

    # Run each batch on whatever device the model's parameters live on, so a
    # CPU-backed loader also works with a model that was moved to the GPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    all_predictions = []
    with torch.no_grad():  # Disable gradient calculations
        for inputs, _ in val_loader:
            if model_device is not None:
                inputs = inputs.to(model_device)
            outputs = model(inputs)
            _, predicted_classes = torch.max(outputs, 1)  # Index of the highest score per row
            all_predictions.append(predicted_classes)

    if not all_predictions:
        return torch.empty(0, dtype=torch.long)

    # Combine all per-batch predictions into a single tensor
    return torch.cat(all_predictions)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from bayes_opt import BayesianOptimization
import plotly.graph_objects as go
import numpy as np

# Device handling: Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Best strategy score seen so far across optimization trials; train_and_evaluate
# updates it through `global` and saves a checkpoint whenever it is beaten.
best_performance = -float('inf')

# Updated ComplexNN with different dropout rates
class ComplexNN(nn.Module):
    """Three-hidden-layer MLP classifier with a separate dropout rate per layer.

    The first two hidden layers are Linear -> BatchNorm1d -> GELU -> Dropout;
    the third is Linear -> GELU -> Dropout (no batch norm). A final linear
    ``head`` maps to ``num_classes`` raw scores (logits).
    """

    def __init__(self, input_size, num_classes, hidden_units_1=128, hidden_units_2=64, hidden_units_3=32,
                 dropout_rate_1=0.3, dropout_rate_2=0.3, dropout_rate_3=0.3):
        super().__init__()

        # Module order fixes the state_dict keys (shared.0 ... shared.10),
        # so it must stay exactly as listed for saved checkpoints to load.
        trunk = [
            nn.Linear(input_size, hidden_units_1),
            nn.BatchNorm1d(hidden_units_1),
            nn.GELU(),
            nn.Dropout(dropout_rate_1),
            nn.Linear(hidden_units_1, hidden_units_2),
            nn.BatchNorm1d(hidden_units_2),
            nn.GELU(),
            nn.Dropout(dropout_rate_2),
            nn.Linear(hidden_units_2, hidden_units_3),
            nn.GELU(),
            nn.Dropout(dropout_rate_3),
        ]
        self.shared = nn.Sequential(*trunk)
        self.head = nn.Linear(hidden_units_3, num_classes)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        features = self.shared(x)
        return self.head(features)


# Updated train_and_evaluate
def train_and_evaluate(lr, weight_decay, hidden_units_1, hidden_units_2, hidden_units_3, dropout_rate_1, dropout_rate_2, dropout_rate_3):
    """Train a ComplexNN with the given hyperparameters and score it on the strategy.

    Objective function for Bayesian optimization. It reads the notebook globals
    ``num_features``, ``num_classes``, ``device``, ``train_loader``,
    ``val_loader``, ``X_val``, ``dataset_val`` and ``evaluate_strategy_stop``,
    and updates the global ``best_performance``.

    Args:
        lr: Learning rate for AdamW.
        weight_decay: Weight decay for AdamW.
        hidden_units_1, hidden_units_2, hidden_units_3: Hidden layer widths;
            truncated with ``int()`` because the optimizer proposes floats.
        dropout_rate_1, dropout_rate_2, dropout_rate_3: Dropout probabilities.

    Returns:
        The product of ``evaluate_strategy_stop(...)`` over ``dataset_val[:, 0]``
        using the model's predicted class for each row.

    Side effects:
        When the score beats ``best_performance`` the weights are written to
        'best_model_state.pth', a message is printed and the loss curves are shown.
    """
    global best_performance
    model = ComplexNN(
        input_size=num_features,
        num_classes=num_classes,
        hidden_units_1=int(hidden_units_1),
        hidden_units_2=int(hidden_units_2),
        hidden_units_3=int(hidden_units_3),
        dropout_rate_1=dropout_rate_1,
        dropout_rate_2=dropout_rate_2,
        dropout_rate_3=dropout_rate_3
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    num_epochs = 50
    patience = 10
    best_loss = float('inf')
    best_model_state = None  # snapshot of the weights at the lowest validation loss
    wait = 0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Early stopping logic
        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() returns references to the live tensors, which keep
            # changing as training continues; clone them for a real snapshot.
            best_model_state = {
                name: tensor.detach().clone() for name, tensor in model.state_dict().items()
            }
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

        scheduler.step()

    # Roll back to the best-validation weights so that the `patience` epochs
    # trained after the optimum do not leak into the evaluated/saved model.
    # (Stays None only if the validation loss never improved, e.g. NaN losses.)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Evaluate the strategy
    model.eval()
    with torch.no_grad():
        outputs = model(torch.from_numpy(X_val.astype(np.float32)).to(device))
        _, predicted_classes = torch.max(outputs, 1)

    # Pull all predictions to the host once instead of one .item() call per row.
    predicted_directions = predicted_classes.cpu().tolist()

    strat_performance = 1
    for i, elem in enumerate(dataset_val[:, 0]):
        direction = predicted_directions[i]
        # 0.93 is forwarded unchanged; its meaning is defined by
        # evaluate_strategy_stop (presumably a stop level -- TODO confirm).
        temp_performance = evaluate_strategy_stop(elem, direction, 0.93)
        strat_performance *= temp_performance

    # Check for the best performance
    if strat_performance > best_performance:
        best_performance = strat_performance
        torch.save(model.state_dict(), 'best_model_state.pth')
        print("New best performance:", best_performance)
        print("Model saved.")

        # Plot training and validation losses
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=train_losses, mode='lines+markers', name='Training Loss'))
        fig.add_trace(go.Scatter(y=val_losses, mode='lines+markers', name='Validation Loss'))
        fig.update_layout(
            title="Training and Validation Loss",
            xaxis_title="Epochs",
            yaxis_title="Loss",
            template="plotly_dark",
            legend=dict(x=0, y=1)
        )
        fig.show()

    return strat_performance


# Updated Bayesian Optimization
# Search bounds per hyperparameter. The optimizer proposes floats; the
# hidden_units_* values are truncated with int() inside train_and_evaluate.
optimizer_params = {
    'lr': (1e-4, 1e-2),
    'weight_decay': (1e-5, 1e-2),
    'hidden_units_1': (64, 256),
    'hidden_units_2': (32, 128),
    'hidden_units_3': (16, 64),
    'dropout_rate_1': (0.1, 0.5),
    'dropout_rate_2': (0.1, 0.5),
    'dropout_rate_3': (0.1, 0.5)
}

# Maximize the strategy performance returned by train_and_evaluate.
bo = BayesianOptimization(
    f=train_and_evaluate,
    pbounds=optimizer_params,
    random_state=42,
    verbose=2
)
# 5 initial exploration points followed by 100 optimization steps; every step
# trains a fresh network (up to 50 epochs), so this cell is long-running.
bo.maximize(init_points=5, n_iter=100)
print("Best parameters found:", bo.max)


In [None]:
# Inspect the best hyperparameters found (bo.max['params']); they are loaded into the ComplexNN class defined above as a model.
bo.max['params']

In [None]:
import torch

# Initialize the model using the best parameters found during Bayesian Optimization
best_params = bo.max['params']
model = ComplexNN(
    input_size=num_features,
    num_classes=num_classes,
    hidden_units_1=int(best_params['hidden_units_1']),  # widths are floats in bo.max; truncate like training did
    hidden_units_2=int(best_params['hidden_units_2']),
    hidden_units_3=int(best_params['hidden_units_3']),
    dropout_rate_1=best_params['dropout_rate_1'],  # Different dropout for first layer
    dropout_rate_2=best_params['dropout_rate_2'],  # Different dropout for second layer
    dropout_rate_3=best_params['dropout_rate_3']   # Different dropout for third layer
)

# Load the model state.
# map_location='cpu' lets a checkpoint written from a CUDA model load on a
# CPU-only machine. The model is deliberately left on the CPU: the inference
# cells that follow feed it CPU tensors built with torch.from_numpy(...).
model.load_state_dict(torch.load('best_model_state.pth', map_location='cpu'))

# Set the model to evaluation mode (disables dropout, uses BatchNorm running stats)
model.eval()

print("Model loaded successfully and set to evaluation mode!")


In [None]:
model.eval()

In [None]:
# Predict one class index per validation row. no_grad() keeps autograd from
# recording a graph for this pure-inference forward pass; the predictions are unchanged.
with torch.no_grad():
    y_pred_class = torch.argmax(model(torch.from_numpy(X_val.astype(np.float32))), dim=1)

In [None]:
# Evaluation mode: dropout off, BatchNorm uses its running statistics.
model.eval()

# Single forward pass over the whole validation matrix, without autograd.
with torch.no_grad():
    outputs = model(torch.from_numpy(X_val.astype(np.float32)))
    predicted_classes = torch.max(outputs, 1)[1]  # index of the highest score per row

# One-batch concatenation; yields a 1-D tensor of predicted class indices.
y_pred_class = torch.cat([predicted_classes])

In [None]:
y_pred_class

In [None]:
# Compound the per-row strategy result over the validation set and keep a
# per-row record in `stats` (later turned into a DataFrame with columns
# Date / Day_Performance / Current_Performance / Direction).
strat_performance = 1
stats = []
for row_idx, row_key in enumerate(dataset_val[:, 0]):
    direction = y_pred_class[row_idx]
    # 0.93 is passed through unchanged; its meaning is defined by
    # evaluate_strategy_stop (presumably a stop level -- TODO confirm).
    temp_performance = evaluate_strategy_stop(row_key, direction, 0.93)
    strat_performance *= temp_performance
    stats.append([row_key, temp_performance, strat_performance, direction])
strat_performance

In [None]:

# Tabulate the per-row records collected in `stats`.
df_pic = pd.DataFrame(stats, columns=['Date', 'Day_Performance', 'Current_Performance', 'Direction'])
df_pic['Date'] = pd.to_datetime(df_pic['Date'])  # datetime dtype is needed for the day arithmetic below

# Date for which the linear trend is extrapolated ('YYYY-MM-DD').
future_date_str = '2025-01-31'
future_date = pd.to_datetime(future_date_str)

# Express every date as whole days since the first observation.
base_date = df_pic['Date'].min()
days_since_start = (df_pic['Date'] - base_date).dt.days
days_to_future = (future_date - base_date).days

# Degree-1 least-squares fit of cumulative performance against elapsed days.
z = np.polyfit(days_since_start, df_pic['Current_Performance'], 1)
p = np.poly1d(z)
trendline_values = p(days_since_start)
future_value = p(days_to_future)

fig = go.Figure()

# One line per performance column, named after the column.
for perf_column in ('Day_Performance', 'Current_Performance'):
    fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic[perf_column], mode='lines+markers', name=perf_column))

# Fitted trend over the observed dates.
trend_trace = go.Scatter(
    x=df_pic['Date'],
    y=trendline_values,
    mode='lines',
    name='Trendline (Current Performance)',
    line={'dash': 'dot'},
)
fig.add_trace(trend_trace)

# Single extrapolated point at the future date.
prediction_trace = go.Scatter(
    x=[future_date],
    y=[future_value],
    mode='markers+text',
    name='Predicted Value',
    text=[f'Predicted: {future_value:.2f}'],
    textposition='top center',
    marker={'color': 'red', 'size': 10},
)
fig.add_trace(prediction_trace)

fig.update_layout(
    title='Time Series Data with Future Prediction',
    xaxis_title='Date',
    yaxis_title='Performance',
    template='plotly_dark',
)

fig.show()

In [None]:
class DynamicComplexNN(nn.Module):
    """MLP classifier whose depth, widths and dropout rates are chosen at construction.

    Every hidden layer is Linear -> BatchNorm1d -> GELU -> Dropout, stored flat
    in ``self.layers`` (four modules per hidden layer), followed by a linear
    ``output`` layer producing ``num_classes`` raw scores (logits).
    """

    def __init__(self, input_size, num_classes, num_hidden_layers=3, hidden_units=None, dropout_rates=None):
        """
        input_size: int, number of input features
        num_classes: int, number of output classes
        num_hidden_layers: int, number of hidden layers
        hidden_units: list of int, sizes of each hidden layer
        dropout_rates: list of float, dropout rates for each layer

        Only the first ``num_hidden_layers`` entries of each list are used.

        Raises:
            ValueError: if either list is missing or has fewer than
                ``num_hidden_layers`` entries.
        """
        super(DynamicComplexNN, self).__init__()

        if hidden_units is None or dropout_rates is None:
            raise ValueError("hidden_units and dropout_rates must be provided as lists")

        # Fail early with a clear message instead of an IndexError halfway
        # through building the layer stack.
        if len(hidden_units) < num_hidden_layers or len(dropout_rates) < num_hidden_layers:
            raise ValueError(
                f"hidden_units and dropout_rates need at least {num_hidden_layers} entries "
                f"(got {len(hidden_units)} and {len(dropout_rates)})"
            )

        self.layers = nn.ModuleList()
        current_input_size = input_size

        # Dynamically create layers
        for i in range(num_hidden_layers):
            self.layers.append(nn.Linear(current_input_size, hidden_units[i]))
            self.layers.append(nn.BatchNorm1d(hidden_units[i]))
            self.layers.append(nn.GELU())  # Activation
            self.layers.append(nn.Dropout(dropout_rates[i]))  # Dropout
            current_input_size = hidden_units[i]  # next layer consumes this layer's output

        # Final output layer
        self.output = nn.Linear(current_input_size, num_classes)

    def forward(self, x):
        """Apply the hidden stack in order and return class logits."""
        for layer in self.layers:
            x = layer(x)
        return self.output(x)


In [None]:
# Updated train_and_evaluate
def train_and_evaluate(lr, weight_decay, num_hidden_layers, *hidden_units_and_dropouts):
    """Train a DynamicComplexNN with the given hyperparameters and score it on the strategy.

    Objective function for Bayesian optimization (this definition replaces the
    earlier fixed-depth ``train_and_evaluate`` of the same name). It reads the
    notebook globals ``num_features``, ``num_classes``, ``device``,
    ``train_loader``, ``val_loader``, ``X_val``, ``dataset_val`` and
    ``evaluate_strategy_stop``, and updates the global ``best_performance``.

    Args:
        lr: Learning rate for AdamW.
        weight_decay: Weight decay for AdamW.
        num_hidden_layers: Number of hidden layers; truncated with ``int()``.
        *hidden_units_and_dropouts: Flat sequence alternating
            ``hidden_units, dropout_rate`` for each layer, i.e. at least
            ``2 * num_hidden_layers`` values. Hidden units are truncated with ``int()``.

    Returns:
        The product of ``evaluate_strategy_stop(...)`` over ``dataset_val[:, 0]``
        using the model's predicted class for each row.

    Side effects:
        When the score beats ``best_performance`` the weights are written to
        'best_model_weights.pth', a message is printed and the loss curves are shown.
    """
    global best_performance

    # Extract hidden units and dropout rates dynamically
    num_hidden_layers = int(num_hidden_layers)
    hidden_units = []
    dropout_rates = []

    for i in range(num_hidden_layers):
        hidden_units.append(int(hidden_units_and_dropouts[2 * i]))      # Even indices: Hidden units
        dropout_rates.append(hidden_units_and_dropouts[2 * i + 1])     # Odd indices: Dropout rates

    # Initialize the model
    model = DynamicComplexNN(
        input_size=num_features,
        num_classes=num_classes,
        num_hidden_layers=num_hidden_layers,
        hidden_units=hidden_units,
        dropout_rates=dropout_rates
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    num_epochs = 50
    patience = 10
    best_loss = float('inf')
    best_model_state = None  # snapshot of the weights at the lowest validation loss
    wait = 0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        # Early stopping logic
        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() returns references to the live tensors, which keep
            # changing as training continues; clone them for a real snapshot.
            best_model_state = {
                name: tensor.detach().clone() for name, tensor in model.state_dict().items()
            }
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                break

        scheduler.step()

    # Roll back to the best-validation weights so that the `patience` epochs
    # trained after the optimum do not leak into the evaluated/saved model.
    # (Stays None only if the validation loss never improved, e.g. NaN losses.)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Evaluate the strategy
    model.eval()
    with torch.no_grad():
        outputs = model(torch.from_numpy(X_val.astype(np.float32)).to(device))
        _, predicted_classes = torch.max(outputs, 1)

    # Pull all predictions to the host once instead of one .item() call per row.
    predicted_directions = predicted_classes.cpu().tolist()

    strat_performance = 1
    for i, elem in enumerate(dataset_val[:, 0]):
        direction = predicted_directions[i]
        # 0.93 is forwarded unchanged; its meaning is defined by
        # evaluate_strategy_stop (presumably a stop level -- TODO confirm).
        temp_performance = evaluate_strategy_stop(elem, direction, 0.93)
        strat_performance *= temp_performance

    # Check for the best performance
    if strat_performance > best_performance:
        best_performance = strat_performance
        torch.save(model.state_dict(), 'best_model_weights.pth')
        print("New best performance:", best_performance)
        print("Model saved.")

        # Plot training and validation losses
        fig = go.Figure()
        fig.add_trace(go.Scatter(y=train_losses, mode='lines+markers', name='Training Loss'))
        fig.add_trace(go.Scatter(y=val_losses, mode='lines+markers', name='Validation Loss'))
        fig.update_layout(
            title="Training and Validation Loss",
            xaxis_title="Epochs",
            yaxis_title="Loss",
            template="plotly_dark",
            legend=dict(x=0, y=1)
        )
        fig.show()

    return strat_performance

In [None]:
# Reset the running best score so this search is not compared against the
# best score of the earlier fixed-architecture search.
best_performance = -float('inf')

max_layers = 20  # Allow up to 20 hidden layers dynamically
optimizer_params = {
    'lr': (1e-4, 1e-2),
    'weight_decay': (1e-5, 1e-2),
    'num_hidden_layers': (1, max_layers),  # Number of layers: 1 to max_layers
}

# Add hidden units and dropout rates dynamically
# One (hidden_units_k, dropout_rate_k) pair is registered for every possible
# layer k = 1..max_layers, whether or not a given trial uses that many layers.
for i in range(max_layers):
    optimizer_params[f'hidden_units_{i+1}'] = (2, 512)  # Hidden units range
    optimizer_params[f'dropout_rate_{i+1}'] = (0.01, 0.5)  # Dropout rate range


# Define the wrapper for train_and_evaluate
def bayesian_eval_wrapper(lr, weight_decay, num_hidden_layers, **kwargs):
    """Adapt the optimizer's keyword arguments to ``train_and_evaluate``.

    The optimizer passes every ``hidden_units_k`` / ``dropout_rate_k`` value as
    a keyword. Only the pairs for layers ``1..int(num_hidden_layers)`` are
    forwarded, flattened as ``units_1, dropout_1, units_2, dropout_2, ...``.

    Returns:
        Whatever ``train_and_evaluate`` returns for that configuration.
    """
    layer_count = int(num_hidden_layers)
    flat_args = []

    # Collect (units, dropout) for each active layer, in layer order.
    for layer_no in range(1, layer_count + 1):
        flat_args.extend((kwargs[f"hidden_units_{layer_no}"], kwargs[f"dropout_rate_{layer_no}"]))

    return train_and_evaluate(lr, weight_decay, layer_count, *flat_args)

# Run Bayesian Optimization
# The wrapper is the objective: it turns the optimizer's keyword arguments
# into the positional layout train_and_evaluate expects.
bo = BayesianOptimization(
    f=bayesian_eval_wrapper,
    pbounds=optimizer_params,
    random_state=42,
    verbose=2
)
# 5 initial exploration points followed by 1500 optimization steps; every step
# trains a fresh network (up to 50 epochs), so this cell is very long-running.
bo.maximize(init_points=5, n_iter=1500)
print("Best parameters found:", bo.max)

In [None]:
import torch

import torch
# Best hyperparameter set found by the Bayesian search.
best_params = bo.max['params']

# Depth first, then the per-layer widths and dropout rates for layers 1..depth.
num_hidden_layers = int(best_params['num_hidden_layers'])
layer_numbers = range(1, num_hidden_layers + 1)
hidden_units = [int(best_params[f'hidden_units_{n}']) for n in layer_numbers]
dropout_rates = [best_params[f'dropout_rate_{n}'] for n in layer_numbers]

# Rebuild the network with exactly that architecture.
model = DynamicComplexNN(
    input_size=num_features,
    num_classes=num_classes,
    num_hidden_layers=num_hidden_layers,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
)

# Read the checkpoint written during the search and copy it into the model.
saved_weights = torch.load('best_model_weights.pth', map_location=device)
model.load_state_dict(saved_weights)

# Evaluation mode: dropout off, BatchNorm uses its running statistics.
model.eval()

# Report what was loaded.
print("Model loaded successfully with the following architecture:")
print(f"Number of Hidden Layers: {num_hidden_layers}")
print(f"Hidden Units: {hidden_units}")
print(f"Dropout Rates: {dropout_rates}")
print(model)

# Single forward pass over the whole validation matrix, without autograd.
with torch.no_grad():
    outputs = model(torch.from_numpy(X_val.astype(np.float32)))
    predicted_classes = torch.argmax(outputs, dim=1)  # Get the class with the highest score

# There is only one batch, so no list/concatenation step is needed.
y_pred_class = predicted_classes

# Compound the per-row strategy result and keep a per-row record in `stats`.
# (The per-row debug print of `direction` was removed: it emitted one tensor
# repr per validation row and buried the rest of the cell output.)
strat_performance = 1
stats = []
for i, elem in enumerate(dataset_val[:, 0]):
    direction = y_pred_class[i]
    # 0.93 is passed through unchanged; its meaning is defined by
    # evaluate_strategy_stop (presumably a stop level -- TODO confirm).
    temp_performance = evaluate_strategy_stop(elem, direction, 0.93)

    strat_performance *= temp_performance
    stats.append([elem, temp_performance, strat_performance, direction])

# Tabulate the per-row records collected in `stats`.
df_pic = pd.DataFrame(stats, columns=['Date', 'Day_Performance', 'Current_Performance', 'Direction'])
df_pic['Date'] = pd.to_datetime(df_pic['Date'])  # datetime dtype is needed for the day arithmetic below

# Date for which the linear trend is extrapolated ('YYYY-MM-DD').
future_date_str = '2025-01-31'
future_date = pd.to_datetime(future_date_str)

# Express every date as whole days since the first observation.
base_date = df_pic['Date'].min()
days_since_start = (df_pic['Date'] - base_date).dt.days
days_to_future = (future_date - base_date).days

# Degree-1 least-squares fit of cumulative performance against elapsed days.
z = np.polyfit(days_since_start, df_pic['Current_Performance'], 1)
p = np.poly1d(z)
trendline_values = p(days_since_start)
future_value = p(days_to_future)

fig = go.Figure()

# One line per performance column, named after the column.
for perf_column in ('Day_Performance', 'Current_Performance'):
    fig.add_trace(go.Scatter(x=df_pic['Date'], y=df_pic[perf_column], mode='lines+markers', name=perf_column))

# Fitted trend over the observed dates.
trend_trace = go.Scatter(
    x=df_pic['Date'],
    y=trendline_values,
    mode='lines',
    name='Trendline (Current Performance)',
    line={'dash': 'dot'},
)
fig.add_trace(trend_trace)

# Single extrapolated point at the future date.
prediction_trace = go.Scatter(
    x=[future_date],
    y=[future_value],
    mode='markers+text',
    name='Predicted Value',
    text=[f'Predicted: {future_value:.2f}'],
    textposition='top center',
    marker={'color': 'red', 'size': 10},
)
fig.add_trace(prediction_trace)

fig.update_layout(
    title='Time Series Data with Future Prediction',
    xaxis_title='Date',
    yaxis_title='Performance',
    template='plotly_dark',
)

fig.show()


In [None]:
data = {
  "wait": 0,
  "counter": 0,
  "X1": [
    [
      -0.13192251324653625,
      -0.18630719184875488,
      -0.011558888480067253,
      -0.019972870126366615,
      -0.008799443952739239,
      -0.022233862429857254,
      -0.0105429133400321,
      -0.010949401184916496,
      -0.0044204252772033215,
      -0.015692589804530144,
      -0.027859758585691452,
      -0.028866613283753395,
      -0.022564474493265152,
      -0.03436919301748276,
      -0.034305281937122345,
      -0.038298334926366806,
      -0.02899443916976452,
      -0.043828755617141724,
      -0.013844897039234638,
      0.07633326202630997,
      0.21024826169013977,
      0.18406324088573456,
      1,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      1
    ]
  ],
  "X2": [
    [
      [
        0.01715690828859806,
        -0.01503333356231451,
        0.0005264173378236592,
        -0.01715690828859806,
        0.01503333356231451,
        -0.0005264173378236592
      ],
      [
        0.12117267400026321,
        -0.01503333356231451,
        0.12117267400026321,
        -0.12117267400026321,
        0.01503333356231451,
        -0.12117267400026321
      ],
      [
        0.12117267400026321,
        -0.01503333356231451,
        0.07095891237258911,
        -0.12117267400026321,
        0.01503333356231451,
        -0.07095891237258911
      ],
      [
        0.12117267400026321,
        -0.018070796504616737,
        0.011233698576688766,
        -0.12117267400026321,
        0.018070796504616737,
        -0.011233698576688766
      ],
      [
        0.12117267400026321,
        -0.018070796504616737,
        0.12019611895084381,
        -0.12117267400026321,
        0.018070796504616737,
        -0.12019611895084381
      ],
      [
        0.12117267400026321,
        -0.018070796504616737,
        0.06906571984291077,
        -0.12117267400026321,
        0.018070796504616737,
        -0.06906571984291077
      ],
      [
        0.12117267400026321,
        -0.018070796504616737,
        0.007597718387842178,
        -0.12117267400026321,
        0.018070796504616737,
        -0.007597718387842178
      ],
      [
        0.12117267400026321,
        -0.03401527553796768,
        0.04343205690383911,
        -0.12117267400026321,
        0.03401527553796768,
        -0.04343205690383911
      ],
      [
        0.12117267400026321,
        -0.03401527553796768,
        0.035369038581848145,
        -0.12117267400026321,
        0.03401527553796768,
        -0.035369038581848145
      ],
      [
        0.1717672348022461,
        -0.03401527553796768,
        0.10869491845369339,
        -0.1717672348022461,
        0.03401527553796768,
        -0.10869491845369339
      ],
      [
        0.2106027454137802,
        -0.03401527553796768,
        0.09224789589643478,
        -0.2106027454137802,
        0.03401527553796768,
        -0.09224789589643478
      ],
      [
        0.28900790214538574,
        -0.03401527553796768,
        0.16660094261169434,
        -0.28900790214538574,
        0.03401527553796768,
        -0.16660094261169434
      ],
      [
        0.28900790214538574,
        -0.03401527553796768,
        0.17652182281017303,
        -0.28900790214538574,
        0.03401527553796768,
        -0.17652182281017303
      ],
      [
        0.39697834849357605,
        -0.03401527553796768,
        0.22029921412467957,
        -0.39697834849357605,
        0.03401527553796768,
        -0.22029921412467957
      ],
      [
        0.39697834849357605,
        -0.03401527553796768,
        0.3080121576786041,
        -0.39697834849357605,
        0.03401527553796768,
        -0.3080121576786041
      ],
      [
        0.400627464056015,
        -0.03401527553796768,
        0.3626677691936493,
        -0.400627464056015,
        0.03401527553796768,
        -0.3626677691936493
      ],
      [
        0.40157681703567505,
        -0.03401527553796768,
        0.40157681703567505,
        -0.40157681703567505,
        0.03401527553796768,
        -0.40157681703567505
      ]
    ]
  ],
  "last": 21279.8753
}

In [None]:
# Turn the two input groups of the sample payload into float32 arrays.
X1 = np.array(data["X1"], dtype=np.float32)
X2 = np.array(data["X2"], dtype=np.float32)

# Reshape and stack inputs
# X2 is 3-D (rows, dim1, dim2); flatten its last two axes so each row becomes
# one vector, then append it to the right of the matching X1 row.
num_rows, dim1, dim2 = X2.shape
X2 = X2.reshape(num_rows, dim1 * dim2)
X111 = np.hstack((X1, X2))
# Displayed as the cell output only: the flattened result is not assigned,
# so X111 itself keeps its 2-D shape.
X111.reshape(-1)

In [None]:
X_val[len(X_val)-1]

In [None]:
# Best hyperparameter set found by the Bayesian search.
best_params = bo.max['params']

# Collect width and dropout for each of the selected hidden layers (1-based keys).
num_hidden_layers = int(best_params['num_hidden_layers'])
hidden_units = []
dropout_rates = []
for layer_no in range(1, num_hidden_layers + 1):
    hidden_units.append(int(best_params[f"hidden_units_{layer_no}"]))
    dropout_rates.append(best_params[f"dropout_rate_{layer_no}"])

# Everything needed to rebuild the network, plus the optimizer settings used.
model_structure = dict(
    input_size=num_features,
    num_classes=num_classes,
    num_hidden_layers=num_hidden_layers,
    hidden_units=hidden_units,
    dropout_rates=dropout_rates,
    lr=best_params["lr"],
    weight_decay=best_params["weight_decay"],
)

# Persist the description next to the saved weights.
with open("model_structure.json", "w") as structure_file:
    json.dump(model_structure, structure_file, indent=4)

print("Model structure saved to model_structure.json")

In [None]:
def count_trainable_parameters(model):
    """Return the total number of scalar weights in ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are not counted.
        if param.requires_grad:
            total += param.numel()
    return total
print(f"Trainable parameters: {count_trainable_parameters(model)}")


In [None]:
# Compile the loaded model to TorchScript and write it to disk.
# NOTE: despite the file name, 'best_model_weights.pt' holds the whole scripted
# module; the plain state_dict is the separate 'best_model_weights.pth' file.
model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('best_model_weights.pt') # Save

In [None]:
import pandas as pd

def get_hourly_data_with_price_development(intradayndx_agg, input_date):
    """Split one trading day into hourly periods with per-candle price developments.

    The day is cut into six one-hour periods between 9:30 and 15:30. Each
    period is a half-open interval: a candle belongs to "9:30-10:30" when its
    time of day is >= 9:30 and < 10:30, so candles after the top of the hour
    are included as well.

    Args:
        intradayndx_agg: DataFrame with a "Date" column (compared with ``==``
            against ``input_date``), a datetime-like "datetime" column and
            "Open", "High", "Low", "Close" columns. Rows are taken in their
            existing order; the first row of a period supplies its open.
        input_date: Value identifying the day in the "Date" column.

    Returns:
        dict keyed by period label (e.g. "9:30-10:30"). Each value is a dict
        with "data" (the period's rows) and "price_development" (one
        six-element list per candle: high/low/close change of the long
        product, then high/low/close change of the short product, each
        relative to the product's price at the period open). Periods without
        rows are omitted. The dict is empty when the day has no rows or its
        last timestamp is earlier than 15:30.

    Note:
        ``pricelong`` and ``priceshort`` are helpers defined elsewhere in the
        notebook; presumably they price a long/short knock-out product from an
        underlying level and a barrier — TODO confirm. A zero product price at
        the period open would make the relative changes undefined.
    """
    # Keep only the candles of the requested day
    df_intra_day = intradayndx_agg[intradayndx_agg["Date"] == input_date]

    hourly_data = {}

    # Period boundaries from 9:30 to 15:30 (the last half-hour is left out)
    trading_hours = [(9, 30), (10, 30), (11, 30), (12, 30), (13, 30), (14, 30), (15, 30)]

    # Days without rows, or whose last timestamp is earlier than 15:30, yield no periods
    if df_intra_day.empty or not (
        df_intra_day["datetime"].max().time() >= pd.to_datetime("15:30").time()
    ):
        return hourly_data

    # Minutes since midnight give one comparable number per candle, so a period
    # that crosses an hour boundary becomes a plain range test.
    candle_time = df_intra_day["datetime"].dt
    minute_of_day = candle_time.hour * 60 + candle_time.minute

    for (start_hour, start_min), (end_hour, end_min) in zip(trading_hours, trading_hours[1:]):
        period_key = f"{start_hour}:{start_min:02d}-{end_hour}:{end_min:02d}"

        # Half-open interval [start, end): the end boundary belongs to the next period
        in_period = (minute_of_day >= start_hour * 60 + start_min) & (
            minute_of_day < end_hour * 60 + end_min
        )
        df_hour = df_intra_day[in_period]

        # Nothing traded in this period: leave it out of the result
        if df_hour.empty:
            continue

        # Open of the period's first candle anchors the knock-out levels
        open_day = df_hour.iloc[0]["Open"]
        knockout_long = 0.96 * open_day
        knockout_short = 1.04 * open_day

        # Product prices at the period open; all changes are relative to these
        open_price_long = pricelong(open_day, knockout_long)
        open_price_short = priceshort(open_day, knockout_short)

        price_development_list = []
        for _, row in df_hour.iterrows():
            high_long = pricelong(row["High"], knockout_long)
            low_long = pricelong(row["Low"], knockout_long)
            close_long = pricelong(row["Close"], knockout_long)

            high_short = priceshort(row["High"], knockout_short)
            low_short = priceshort(row["Low"], knockout_short)
            close_short = priceshort(row["Close"], knockout_short)

            price_development_list.append([
                (high_long - open_price_long) / open_price_long,
                (low_long - open_price_long) / open_price_long,
                (close_long - open_price_long) / open_price_long,
                (high_short - open_price_short) / open_price_short,
                (low_short - open_price_short) / open_price_short,
                (close_short - open_price_short) / open_price_short,
            ])

        hourly_data[period_key] = {
            "data": df_hour,
            "price_development": price_development_list
        }

    return hourly_data

# Example usage: build the hourly breakdown for a single day.
# input_date is compared with == against the "Date" column, so its type and
# format have to match the values stored there.
input_date = "2024-11-08"
hourly_data_result = get_hourly_data_with_price_development(intradayndx_agg, input_date)


In [None]:
# Display the full result: period label -> {"data": rows, "price_development": list}.
hourly_data_result