In [2]:
import requests
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime, timedelta
from polygon import RESTClient
from sqlalchemy import inspect
import numpy as np


In [37]:
EURPLN = pd.read_csv("EURPLN.csv", parse_dates=['datetime'])
GBPCHF = pd.read_csv("GBPCHF.csv", parse_dates=['datetime'])
HKDJPY = pd.read_csv("HKDJPY.csv", parse_dates=['datetime'])
NZDUSD = pd.read_csv("NZDUSD.csv", parse_dates=['datetime'])
SGDJPY = pd.read_csv("SGDJPY.csv", parse_dates=['datetime'])
USDBRL = pd.read_csv("USDBRL.csv", parse_dates=['datetime'])
USDDKK = pd.read_csv("USDDKK.csv", parse_dates=['datetime'])
USDHKD = pd.read_csv("USDHKD.csv", parse_dates=['datetime'])
USDMXN = pd.read_csv("USDMXN.csv", parse_dates=['datetime'])
USDTRY = pd.read_csv("USDTRY.csv", parse_dates=['datetime'])


In [43]:
def resample_data(df, start_date, end_date):
    # Filter data by date range
    data = df[(df['datetime'] >= start_date) & (df['datetime'] < end_date)]

    data['price_times_transactions'] = data['vw'] * data['n']

    # Group the data into the specified frequency intervals
    grouped_data = data.groupby(pd.Grouper(key='datetime', freq='6H'))

    # Calculate VWAP and Liquidity for each group
    grouped_vwap = grouped_data['price_times_transactions'].sum() / grouped_data['n'].sum()
    grouped_liquidity = grouped_data['n'].sum() / 6


    grouped_vwap = grouped_vwap.fillna(0)
    max_value = grouped_data['h'].max().fillna(0)
    min_value = grouped_data['l'].min().fillna(0)

    # Combine the results into a new DataFrame
    resampled_df = pd.DataFrame({
        'timestamp': grouped_vwap.index,
        'vwap': grouped_vwap.values,
        'liquidity': grouped_liquidity.values,
        'max': max_value,
        'min': min_value
    })

    resampled_df['volatility'] = (resampled_df['max'] - resampled_df['min']) / resampled_df['vwap']

    return resampled_df


In [48]:
start_date = "2022-01-01"
end_date = "2023-01-01"
EURPLN = resample_data(EURPLN, start_date, end_date)
GBPCHF = resample_data(GBPCHF, start_date, end_date)
HKDJPY = resample_data(HKDJPY, start_date, end_date)
NZDUSD = resample_data(NZDUSD, start_date, end_date)
SGDJPY = resample_data(SGDJPY, start_date, end_date)
USDBRL = resample_data(USDBRL, start_date, end_date)
USDDKK = resample_data(USDDKK, start_date, end_date)
USDHKD = resample_data(USDHKD, start_date, end_date)
USDMXN = resample_data(USDMXN, start_date, end_date)
USDTRY = resample_data(USDTRY, start_date, end_date)

In [50]:
def calculate_fd_values(df):
    fd_values = [0]
    for idx in range(1, len(df)):
        max_rate = df.iloc[idx - 1]['max']
        min_rate = df.iloc[idx - 1]['min']
        vol = df.iloc[idx - 1]['volatility']
        mean_rate = df.iloc[idx - 1]['vwap']

        keltner_upper = [mean_rate + n * 0.025 * vol for n in range(1, 101)]
        keltner_lower = [mean_rate - n * 0.025 * vol for n in range(1, 101)]

        prices = df.iloc[idx - 1:idx + 1]['vwap'].values

        crosses = calculate_fd(prices, keltner_upper, keltner_lower)

        fd = 0
        if (max_rate - min_rate) != 0:
            fd = crosses / (max_rate - min_rate)
        elif crosses == 0:
            fd = 0
        else:
            fd = 0

        fd_values.append(fd)

    df['fd'] = fd_values
    return df


def calculate_fd(prices, keltner_upper, keltner_lower):
    crosses = 0
    for i in range(1, len(prices)):
        for upper, lower in zip(keltner_upper, keltner_lower):
            if (prices[i - 1] < upper and prices[i] > upper) or (prices[i - 1] > lower and prices[i] < lower):
                crosses += 1
    return crosses

In [51]:
EURPLN = calculate_fd_values(EURPLN)
GBPCHF = calculate_fd_values(GBPCHF)
HKDJPY = calculate_fd_values(HKDJPY)
NZDUSD = calculate_fd_values(NZDUSD)
SGDJPY = calculate_fd_values(SGDJPY)
USDBRL = calculate_fd_values(USDBRL)
USDDKK = calculate_fd_values(USDDKK)
USDHKD = calculate_fd_values(USDHKD)
USDMXN = calculate_fd_values(USDMXN)
USDTRY = calculate_fd_values(USDTRY)

In [13]:
from pycaret.regression import *
from sklearn.model_selection import train_test_split


In [17]:


X = resampled_df[['vwap', 'liquidity', 'max', 'min', 'volatility']]
y = resampled_df['fd']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [22]:
from pycaret.regression import *

# Initialize the regression module
reg = setup(data= , target='fd')

# Compare different regression models
compare_models()

# Train a model
model = create_model('lr')

# Evaluate the model on the test set
predict_model(model, data=X_test)


IndexError: tuple index out of range