In [None]:
import os

import numpy as np
import pandas as pd
import requests
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from pandas.tseries.offsets import BDay
from scipy.stats import linregress
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# Polygon.io API key. The POLYGON_API_KEY environment variable takes precedence
# so the credential does not have to live in the notebook.
# NOTE(review): the literal fallback is a secret committed to the notebook; it is
# kept only so existing runs keep working. Rotate it and drop the fallback.
api_key = os.environ.get('POLYGON_API_KEY', 'beBybSi8daPgsTp5yx5cHtHpYcrjp5Jq')

# Currency pairs and calendar years whose daily bars are scanned for outliers
currency_pairs = ["USDEUR", "USDCAD", "USDCHF", "USDGBP", "USDAUD"]
years = range(2022, 2024)  # upper bound is exclusive: 2022 and 2023
# Number of largest absolute daily returns kept per pair and year
num_of_outliers_per_year = 15

In [None]:
def calculate_daily_return(df):
    """Add daily return columns to ``df`` in place and return it.

    ``daily_return`` is the row-over-row percentage change of the close
    column ``'c'`` (the first row is NaN); ``abs_daily_return`` is its
    absolute value. The same DataFrame object that was passed in is returned.
    """
    close_returns = df['c'].pct_change()
    df['daily_return'] = close_returns
    df['abs_daily_return'] = close_returns.abs()
    return df

def get_top_outliers(df, n=num_of_outliers_per_year):
    """Return the ``n`` rows of ``df`` with the largest ``abs_daily_return``.

    ``n`` defaults to the notebook-level ``num_of_outliers_per_year`` as it
    was when this function was defined.
    """
    largest_moves = df.nlargest(n=n, columns='abs_daily_return')
    return largest_moves

In [None]:
# Collect one frame per (pair, year) and concatenate once at the end; calling
# pd.concat inside the loop re-copies every previously collected row each time.
full_frames = []
outlier_frames = []

# Loop over each currency pair and year
for pair in currency_pairs:
    for year in years:
        # Format the API endpoint
        start_date = f'{year}-01-01'
        end_date = f'{year}-12-31'
        url = f"https://api.polygon.io/v2/aggs/ticker/C:{pair}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"

        # Make the API request; the timeout keeps a stalled connection from
        # hanging the notebook indefinitely.
        response = requests.get(url, timeout=30)

        # Check the status before parsing: error responses are not guaranteed
        # to carry a JSON body.
        if response.status_code != 200:
            print(f"Skipping {pair} {year}: HTTP {response.status_code}")
            continue

        data = response.json()
        # A missing or empty 'results' list means there are no bars to load
        # (an empty list would otherwise fail on the 't' column below).
        if not data.get('results'):
            print(f"Skipping {pair} {year}: no results returned")
            continue

        # Load data into a DataFrame
        df = pd.DataFrame(data['results'])
        # Convert millisecond epoch timestamps to datetimes
        df['date'] = pd.to_datetime(df['t'], unit='ms')
        df.drop(columns=['t'], inplace=True)

        # Only include weekdays. The explicit copy makes the column assignments
        # below write to an independent frame rather than a filtered slice.
        df = df[df['date'].dt.weekday < 5].copy()
        # Calculating returns
        df = calculate_daily_return(df)

        df['year'] = year
        df['day'] = df['date'].dt.day_name()
        df['pair'] = pair  # Add the currency pair identifier

        # Find the top outliers (num_of_outliers_per_year of them) based on
        # the absolute daily return. The selection already carries the
        # 'year', 'day' and 'pair' columns set above.
        top_outliers = get_top_outliers(df, num_of_outliers_per_year)

        # Flag the outlier dates in the per-day data
        df['is_outlier'] = df['date'].isin(top_outliers['date']).astype(int)

        full_frames.append(df)
        outlier_frames.append(top_outliers)

if not full_frames:
    # Without any data the sorts below would fail with an opaque KeyError.
    raise RuntimeError("No data could be downloaded for any currency pair / year")

full_data = pd.concat(full_frames, ignore_index=True)
outliers_data = pd.concat(outlier_frames, ignore_index=True)

sorted_full_data = full_data.sort_values(by="date")
sorted_outliers_data = outliers_data.sort_values(by="date")

In [None]:
# Outlier dates as datetimes (taken as a new Series so the upstream frame is
# not modified by this cell)
outlier_dates = pd.to_datetime(sorted_outliers_data['date'])

# One row per outlier: the window of business days around it plus the
# outlier's close, return and pair.
date_ranges = pd.DataFrame({
    "start_date": outlier_dates - BDay(14), # To predict X days, keep this as X-1 (as 1 day of outlier will be considered in LSTM input)
    "end_date": outlier_dates + BDay(15),
    "outlier_date": outlier_dates,
    "outlier_price": sorted_outliers_data['c'],
    "daily_return": sorted_outliers_data['daily_return'],
    "currency_pair": sorted_outliers_data['pair']
})

# sort_values returns a new frame, so its result has to be kept. The stable
# sort preserves the incoming order of rows that share a date, the last
# reset_index gives contiguous labels after de-duplication.
# NOTE(review): de-duplicating on 'outlier_date' alone keeps a single pair per
# calendar date and drops other pairs' outliers on that date - confirm this is
# intended (otherwise use subset=['outlier_date', 'currency_pair']).
date_ranges = (
    date_ranges
    .sort_values(by="outlier_date", kind="stable")
    .drop_duplicates(subset='outlier_date', keep='first')
    .reset_index(drop=True)
)

date_ranges

In [None]:
def fetch_daily_data(pair, start_date, end_date, api_key):
    """Download daily aggregate bars for one currency pair from Polygon.io.

    Parameters
    ----------
    pair : str
        Currency pair code inserted into the ``C:{pair}`` ticker of the URL.
    start_date, end_date : objects with ``strftime``
        Bounds of the requested range, formatted as ``YYYY-MM-DD``.
    api_key : str
        Polygon.io API key.

    Returns
    -------
    pandas.DataFrame or None
        The bars indexed by ``date``, with ``daily_return``,
        ``abs_daily_return`` and ``currency_pair`` columns added. ``None`` is
        returned (after printing a message) when the request fails, the body
        is not valid JSON, or the response holds no results.
    """
    formatted_start_date = start_date.strftime('%Y-%m-%d')
    formatted_end_date = end_date.strftime('%Y-%m-%d')

    url = f"https://api.polygon.io/v2/aggs/ticker/C:{pair}/range/1/day/{formatted_start_date}/{formatted_end_date}?adjusted=true&sort=asc&apiKey={api_key}"
    try:
        # The timeout keeps a stalled connection from hanging the caller.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is printed: the exception text can contain
        # the request URL, which embeds the API key.
        print(f"Failed to fetch data: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return None

    try:
        response_data = response.json()
    except ValueError:
        print("Failed to fetch data: response body is not valid JSON")
        return None

    if 'results' not in response_data:
        print(f"No 'results' in response: {response_data}")
        return None

    if not response_data['results']:
        # An empty list would build a frame without the 't' column used below.
        print(f"Empty 'results' in response: {response_data}")
        return None

    df = pd.DataFrame(response_data['results'])
    # 't' holds millisecond epoch timestamps
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)

    daily_data = calculate_daily_return(df)
    daily_data['currency_pair'] = pair
    daily_data.set_index('date', inplace=True)

    return daily_data

def fetch_and_process_daily_data(pair, start_date, end_date, api_key):
    """Fetch daily data via ``fetch_daily_data`` and return it with ``date`` as a column.

    Returns ``None`` (after printing "No data fetched") when the fetch
    produced nothing; otherwise the fetched frame with its index reset.
    """
    fetched = fetch_daily_data(pair, start_date, end_date, api_key)

    if fetched is not None:
        # Move 'date' from the index back into a regular column.
        fetched.reset_index(inplace=True)
        return fetched

    print("No data fetched")
    return None

In [None]:
def process_data_and_train_model(daily_data, train_size=15, sequence_length=12, epochs=100):
    """Train an LSTM on the start of ``daily_data`` and forecast the remainder.

    Weekend rows are removed, the rows are sorted by ``date`` and missing
    values are back- then forward-filled. The first ``train_size`` rows form
    the training set and all later rows the test set. The close column ``'c'``
    of the training set is min-max scaled and used to fit the model; the model
    is then rolled forward one step at a time, feeding each prediction back in
    as input, for as many steps as there are test rows.

    Parameters
    ----------
    daily_data : pandas.DataFrame
        Needs a datetime ``date`` column and a close column ``'c'``. The
        caller's frame is not modified.
    train_size : int, default 15
        Number of leading rows used for training.
    sequence_length : int, default 12
        Number of past observations fed to the LSTM for each prediction.
    epochs : int, default 100
        Training epochs.

    Returns
    -------
    tuple
        ``(predictions_inv, forecast_steps, test_set)``: the forecasts in the
        original price scale with shape ``(forecast_steps, 1)``, the number of
        forecast steps, and the test rows with a fresh index.

    Raises
    ------
    ValueError
        If the training set does not have more rows than ``sequence_length``
        (no training sample could be built) or there are no rows left to
        forecast.
    """
    # Build a new frame with method chaining: in-place sort/fill on a filtered
    # slice triggers SettingWithCopyWarning, and fillna(method=...) is
    # deprecated in favour of bfill()/ffill().
    is_weekend = daily_data['date'].dt.weekday.isin([5, 6])
    prepared = (
        daily_data[~is_weekend]
        .sort_values(by='date', ascending=True)
        .bfill()
        .ffill()
    )

    # Split the dataset into train and test sets
    train_set = prepared.iloc[:train_size].reset_index(drop=True)
    test_set = prepared.iloc[train_size:].reset_index(drop=True)

    # Fail early with a clear message instead of deep inside the generator
    # or the scaler.
    if len(train_set) <= sequence_length:
        raise ValueError(
            f"Need more than {sequence_length} training rows, got {len(train_set)}"
        )
    if test_set.empty:
        raise ValueError("No rows left after the training window to forecast")

    # Normalize the data using only the training data
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_set[["c"]])

    # Prepare data for LSTM model
    train_generator = TimeseriesGenerator(train_scaled, train_scaled, length=sequence_length, batch_size=1)

    # Define and compile LSTM model
    model = Sequential([
        LSTM(64, activation='relu', input_shape=(sequence_length, 1), kernel_initializer='orthogonal'),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

    # Fit the model
    model.fit(train_generator, epochs=epochs, verbose=0)

    # Seed the rolling window with the last training observations (copied so
    # the scaled training data is never written to).
    last_sequence = train_scaled[-sequence_length:].copy()

    # Forecast the next steps, feeding each prediction back into the window
    forecast_steps = len(test_set)
    predictions_scaled = []
    for _ in range(forecast_steps):
        window = last_sequence.reshape((1, sequence_length, 1))
        next_value = model.predict(window, verbose=0).ravel()[0]
        predictions_scaled.append(next_value)
        # Drop the oldest observation and append the new prediction.
        last_sequence = np.roll(last_sequence, -1, axis=0)
        last_sequence[-1, 0] = next_value

    # Inverse transform predictions back to the price scale
    predictions_inv = scaler.inverse_transform(np.array(predictions_scaled).reshape(-1, 1))

    return predictions_inv, forecast_steps, test_set

In [None]:
def analyze_market_trends(row, actuals, predictions_inv, test_set, daily_data):
    """Classify the actual and predicted post-outlier trend and derive a trade.

    The outlier's daily return is recomputed from ``daily_data`` (rows up to
    and including ``row['outlier_date']``). The sign of a linear-regression
    slope through ``actuals`` gives the actual trend, the slope through the
    predictions gives the predicted trend and the trade direction:

    * slope > 0: long, exiting at the highest predicted price and entering at
      the lowest predicted price up to that point;
    * otherwise: short, entering at the highest predicted price and exiting at
      the lowest predicted price from there on.

    Parameters
    ----------
    row : mapping
        Must provide ``'outlier_date'``.
    actuals : array-like
        Observed prices for the forecast period.
    predictions_inv : array-like
        Predicted prices; any shape, flattened internally.
    test_set : pandas.DataFrame
        Rows of the forecast period; its ``date`` column supplies the entry
        and exit dates by position.
    daily_data : pandas.DataFrame
        Needs ``date`` and ``'c'`` columns. Not modified.

    Returns
    -------
    dict
        Keys ``Type``, ``Actual Market Trend``, ``Predicted Market Trend``,
        ``Position Type``, ``Entry Date``, ``Entry Price``, ``Exit Date``,
        ``Exit Price``, ``Trading Days``, ``Profit`` (percent of the entry
        price) and ``Trade Initiated``. Prices and profit are plain floats.

    Raises
    ------
    ValueError
        If ``daily_data`` does not hold exactly one row for the outlier date.
    """
    continuation, reversion = "Momentum Continuation", "Mean Reversion"

    # Daily return on the outlier date. The copy keeps the new column off the
    # caller's frame (and avoids SettingWithCopyWarning).
    historical_data = daily_data.loc[daily_data['date'] <= row['outlier_date'], ['date', 'c']].copy()
    historical_data['daily_return'] = historical_data['c'].pct_change()
    outlier_returns = historical_data.loc[historical_data['date'] == row['outlier_date'], 'daily_return']
    if len(outlier_returns) != 1:
        raise ValueError(
            f"Expected exactly one row for outlier date {row['outlier_date']}, "
            f"found {len(outlier_returns)}"
        )
    daily_return = outlier_returns.item()

    # Determine the type of outlier
    is_positive_outlier = daily_return > 0
    outlier_type = "Positive" if is_positive_outlier else "Negative"

    # Actual market trend: continuation when the slope has the outlier's sign
    actual_days = np.arange(1, len(actuals) + 1)
    actual_slope = linregress(actual_days, actuals)[0]
    if is_positive_outlier:
        actual_market_trend = continuation if actual_slope > 0 else reversion
    else:
        actual_market_trend = continuation if actual_slope < 0 else reversion

    # Flatten the predictions so indexing yields scalars; with the (n, 1)
    # layout each price and the profit would be a 1-element array.
    predicted = np.asarray(predictions_inv, dtype=float).ravel()
    days = np.arange(1, len(predicted) + 1)
    slope = linregress(days, predicted)[0]

    if slope > 0:
        position_type = "Long"
        market_trend = continuation if is_positive_outlier else reversion
        exit_idx = int(np.argmax(predicted))
        entry_idx = int(np.argmin(predicted[:exit_idx + 1]))
    else:
        position_type = "Short"
        market_trend = reversion if is_positive_outlier else continuation
        entry_idx = int(np.argmax(predicted))
        exit_idx = int(np.argmin(predicted[entry_idx:])) + entry_idx

    entry_price = float(predicted[entry_idx])
    exit_price = float(predicted[exit_idx])
    # A long gains when the price rises, a short when it falls.
    if position_type == "Long":
        price_move = exit_price - entry_price
    else:
        price_move = entry_price - exit_price
    profit = price_move / entry_price * 100

    # Compile results
    trade_results = {
        'Type': outlier_type,
        'Actual Market Trend': actual_market_trend,
        'Predicted Market Trend': market_trend,
        'Position Type': position_type,
        'Entry Date': test_set.iloc[entry_idx]['date'],
        'Entry Price': entry_price,
        'Exit Date': test_set.iloc[exit_idx]['date'],
        'Exit Price': exit_price,
        'Trading Days': exit_idx - entry_idx,
        'Profit': profit,
        # Both branches above always produce a trade.
        'Trade Initiated': True
    }

    return trade_results

In [None]:
def get_values(daily_data, row):
	"""Forecast one window, score the forecast and evaluate the resulting trade.

	Runs ``process_data_and_train_model`` on ``daily_data``, compares the
	forecast with the test-set closes and returns the dictionary produced by
	``analyze_market_trends`` with two extra keys: ``model_RMSE`` (root mean
	squared error) and ``model_accuracy`` (``100 - MAPE * 100``, rounded to
	two decimals).
	"""
	forecast, n_steps, holdout = process_data_and_train_model(daily_data)

	# Observed closes for the forecast period
	observed = holdout["c"].values[:n_steps]

	# Error metrics of the forecast against the observed closes
	rmse_value = np.sqrt(mean_squared_error(observed, forecast))
	mape_value = mean_absolute_percentage_error(observed, forecast)
	accuracy_pct = np.round(100 - (mape_value * 100), 2)

	summary = analyze_market_trends(row, observed, forecast, holdout, daily_data)
	summary["model_RMSE"] = rmse_value
	summary["model_accuracy"] = accuracy_pct
	return summary

In [None]:
# Column-oriented store for the final results: one list per output column.
# Every processed outlier appends exactly one value to every list, so all
# lists stay the same length and can be turned into a DataFrame at the end.
final_results_dict = {'Date': [], 
	'Pair': [], 'Price': [], 'Type': [], 'Actual Market Trend': [], 'Predicted Market Trend': [], 'Position Type': [], 'Entry Date': [], 
	'Entry Price': [], 'Exit Date': [], 'Exit Price': [], 'Trading Days': [], 'Profit': [], 'model_RMSE': [], 'model_accuracy': [],
	
	'Counter Pair': [], 'Counter Price': [], 'Counter Type': [], 'Counter Actual Market Trend': [], 'Counter Predicted Market Trend': [],
	'Counter Position Type': [], 'Counter Entry Date': [], 'Counter Entry Price': [], 'Counter Exit Date': [], 'Counter Exit Price': [],
	'Counter Trading Days': [], 'Counter Profit': [], 'Counter model_RMSE': [], 'Counter model_accuracy': [] 
}

# Candidate pairs for the counter (hedge) trade
unique_pairs = ["USDEUR", "USDCAD", "USDCHF", "USDGBP", "USDAUD", "USDNZD", "USDSGD", 
                "USDPLN", "USDILS", "USDBRL", "USDSEK", "USDNOK", "USDMXN", "USDCZK"]

# A counter pair is only accepted when its model RMSE is below this value
max_counter_rmse = 0.05

# Iterate through each record in the outlier data
for idx, row in date_ranges.iterrows():
    reference_pair = row['currency_pair']
    start_date = pd.Timestamp(row['start_date'])
    end_date = pd.Timestamp(row['end_date']) + pd.Timedelta(days=1)
    outlier_date = pd.Timestamp(row['outlier_date'])

    # Retrieve and process daily data for the specified date range
    daily_data = fetch_and_process_daily_data(reference_pair, start_date, end_date, api_key)

    if daily_data is None:
        print(f"No data fetched for outlier_id: {idx + 1}")
        continue  # Skip to the next iteration if no data is available

    results = get_values(daily_data, row)
    # 'Trade Initiated' is a control flag, not an output column
    result_keys = [key for key in results if key != 'Trade Initiated']

    # Store results for the reference pair
    final_results_dict['Date'].append(outlier_date)
    final_results_dict['Pair'].append(reference_pair)
    final_results_dict['Price'].append(row['outlier_price'])
    for key in result_keys:
        final_results_dict[key].append(results[key])

    # Look for the best counter trade; all three stay None when none qualifies
    counter_pair, counter_price, counter_results = None, None, None

    if results['Trade Initiated']:
        # 'Position Type' holds the capitalised values "Long" / "Short"
        opposite_position = "Long" if results["Position Type"] == "Short" else "Short"

        candidates = []
        for pair in unique_pairs:
            if pair == reference_pair:  # Exclude the reference pair
                continue

            daily_data_pair = fetch_and_process_daily_data(pair, start_date, end_date, api_key)
            if daily_data_pair is None:
                continue

            try:
                results_pair = get_values(daily_data_pair, row)
            except ValueError as exc:
                # A candidate with unusable data (e.g. too few rows or no bar
                # on the outlier date) is skipped instead of aborting the run.
                print(f"Skipping counter pair {pair} for {outlier_date.date()}: {exc}")
                continue

            # Keep candidates that trade in the opposite direction with a
            # sufficiently accurate model (the metric key is 'model_RMSE').
            if results_pair["Position Type"] != opposite_position:
                continue
            if not results_pair["model_RMSE"] < max_counter_rmse:
                continue

            # Counter price: the candidate's close on the outlier date,
            # mirroring 'Price' for the reference pair.
            closes_on_outlier_day = daily_data_pair.loc[daily_data_pair['date'] == outlier_date, 'c']
            price = closes_on_outlier_day.iloc[0] if not closes_on_outlier_day.empty else None
            candidates.append((pair, price, results_pair))

        if candidates:
            # Most profitable candidate; np.ravel accepts both scalar profits
            # and 1-element arrays.
            counter_pair, counter_price, counter_results = max(
                candidates, key=lambda c: float(np.ravel(c[2]["Profit"])[0])
            )

    # Always append the counter columns (None when there is no counter trade)
    # so that every list keeps one entry per processed outlier.
    final_results_dict['Counter Pair'].append(counter_pair)
    final_results_dict['Counter Price'].append(counter_price)
    for key in result_keys:
        value = None if counter_results is None else counter_results[key]
        final_results_dict["Counter " + key].append(value)

# Convert the dictionary of results to a DataFrame for easier analysis
final_results_df = pd.DataFrame(final_results_dict)

In [None]:
# Display the assembled results table (one row per processed outlier).
final_results_df

In [None]:
# Keep only the rows whose reference-pair model RMSE is below 0.05.
# reset_index is chained (not in-place) so filtered_df is a standalone frame:
# an in-place reset on the boolean-mask selection leaves it flagged as a copy
# of final_results_df, and adding a column to it later raises
# SettingWithCopyWarning.
filtered_df = final_results_df.loc[final_results_df['model_RMSE'] < 0.05].reset_index(drop=True)

filtered_df

In [None]:
def _profit_as_float(value):
    """Return a profit entry as a float; None/empty become NaN, 1-element arrays are unwrapped."""
    if value is None:
        return np.nan
    flat = np.ravel(value)
    return float(flat[0]) if flat.size else np.nan


# Profit entries may be None (no counter trade) or 1-element arrays, neither
# of which can be added directly; convert both columns to floats first.
reference_profit = filtered_df['Profit'].map(_profit_as_float)
# A missing counter trade contributes nothing, so the final profit is then
# just the reference trade's profit.
counter_profit = filtered_df['Counter Profit'].map(_profit_as_float).fillna(0.0)

# assign() builds a new frame instead of writing into the displayed one
filtered_df = filtered_df.assign(**{'Final Profit': reference_profit + counter_profit})

min_profit = round(filtered_df['Final Profit'].min(), 2)
max_profit = round(filtered_df['Final Profit'].max(), 2)

print('Minimum Profit:', min_profit, '%')
print('Maximum Profit:', max_profit, '%')