In [1]:
import pandas as pd
import numpy as np
import requests
from pandas.tseries.offsets import BDay
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
from tensorflow.keras.initializers import Orthogonal
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import linregress

In [2]:
def calculate_daily_return(df):
    # Use pct_change() to calculate the percentage change in 'c' (close prices)
    df['daily_return'] = df['c'].pct_change()
    df['abs_daily_return'] = df['daily_return'].abs()
    return df

def get_top_outliers(df, n=10):
    return df.nlargest(n, 'abs_daily_return')

# Define the API key and base URL
api_key = 'beBybSi8daPgsTp5yx5cHtHpYcrjp5Jq'

# Define the currency pairs and years
pair = "C:USDEUR"
years = range(2022, 2024)

# Initialize DataFrames
full_data = pd.DataFrame()
outliers_data = pd.DataFrame()

# Loop over each year
for year in years:
    # Format the API endpoint
    start_date = f'{year}-01-01'
    end_date = f'{year}-12-31'
    url = f"https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
    
    # Make the API request
    response = requests.get(url)
    data = response.json()
    
    # Check if the request was successful
    if response.status_code == 200 and 'results' in data:
        # Load data into a DataFrame
        df = pd.DataFrame(data['results'])
        # Convert timestamps
        df['date'] = pd.to_datetime(df['t'], unit='ms')
        df.drop(columns=['t'], inplace=True)

        df = df[df['date'].dt.weekday < 5]
        # Calculating returns
        df = calculate_daily_return(df)

        # Append the data to the full_data DataFrame for the current currency pair
        df['year'] = year
        df['day'] = df['date'].dt.day_name()
        # Find the top 10 outliers based on absolute values of the daily return value
        # df['abs_daily_return'] = df['c'].abs()
        top_outliers = df.nlargest(10, 'abs_daily_return')
        outlier_dates = top_outliers['date']

        # Create a new column 'is_outlier' in the full_data DataFrame
        df['is_outlier'] = df['date'].isin(outlier_dates).astype(int)
        full_data = pd.concat([full_data, df], ignore_index=True)
        
        # Append outliers to the outliers_data DataFrame for the current currency pair
        top_outliers['year'] = year
        outliers_data = pd.concat([outliers_data, top_outliers], ignore_index=True)
        

sorted_full_data = full_data.sort_values(by="date")
sorted_outliers_data = outliers_data.sort_values(by="date")

In [3]:
sorted_full_data

Unnamed: 0,v,vw,o,c,h,l,n,date,daily_return,abs_daily_return,year,day,is_outlier
0,57901,0.8828,0.87925,0.88478,0.88651,0.879020,57901,2022-01-03,,,2022,Monday,0
1,62051,0.8854,0.88460,0.88624,0.88709,0.883000,62051,2022-01-04,0.001650,0.001650,2022,Tuesday,0
2,64819,0.8838,0.88627,0.88390,0.88673,0.881100,64819,2022-01-05,-0.002640,0.002640,2022,Wednesday,0
3,68551,0.8844,0.88409,0.88520,0.88613,0.882300,68551,2022-01-06,0.001471,0.001471,2022,Thursday,0
4,45968,0.8833,0.88531,0.88000,0.88571,0.879662,45968,2022-01-07,-0.005874,0.005874,2022,Friday,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,6522,0.9078,0.90750,0.90747,0.90940,0.901600,6522,2023-12-25,0.000629,0.000629,2023,Monday,0
516,51232,0.9068,0.90747,0.90550,0.90833,0.905100,51232,2023-12-26,-0.002171,0.002171,2023,Tuesday,0
517,77389,0.9029,0.90557,0.90018,0.90670,0.898796,77389,2023-12-27,-0.005875,0.005875,2023,Wednesday,0
518,83788,0.9008,0.90018,0.90348,0.90453,0.897500,83788,2023-12-28,0.003666,0.003666,2023,Thursday,0


In [4]:
# Convert dates in dataset to datetime objects
sorted_outliers_data['date'] = pd.to_datetime(sorted_outliers_data['date'])

date_ranges = pd.DataFrame({
    "start_date": sorted_outliers_data['date'] - BDay(14), # To predict X days, keep this as X-1 (as 1 day of outlier will be considered in LSTM input)
    "end_date": sorted_outliers_data['date'] + BDay(15),
    "outlier_date": sorted_outliers_data['date'],
    "outlier_price": sorted_outliers_data['c'],
    "daily_return": sorted_outliers_data['daily_return']
})

date_ranges.reset_index(drop=True, inplace=True)

date_ranges

Unnamed: 0,start_date,end_date,outlier_date,outlier_price,daily_return
0,2022-02-17,2022-03-30,2022-03-09,0.90302,-0.015868
1,2022-06-15,2022-07-26,2022-07-05,0.97493,0.017163
2,2022-06-21,2022-08-01,2022-07-11,0.99537,0.014235
3,2022-08-24,2022-10-04,2022-09-13,1.00213,0.015134
4,2022-09-01,2022-10-12,2022-09-21,1.01711,0.014371
5,2022-09-05,2022-10-14,2022-09-23,1.0319,0.01529
6,2022-09-14,2022-10-25,2022-10-04,1.0015,-0.01495
7,2022-10-17,2022-11-25,2022-11-04,1.0015,-0.023575
8,2022-10-21,2022-12-01,2022-11-10,0.98115,-0.016913
9,2022-10-24,2022-12-02,2022-11-11,0.964,-0.017479


In [5]:
def fetch_daily_data(pair, start_date, end_date, api_key):
    formatted_start_date = start_date.strftime('%Y-%m-%d')
    formatted_end_date = end_date.strftime('%Y-%m-%d')

    url = f"https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{formatted_start_date}/{formatted_end_date}?adjusted=true&sort=asc&apiKey={api_key}"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return None

    response_data = response.json()

    if 'results' not in response_data:
        print(f"No 'results' in response: {response_data}")
        return None

    df = pd.DataFrame(response_data['results'])
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)

    daily_data = calculate_daily_return(df)
    daily_data.set_index('date', inplace=True)

    return daily_data

def fetch_and_process_daily_data(pair, start_date, end_date, api_key):
    daily_data = fetch_daily_data(pair, start_date, end_date, api_key)

    if daily_data is None:
        print("No data fetched")
        return None

    daily_data.reset_index(inplace=True)
    return daily_data

### Trading Strategy Logic

This script predicts next 12 days when an outlier is detected. It then identifies trading opportunities based on outlier events, using momentum continuation and mean reversion strategies. It calculates profit and tracks key trading details such as entry and exit prices, dates, and trade duration.

##### Steps:

1. **Identify Outlier Type**:  
   - Positive or Negative.
2. **Check the Predicted Trend Based on Linear Regression**:  
   - Perform linear regression on the predicted prices to detect momentum continuation or mean reversion. 
	- For a positive outlier, if the slope > 0, it indicates momentum continuation; if slope < 0, it suggests mean reversion.
	- For a negative outlier, slope < 0 confirms momentum continuation, while slope > 0 indicates mean reversion.
	- If the slope is zero, no significant trend is detected.
3. **Implement Trading Strategy**:  
   - For a positive outlier:
     - **Momentum Continuation**: If the trend is continuing upward, we buy (go long with a position size x) at the lowest price before it peaks, and then sell (the same position size) at the highest price within the next 12 days.
     - **Mean Reversion**: If the market recovers from the hike and comes back to the reference point (i.e. close price a day before outlier day), we go short so that we gain some profit.
   - For a negative outlier:
     - **Momentum Continuation**: If the trend is continuing downward, we sell (go short with a position size x) at the highest price before it drops, and then buy (the same position size) at the lowest price within the next 12 days.
     - **Mean Reversion**: If the market recovers from the drop and comes back to the reference point (i.e. close price a day before outlier day), we go long so that we gain some profit.
4. **Profit Calculation**:  
   - Formula for Long Position:  $\text{Profit (\%)} = \frac{\text{Exit Price} - \text{Entry Price}}{\text{Entry Price}} \times 100$

		 
   - Formula for Short Position:  $\text{Profit (\%)} = \frac{\text{Entry Price} - \text{Exit Price}}{\text{Entry Price}} \times 100$

#### #Make sure to Trade ONLY IF: Profit > 0 and Model RMSE < 0.05



In [6]:
# Create an empty list to store the model performance metrics for each outlier
trade_results_list = []

# Loop through each outlier in date_ranges
for idx, row in date_ranges.iterrows():
    start_date_co = pd.Timestamp(row['start_date'])
    end_date_co = pd.Timestamp(row['end_date']) + pd.Timedelta(days=1)
    outlier_date_co = pd.Timestamp(row['outlier_date'])

    # Fetch and process daily data for the current range
    daily_data = fetch_and_process_daily_data(pair, start_date_co, end_date_co, api_key)

    if daily_data is None:
        print(f"No data fetched for outlier_id: {idx + 1}")
        continue  # Skip to the next iteration if no data

    # Filter out weekends
    daily_data = daily_data[~daily_data['date'].dt.weekday.isin([5, 6])]

    # Sort data by date
    daily_data = daily_data.sort_values(by='date', ascending=True)

    # Fill missing values
    daily_data.fillna(method='bfill', inplace=True)
    daily_data.fillna(method='ffill', inplace=True)

    # Split the dataset into train and test sets
    train_set = daily_data.iloc[:15].reset_index(drop=True)
    test_set = daily_data.iloc[15:].reset_index(drop=True)

    # Normalize the data using only the training data
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_set[["c"]])

    # Prepare data for LSTM model
    sequence_length = 12
    train_generator = TimeseriesGenerator(train_scaled, train_scaled, length=sequence_length, batch_size=1)

    # Define and compile LSTM model
    model = Sequential([
        LSTM(64, activation='relu', input_shape=(sequence_length, 1), kernel_initializer=Orthogonal()),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

    # Fit the model
    model.fit(train_generator, epochs=100, verbose=0)

    # Prepare the last sequence for forecasting
    last_sequence = train_scaled[-sequence_length:]

    # Forecast the next steps
    forecast_steps = len(test_set)
    predictions_scaled = []
    for _ in range(forecast_steps):
        last_sequence_reshaped = last_sequence.reshape((1, sequence_length, 1))
        next_step_pred = model.predict(last_sequence_reshaped, verbose=0)
        predictions_scaled.append(next_step_pred.ravel()[0])
        last_sequence = np.roll(last_sequence, -1)
        last_sequence[-1] = next_step_pred

    # Inverse transform predictions
    predictions_inv = scaler.inverse_transform(np.array(predictions_scaled).reshape(-1, 1))

    # Actual values for comparison
    actuals = test_set["c"].values[:forecast_steps]
    
    # Calculate MAPE (Mean Absolute Percentage Error)
    mse = mean_squared_error(actuals, predictions_inv)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(actuals, predictions_inv)
    accuracy = np.round(100 - (mape * 100), 2)
    
	# Append these lines right after fetching the 'actuals' values
    actual_days = np.arange(1, len(actuals) + 1)
    actual_slope, _, _, _, _ = linregress(actual_days, actuals)

	# Determine the actual market trend based on the slope and outlier type
    if row['daily_return'] > 0:  # Positive outlier
        actual_market_trend = "Momentum Continuation" if actual_slope > 0 else "Mean Reversion"
    else:  # Negative outlier
        actual_market_trend = "Momentum Continuation" if actual_slope < 0 else "Mean Reversion"

    # Determine if the outlier is positive or negative
    outlier_return = row['daily_return']
    is_positive_outlier = outlier_return > 0  # Check if the outlier is positive based on it's daily return value
    outlier_type = "Positive" if is_positive_outlier else "Negative"  # Define outlier type

    # Create an array for time (days 1 to 12)
    days = np.arange(1, len(predictions_inv) + 1)

    # Perform linear regression to identify the trend
    slope, intercept, r_value, p_value, std_err = linregress(days, predictions_inv.ravel())

    market_trend = ""
    trade_initiated = False
    entry_price = None
    exit_price = None
    entry_date = None
    exit_date = None
    profit = 0
    days_held = 0
    position_type = None

    if outlier_type == "Positive":
        if slope > 0:
            market_trend = "Momentum Continuation"
            # Go long: find buy low, sell high
            exit_idx = np.argmax(predictions_inv)
            entry_idx = np.argmin(predictions_inv[:exit_idx + 1])

            exit_price = predictions_inv[exit_idx][0]
            entry_price = predictions_inv[entry_idx][0]
            entry_date = test_set.iloc[entry_idx]['date']
            exit_date = test_set.iloc[exit_idx]['date']

            # Calculate profit
            profit = (exit_price - entry_price) * 100 / entry_price
            days_held = (exit_idx - entry_idx)
            position_type = "Long"
            trade_initiated = True
        else:
            market_trend = "Mean Reversion"
            # Go short: find sell high, buy low
            entry_idx = np.argmax(predictions_inv)
            exit_idx = np.argmin(predictions_inv[entry_idx:]) + entry_idx

            entry_price = predictions_inv[entry_idx][0]
            exit_price = predictions_inv[exit_idx][0]
            entry_date = test_set.iloc[entry_idx]['date']
            exit_date = test_set.iloc[exit_idx]['date']

            # Calculate profit
            profit = (entry_price - exit_price) * 100 / entry_price
            days_held = (exit_idx - entry_idx)
            position_type = "Short"
            trade_initiated = True

    else:  # Negative outlier
        if slope < 0:
            market_trend = "Momentum Continuation"
            # Go short: find sell high, buy low
            entry_idx = np.argmax(predictions_inv)
            exit_idx = np.argmin(predictions_inv[entry_idx:]) + entry_idx

            entry_price = predictions_inv[entry_idx][0]
            exit_price = predictions_inv[exit_idx][0]
            entry_date = test_set.iloc[entry_idx]['date']
            exit_date = test_set.iloc[exit_idx]['date']

            # Calculate profit
            profit = (entry_price - exit_price) * 100 / entry_price
            days_held = (exit_idx - entry_idx)
            position_type = "Short"
            trade_initiated = True
        else:
            market_trend = "Mean Reversion"
            # Go long: find buy low, sell high
            exit_idx = np.argmax(predictions_inv)
            entry_idx = np.argmin(predictions_inv[:exit_idx + 1])

            exit_price = predictions_inv[exit_idx][0]
            entry_price = predictions_inv[entry_idx][0]
            entry_date = test_set.iloc[entry_idx]['date']
            exit_date = test_set.iloc[exit_idx]['date']

            # Calculate profit
            profit = (exit_price - entry_price) * 100 / entry_price
            days_held = (exit_idx - entry_idx)
            position_type = "Long"
            trade_initiated = True

    # Append trade results for every outlier, even if no trade was initiated
    trade_results_list.append({
		'Outlier Date': outlier_date_co,
		'Outlier Price': row['outlier_price'],
		'Outlier Type': outlier_type,
        'Actual Market Trend': actual_market_trend,
		'Predicted Market Trend': market_trend,
		'Position Type': position_type,
		'Entry Date': entry_date,
		'Entry Price': entry_price,
		'Exit Date': exit_date,
		'Exit Price': exit_price,
		'Trading Days': days_held,
		'Profit': profit,
		'model_RMSE': rmse,
		'model_accuracy': accuracy 
		})

# Convert the list of dictionaries to a DataFrame
trade_results_df = pd.DataFrame(trade_results_list)

  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)
  self._warn_if_super_not_called()
  daily_data.fil

### Trading Strategies for Outliers in 2022 and 2023

We aim to optimize trading by selectively executing trades after detecting an outlier. Instead of executing every trade, we focus on:
1. We only trade when the forecasted values for the next 12 days closely match actual values (`i.e. Model RMSE < 0.05`)
2. If the prediction deviates too much (`i.e. Model RMSE is > 0.05`), we avoid the trade to prevent potential losses. 

In [7]:
trade_results_df

Unnamed: 0,Outlier Date,Outlier Price,Outlier Type,Actual Market Trend,Predicted Market Trend,Position Type,Entry Date,Entry Price,Exit Date,Exit Price,Trading Days,Profit,model_RMSE,model_accuracy
0,2022-03-09,0.90302,Negative,Momentum Continuation,Momentum Continuation,Short,2022-03-10,0.903963,2022-03-16,0.900645,4,0.367111,0.0075,99.31
1,2022-07-05,0.97493,Positive,Mean Reversion,Momentum Continuation,Long,2022-07-06,0.987365,2022-07-27,349190.28125,15,35365770.0,92497.814321,-3322559.27
2,2022-07-11,0.99537,Positive,Mean Reversion,Momentum Continuation,Long,2022-07-12,0.99692,2022-08-02,1.539102,15,54.38577,0.218219,84.35
3,2022-09-13,1.00213,Positive,Momentum Continuation,Momentum Continuation,Long,2022-09-26,0.098582,2022-10-05,9273.048828,7,9406306.0,2426.346388,-81379.87
4,2022-09-21,1.01711,Positive,Mean Reversion,Momentum Continuation,Long,2022-09-22,1.010215,2022-10-13,1.014668,15,0.4408389,0.015436,98.72
5,2022-09-23,1.0319,Positive,Mean Reversion,Momentum Continuation,Long,2022-09-26,1.04074,2022-10-14,36.050884,14,3363.966,10.836097,-406.87
6,2022-10-04,1.0015,Negative,Momentum Continuation,Mean Reversion,Long,2022-10-10,1.00063,2022-10-20,1.054371,8,5.370714,0.025059,97.69
7,2022-11-04,1.0015,Negative,Momentum Continuation,Mean Reversion,Long,2022-11-07,1.007077,2022-11-14,1.013982,5,0.6856556,0.042237,95.91
8,2022-11-10,0.98115,Negative,Momentum Continuation,Mean Reversion,Long,2022-11-15,0.97638,2022-12-02,0.98364,13,0.7435289,0.01884,98.23
9,2022-11-11,0.964,Negative,Momentum Continuation,Momentum Continuation,Short,2022-11-14,0.973745,2022-12-01,0.964409,13,0.9587784,0.007314,99.37


### Trades are filtered based on model accuracy to ensure a higher likelihood of success.

In [8]:
filtered_df = trade_results_df[(trade_results_df['model_RMSE'] < 0.05)]

filtered_df.reset_index(drop=True, inplace=True)

filtered_df

Unnamed: 0,Outlier Date,Outlier Price,Outlier Type,Actual Market Trend,Predicted Market Trend,Position Type,Entry Date,Entry Price,Exit Date,Exit Price,Trading Days,Profit,model_RMSE,model_accuracy
0,2022-03-09,0.90302,Negative,Momentum Continuation,Momentum Continuation,Short,2022-03-10,0.903963,2022-03-16,0.900645,4,0.367111,0.0075,99.31
1,2022-09-21,1.01711,Positive,Mean Reversion,Momentum Continuation,Long,2022-09-22,1.010215,2022-10-13,1.014668,15,0.440839,0.015436,98.72
2,2022-10-04,1.0015,Negative,Momentum Continuation,Mean Reversion,Long,2022-10-10,1.00063,2022-10-20,1.054371,8,5.370714,0.025059,97.69
3,2022-11-04,1.0015,Negative,Momentum Continuation,Mean Reversion,Long,2022-11-07,1.007077,2022-11-14,1.013982,5,0.685656,0.042237,95.91
4,2022-11-10,0.98115,Negative,Momentum Continuation,Mean Reversion,Long,2022-11-15,0.97638,2022-12-02,0.98364,13,0.743529,0.01884,98.23
5,2022-11-11,0.964,Negative,Momentum Continuation,Momentum Continuation,Short,2022-11-14,0.973745,2022-12-01,0.964409,13,0.958778,0.007314,99.37
6,2023-01-06,0.9376,Negative,Momentum Continuation,Momentum Continuation,Short,2023-01-09,0.943488,2023-01-10,0.943098,1,0.041335,0.020819,97.8
7,2023-02-01,0.9079,Negative,Mean Reversion,Momentum Continuation,Short,2023-02-02,0.916527,2023-02-23,0.914863,15,0.181599,0.019049,98.1
8,2023-03-07,0.9478,Positive,Mean Reversion,Momentum Continuation,Long,2023-03-13,0.94033,2023-03-22,0.945505,7,0.550332,0.013486,98.8
9,2023-03-17,0.9299,Negative,Momentum Continuation,Momentum Continuation,Short,2023-03-22,0.937256,2023-03-24,0.930694,2,0.700109,0.013369,98.67


In [9]:
min_profit = round(filtered_df['Profit'].min(), 2)
max_profit = round(filtered_df['Profit'].max(), 2)

print('Minimum Profit:', min_profit, '%')
print('Maximum Profit:', max_profit, '%')

Minimum Profit: 0.04 %
Maximum Profit: 5.37 %


## Project Outcome:
### >> 13 out of 20 outliers in years 2022 and 2023 can give us a profit ranging between 0.046% to 5.37% at a confidence level of 95% and more.