In [1]:
import pandas as pd
import numpy as np
import requests
from pandas.tseries.offsets import BDay

In [2]:
def calculate_daily_return(df):
    # Use pct_change() to calculate the percentage change in 'c' (close prices)
    df['daily_return'] = df['c'].pct_change()
    df['abs_daily_return'] = df['daily_return'].abs()
    return df

def get_top_outliers(df, n=10):
    # Filter for positive returns with 'c' > 0.95 and negative returns with 'c' < 0.8
    positive_condition = (df['c'] > 0.95) & (df['daily_return'] > 0)
    negative_condition = (df['c'] < 0.8) & (df['daily_return'] < 0)
    
    # Combine both conditions to create the final filter
    filtered_df = df[positive_condition | negative_condition]
    
    # Get the top n rows with the largest absolute daily returns
    return filtered_df.nlargest(n, 'abs_daily_return')

# Define the API key and base URL
api_key = 'beBybSi8daPgsTp5yx5cHtHpYcrjp5Jq'

# Define the currency pairs and years
pair = "C:USDEUR"
years = range(2022, 2024)

# Initialize DataFrames
full_data = pd.DataFrame()
outliers_data = pd.DataFrame()

# Loop over each year
for year in years:
    # Format the API endpoint
    start_date = f'{year}-01-01'
    end_date = f'{year}-12-31'
    url = f"https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
    
    # Make the API request
    response = requests.get(url)
    data = response.json()
    
    # Check if the request was successful
    if response.status_code == 200 and 'results' in data:
        # Load data into a DataFrame
        df = pd.DataFrame(data['results'])
        # Convert timestamps
        df['date'] = pd.to_datetime(df['t'], unit='ms')
        df.drop(columns=['t'], inplace=True)

        df = df[df['date'].dt.weekday < 5]
        # Calculating returns
        df = calculate_daily_return(df)

        # Append the data to the full_data DataFrame for the current currency pair
        df['year'] = year
        df['day'] = df['date'].dt.day_name()
        # Find the top 10 outliers based on absolute values of the daily return value
        # df['abs_daily_return'] = df['c'].abs()
        top_outliers = df.nlargest(10, 'abs_daily_return')
        outlier_dates = top_outliers['date']

        # Create a new column 'is_outlier' in the full_data DataFrame
        df['is_outlier'] = df['date'].isin(outlier_dates).astype(int)
        full_data = pd.concat([full_data, df], ignore_index=True)
        
        # Append outliers to the outliers_data DataFrame for the current currency pair
        top_outliers['year'] = year
        outliers_data = pd.concat([outliers_data, top_outliers], ignore_index=True)
        

sorted_full_data = full_data.sort_values(by="date")
sorted_outliers_data = outliers_data.sort_values(by="date")

In [3]:
sorted_full_data

Unnamed: 0,v,vw,o,c,h,l,n,date,daily_return,abs_daily_return,year,day,is_outlier
0,57901,0.8828,0.87925,0.88478,0.88651,0.879020,57901,2022-01-03,,,2022,Monday,0
1,62051,0.8854,0.88460,0.88624,0.88709,0.883000,62051,2022-01-04,0.001650,0.001650,2022,Tuesday,0
2,64819,0.8838,0.88627,0.88390,0.88673,0.881100,64819,2022-01-05,-0.002640,0.002640,2022,Wednesday,0
3,68551,0.8844,0.88409,0.88520,0.88613,0.882300,68551,2022-01-06,0.001471,0.001471,2022,Thursday,0
4,45968,0.8833,0.88531,0.88000,0.88571,0.879662,45968,2022-01-07,-0.005874,0.005874,2022,Friday,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,6522,0.9078,0.90750,0.90747,0.90940,0.901600,6522,2023-12-25,0.000629,0.000629,2023,Monday,0
516,51232,0.9068,0.90747,0.90550,0.90833,0.905100,51232,2023-12-26,-0.002171,0.002171,2023,Tuesday,0
517,77389,0.9029,0.90557,0.90018,0.90670,0.898796,77389,2023-12-27,-0.005875,0.005875,2023,Wednesday,0
518,83788,0.9008,0.90018,0.90348,0.90453,0.897500,83788,2023-12-28,0.003666,0.003666,2023,Thursday,0


In [4]:
# Convert dates in dataset to datetime objects
sorted_outliers_data['date'] = pd.to_datetime(sorted_outliers_data['date'])

date_ranges = pd.DataFrame({
    "start_date": sorted_outliers_data['date'] - BDay(14), # To predict X days, keep this as X-1 (as 1 day of outlier will be considered in LSTM input)
    "end_date": sorted_outliers_data['date'] + BDay(15),
    "outlier_date": sorted_outliers_data['date'],
    "outlier_price": sorted_outliers_data['c'],
    "daily_return": sorted_outliers_data['daily_return']
})

date_ranges.reset_index(drop=True, inplace=True)

date_ranges

Unnamed: 0,start_date,end_date,outlier_date,outlier_price,daily_return
0,2022-02-17,2022-03-30,2022-03-09,0.90302,-0.015868
1,2022-06-15,2022-07-26,2022-07-05,0.97493,0.017163
2,2022-06-21,2022-08-01,2022-07-11,0.99537,0.014235
3,2022-08-24,2022-10-04,2022-09-13,1.00213,0.015134
4,2022-09-01,2022-10-12,2022-09-21,1.01711,0.014371
5,2022-09-05,2022-10-14,2022-09-23,1.0319,0.01529
6,2022-09-14,2022-10-25,2022-10-04,1.0015,-0.01495
7,2022-10-17,2022-11-25,2022-11-04,1.0015,-0.023575
8,2022-10-21,2022-12-01,2022-11-10,0.98115,-0.016913
9,2022-10-24,2022-12-02,2022-11-11,0.964,-0.017479


In [5]:
def fetch_daily_data(pair, start_date, end_date, api_key):
    formatted_start_date = start_date.strftime('%Y-%m-%d')
    formatted_end_date = end_date.strftime('%Y-%m-%d')

    url = f"https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{formatted_start_date}/{formatted_end_date}?adjusted=true&sort=asc&apiKey={api_key}"
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return None

    response_data = response.json()

    if 'results' not in response_data:
        print(f"No 'results' in response: {response_data}")
        return None

    df = pd.DataFrame(response_data['results'])
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=['t'], inplace=True)

    daily_data = calculate_daily_return(df)
    daily_data.set_index('date', inplace=True)

    return daily_data

def fetch_and_process_daily_data(pair, start_date, end_date, api_key):
    daily_data = fetch_daily_data(pair, start_date, end_date, api_key)

    if daily_data is None:
        print("No data fetched")
        return None

    daily_data.reset_index(inplace=True)
    return daily_data

In [7]:
# Create an empty list to store the trade results for each outlier
trade_results_list = []

# Define the tolerance threshold for exit (0.05% tolerance)
tolerance = 0.005
max_days = 10

# Loop through each row in the date_ranges DataFrame
for idx, row in date_ranges.iterrows():
    outlier_id = idx + 1  # Assign a unique outlier_id for each iteration (starting with 1)
    
    # Convert all dates to pd.Timestamp and normalize to remove any time component
    start_date_co = pd.Timestamp(row['start_date']).normalize()
    end_date_co = (pd.Timestamp(row['end_date']) + pd.Timedelta(days=1)).normalize()
    outlier_date_co = pd.Timestamp(row['outlier_date']).normalize()

    # Fetch and process daily data for the current range
    daily_data = fetch_and_process_daily_data(pair, start_date_co, end_date_co, api_key)

    if daily_data is None:
        print(f"No data fetched for outlier_id: {outlier_id}")
        continue  # Skip to the next iteration if no data

    # Ensure 'date' column is in pd.Timestamp format and normalized (remove time component)
    daily_data['date'] = pd.to_datetime(daily_data['date']).dt.normalize()

    # Assign the current outlier_id to the data
    daily_data['outlier_id'] = outlier_id

    # Filter out weekends
    daily_data = daily_data[~daily_data['date'].dt.weekday.isin([5, 6])]

    # Sort data by date
    daily_data = daily_data.sort_values(by='date', ascending=True)

    # Fill missing values
    daily_data.fillna(method='bfill', inplace=True)
    daily_data.fillna(method='ffill', inplace=True)

    # Determine if the outlier is positive or negative
    outlier_return = row['daily_return']
    is_positive_outlier = outlier_return > 0  # Check if the outlier is positive
    outlier_type = "Positive" if is_positive_outlier else "Negative"  # Define outlier type
    position_type = "Long" if is_positive_outlier else "Short"

    # Trade logic based on outlier type
    entry_price = None
    exit_price = None
    entry_date = None
    exit_date = None
    profit = 0
    days_held = 0
    trade_initiated = False

    # Find entry point (change in sign of daily_return)
    try:
        outlier_index = daily_data[daily_data['date'] == outlier_date_co].index[0]
    except IndexError:
        print(f"Outlier date {outlier_date_co} not found in daily data for outlier_id: {outlier_id}")
        continue

    for i in range(outlier_index + 1, len(daily_data)):
        current_return = daily_data.iloc[i]['daily_return']
        if (current_return * outlier_return) < 0:  # Sign change detected
            entry_date = daily_data.iloc[i]['date']
            entry_price = daily_data.iloc[i]['c']
            trade_initiated = True
            break

    # Skip if no entry point found
    if not trade_initiated:
        print(f"No entry point found for outlier_id: {outlier_id}")
        continue

    # Reference price (close price of the day before outlier date)
    try:
        ref_price = daily_data.iloc[outlier_index - 1]['c']
    except IndexError:
        print(f"No reference price found for outlier_id: {outlier_id}")
        continue

    # Find exit point based on the new exit logic:
    # Exit when the price reaches within 0.05% of the reference price or after 10 days from the entry date
    for i in range(outlier_index + 1, len(daily_data)):
        current_close = daily_data.iloc[i]['c']
        if abs(current_close - ref_price) / ref_price <= tolerance:  # Within 0.05% tolerance
            exit_date = daily_data.iloc[i]['date']
            exit_price = current_close
            break

    # If no exit point is found, close the trade 10 days after the entry date
    if exit_date is None:
        try:
            entry_index = daily_data[daily_data['date'] == entry_date].index[0]
            exit_index = min(entry_index + max_days, len(daily_data) - 1)
            exit_date = daily_data.iloc[exit_index]['date']
            exit_price = daily_data.iloc[exit_index]['c']
        except IndexError:
            print(f"No exit point found after 10 days for outlier_id: {outlier_id}")
            continue

    # Calculate days held and profit
    days_held = min((pd.Timestamp(exit_date) - pd.Timestamp(entry_date)).days, max_days)
    if position_type == "Long":
        profit = (exit_price - entry_price) * 100 / entry_price  # Long trade profit formula
    else:
        profit = (entry_price - exit_price) * 100 / entry_price  # Short trade profit formula

    # Append trade results
    if profit > 0 and days_held > 0:
         trade_results_list.append({
			'Outlier Date': outlier_date_co,
			'Outlier Price': row['outlier_price'],
			'Outlier Type': outlier_type,
			'Position Type': position_type,
			'Entry Date': entry_date,
			'Entry Price': entry_price,
			'Exit Date': exit_date,
			'Exit Price': exit_price,
			'Days Held': days_held,
			'Profit': np.round(profit, 2)
		})

# Convert the list of dictionaries to a DataFrame
trade_results_df = pd.DataFrame(trade_results_list)

  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)
  daily_data.fillna(method='bfill', inplace=True)
  daily_data.fillna(method='ffill', inplace=True)


In [8]:
trade_results_df

Unnamed: 0,Outlier Date,Outlier Price,Outlier Type,Position Type,Entry Date,Entry Price,Exit Date,Exit Price,Days Held,Profit
0,2022-09-23,1.0319,Positive,Long,2022-09-29,1.01733,2022-10-14,1.0283,10,1.08
1,2022-10-04,1.0015,Negative,Short,2022-10-10,1.0295,2022-10-13,1.02377,3,0.56
2,2023-04-17,0.91533,Positive,Long,2023-04-21,0.901,2023-04-25,0.91089,4,1.1
3,2023-05-01,0.91164,Positive,Long,2023-05-05,0.8919,2023-05-23,0.9284,10,4.09
