In [3]:
import pandas as pd
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
from datetime import timedelta
from polygon import RESTClient
import requests
from pandas.tseries.offsets import BDay

#### calculate one outlier for each year


In [2]:
def calculate_daily_returns(df, prev_close=None):
    if prev_close is not None:
        df.loc[df.index[0], 'prev_close'] = prev_close
    else:
        df['prev_close'] = df['c'].shift(1)
    df['daily_return'] = (df['c'] - df['prev_close']) / df['prev_close']
    df['abs_daily_return'] = df['daily_return'].abs()
    return df

def get_top_outliers(df, n=1):
    return df.nlargest(n, 'abs_daily_return')

# Define the API key and base URL
api_key = 'beBybSi8daPgsTp5yx5cHtHpYcrjp5Jq'

# Define the currency pairs and years
pair = "C:USDCHF"
years = range(2010, 2024)

# Initialize DataFrames
stats_data = pd.DataFrame(columns=['Year', 'Mean', 'Variance', 'Skewness', 'Kurtosis'])
full_data = pd.DataFrame()
outliers_data = pd.DataFrame()

# Loop over each year
for year in years:
	# Format the API endpoint
	start_date = f'{year}-01-01'
	end_date = f'{year}-12-31'
	url = f"https://api.polygon.io/v2/aggs/ticker/{pair}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&limit=50000&apiKey={api_key}"
	
	# Make the API request
	response = requests.get(url)
	data = response.json()
	
	# Check if the request was successful
	if response.status_code == 200 and 'results' in data:
		# Load data into a DataFrame
		df = pd.DataFrame(data['results'])
        # Converting timestamps
		df['date'] = pd.to_datetime(df['t'], unit='ms')
		df.drop(columns=['t'], inplace=True)
        # Calculating returns
		df = calculate_daily_returns(df)
        
		# Calculate basic statistical properties of the daily returns
		returns = df['daily_return'] 
		mean_return = returns.mean()
		variance_return = returns.var()
		skewness_return = skew(returns)
		kurtosis_return = kurtosis(returns)

		# Collect stats data by year
		year_stats = pd.DataFrame([[year, mean_return, variance_return, skewness_return, kurtosis_return]],
									columns=['Year', 'Mean', 'Variance', 'Skewness', 'Kurtosis'])
		stats_data = pd.concat([stats_data, year_stats], ignore_index=True)

		# Append the data to the full_data DataFrame for the current currency pair
		df['year'] = year
		full_data = pd.concat([full_data, df], ignore_index=True)

		# Find the top 10 outliers based on absolute values of the daily return value
		df['abs_daily_return'] = df['c'].abs()
		outliers = df.nlargest(1, 'abs_daily_return')

		# Append outliers to the outliers_data DataFrame for the current currency pair
		outliers['year'] = year
		outliers_data = pd.concat([outliers_data, outliers], ignore_index=True)

# Sort the data by date so that visualization could be done in a better way         
sorted_full_data = full_data.sort_values(by="date")
sorted_outliers_data = outliers_data.sort_values(by="date")

  stats_data = pd.concat([stats_data, year_stats], ignore_index=True)


In [4]:
sorted_outliers_data

Unnamed: 0,v,vw,o,c,h,l,n,date,prev_close,daily_return,abs_daily_return,year
0,119982,1.1629,1.16569,1.16271,1.1673,1.15835,119982,2010-06-07,1.16155,0.000999,1.16271,2010
1,3484,0.9733,0.97398,0.97371,0.9743,0.97248,3484,2011-02-13,0.97318,0.000545,0.97371,2011
2,194696,0.9928,0.99048,0.99556,0.99709,0.98935,194696,2012-07-24,0.99048,0.005129,0.99556,2012
3,224744,0.9751,0.96921,0.98024,0.98378,0.96782,224744,2013-05-22,0.9692,0.011391,0.98024,2013
4,90053,0.9907,0.98856,0.99368,0.9938,0.98787,90053,2014-12-31,0.98856,0.005179,0.99368,2014
5,2983,1.0299,1.0294,1.03026,1.03043,1.0294,2983,2015-11-29,1.02886,0.001361,1.03026,2015
6,400432,1.0274,1.02234,1.02974,1.03436,1.02148,400432,2016-12-15,1.02235,0.007228,1.02974,2016
7,288871,1.0268,1.02416,1.02618,1.03351,1.02106,288871,2017-01-03,1.02416,0.001972,1.02618,2017
8,160257,1.0089,1.00596,1.01031,1.01114,1.00555,160257,2018-11-12,1.00597,0.004314,1.01031,2018
9,185189,1.0187,1.01971,1.02039,1.02081,1.0169,185189,2019-05-08,1.0197,0.000677,1.02039,2019


In [None]:
def pre_outlier_baseline(symbol, start_date, outlier_date):
    end_date = outlier_date - pd.DateOffset(days=1)
    url = f'https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}?adjusted=true&sort=asc&apiKey={api_key}'
    response = requests.get(url)
    if response.status_code != 200:
        print("Error fetching data:", response.status_code, response.text)
        return None
    data = response.json()
    if 'results' not in data:
        print("No 'results' key in response:", data)
        return None
    df = pd.DataFrame(data['results'])
    df['date'] = pd.to_datetime(df['t'], unit='ms')
    df.drop(columns=df['t'], inplace=True)
    df = calculate_daily_returns(df)

    #calculating mean pre outlier baseline
    returns = df['daily_return'] 
    mean_return = returns.mean()