# Single Pair

# Import python libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import statsmodels
from statsmodels.tsa.stattools import coint
from statsmodels.tsa.stattools import adfuller

import yfinance as yf

import time

import datetime
from datetime import datetime
from datetime import timedelta

import statistics
import scipy.stats as st
from scipy.stats import norm
import math

import pywt # Wavelet Analysis for removing noise


In [2]:
def add_curr_to_ticker(ticker_list):
    
    price_list = []
 
    for ticker in ticker_list:
        if ".HK" in ticker:
            price_list.append([ticker, 'HKD'])
        elif ".T" in ticker:
            price_list.append([ticker, 'JPY'])
        elif ".AX" in ticker:
            price_list.append([ticker, 'AUD'])
        else:
            price_list.append([ticker, 'USD'])
            
    ticker_list = price_list
            
    return ticker_list

# SELECT PAIR

In [3]:
input_tickers = ['LVS','1928.HK']

In [4]:
apply_noise_filter = True
wavelet = 'sym8'
level = 1


The level in the Wavelet Transform refers to the number of times the data is decomposed. In each level of decomposition, the data is halved, so the size of the input data should be at least 2^N for N levels of decomposition.

The level parameter defines the number of decomposition steps to be performed. The larger the level, the more high-frequency components (usually associated with noise) it will remove.  But the higher the level the more of our data will be lost.




In [5]:
# If fed from another workbook, do not adjust
try:
    print(ticker_list)
    print('Ticker List supplied by parent workbook')
    
# Otherwise add the currency to the ticker
except:

    ticker_list = add_curr_to_ticker(input_tickers)

ticker_list

[['LVS', 'USD'], ['1928.HK', 'HKD']]

# Download Prices from Yahoo

In [6]:
i = 0
for ticker in ticker_list:
  print('Ticker = ', ticker[0])
  print('i = ', i)
  data = yf.download(ticker[0], start="1972-01-01", end="2030-01-01")
  data['Ticker'] = ticker[0]
  data['Currency'] = ticker[1]
  data.reset_index(inplace=True)
  if i == 0:
    stock_prices_df = data
  if i> 0:
    stock_prices_df = pd.concat([stock_prices_df, data])

  time.sleep(0.25)    # pause 0.25 seconds
  
  i = i+1

stock_prices_df.rename(columns={'Close': 'Stock Close','Adj Close': 'Stock Adj Close'}, inplace=True)

stock_prices_df

Ticker =  LVS
i =  0
[*********************100%***********************]  1 of 1 completed
Ticker =  1928.HK
i =  1
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Stock Close,Stock Adj Close,Volume,Ticker,Currency
0,2004-12-15,41.900002,49.450001,41.650002,46.560001,30.944597,26246300,LVS,USD
1,2004-12-16,48.099998,50.599998,47.900002,49.110001,32.639370,7778200,LVS,USD
2,2004-12-17,49.110001,53.250000,48.110001,53.000000,35.224743,4755800,LVS,USD
3,2004-12-20,52.700001,53.980000,48.330002,48.599998,32.300419,6098000,LVS,USD
4,2004-12-21,48.599998,49.180000,45.599998,47.380001,31.489599,3209500,LVS,USD
...,...,...,...,...,...,...,...,...,...
3378,2023-08-14,28.000000,28.299999,27.000000,28.200001,28.200001,11316062,1928.HK,HKD
3379,2023-08-15,28.250000,28.299999,27.750000,27.900000,27.900000,4361299,1928.HK,HKD
3380,2023-08-16,27.600000,27.799999,27.250000,27.500000,27.500000,7436009,1928.HK,HKD
3381,2023-08-17,27.000000,27.250000,26.350000,27.049999,27.049999,16649837,1928.HK,HKD


In [7]:
stock_prices_df['Ticker'].unique()

array(['LVS', '1928.HK'], dtype=object)

# Add FX Rates

In [8]:
def download_fx_rates():
  sgd_df = yf.download('SGD=X', start="1972-01-01", end="2030-01-01")
  sgd_df.reset_index(inplace=True)
  sgd_df.rename(columns={'Close': 'SGD'}, inplace=True)
  sgd_df = sgd_df[['Date','SGD']].copy()

  aud_df = yf.download('AUDUSD=X', start="1972-01-01", end="2030-01-01")
  aud_df.reset_index(inplace=True)
  aud_df.rename(columns={'Close': 'AUD'}, inplace=True)
  aud_df = aud_df[['Date','AUD']].copy()

  jpy_df = yf.download('USDJPY=X', start="1972-01-01", end="2030-01-01")
  jpy_df.reset_index(inplace=True)
  jpy_df.rename(columns={'Close': 'JPY'}, inplace=True)
  jpy_df = jpy_df[['Date','JPY']].copy()

  eur_df = yf.download('EUR=X', start="1972-01-01", end="2030-01-01")
  eur_df.reset_index(inplace=True)
  eur_df.rename(columns={'Close': 'EUR'}, inplace=True)
  eur_df = eur_df[['Date','EUR']].copy()

  return sgd_df, aud_df, jpy_df, eur_df

In [9]:
def merge_fx_rates(stock_prices_df, sgd_df, aud_df, jpy_df, eur_df):
  stock_df = stock_prices_df.copy()

  stock_df = pd.merge(stock_df, sgd_df, how='outer', left_on='Date', right_on="Date")
  stock_df = pd.merge(stock_df, aud_df, how='outer', left_on='Date', right_on="Date")
  stock_df['AUD'] = 1 / stock_df['AUD'] #Invert AUD rate so we can treat it consisently with other currencies
  stock_df = pd.merge(stock_df, jpy_df, how='outer', left_on='Date', right_on="Date")
  stock_df = pd.merge(stock_df, eur_df, how='outer', left_on='Date', right_on="Date")
  stock_df['EUR'] = 1 / stock_df['EUR'] #Invert EUR rate so we can treat it consisently with other currencies
  stock_df['USD'] = 1
  stock_df['HKD'] = 7.75

  stock_df.dropna(subset=['Ticker'], inplace=True)
  stock_df.sort_values('Date', inplace=True)

  stock_df['Stock USD Close'] = stock_df['Stock Close']
  stock_df['Stock USD Adj Close'] = stock_df['Stock Adj Close']

  ccy_list = stock_df['Currency'].unique().tolist()

  for ccy in ccy_list:

    stock_df.loc[stock_df['Currency'] == ccy, 'Stock USD Close'] = \
          stock_df['Stock USD Close'] / stock_df[ccy]

    stock_df.loc[stock_df['Currency'] == ccy, 'Stock USD Adj Close'] = \
          stock_df['Stock USD Adj Close'] / stock_df[ccy]

  return stock_df, ccy_list


In [10]:
#Run functions to download and merge FX Rates

sgd_df, aud_df, jpy_df, eur_df = download_fx_rates()
stock_prices_df, ccy_list = merge_fx_rates(stock_prices_df, sgd_df, aud_df, jpy_df, eur_df)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [11]:
stock_prices_df.sort_values('Date').tail()

Unnamed: 0,Date,Open,High,Low,Stock Close,Stock Adj Close,Volume,Ticker,Currency,SGD,AUD,JPY,EUR,USD,HKD,Stock USD Close,Stock USD Adj Close
7993,2023-08-16,54.279999,54.75,53.59,53.630001,53.630001,3013300.0,LVS,USD,1.35815,1.54849,145.608002,1.090417,1,7.75,53.630001,53.630001
7995,2023-08-17,53.830002,54.209999,53.119999,53.48,53.48,3824300.0,LVS,USD,1.36036,1.55892,146.332993,1.087488,1,7.75,53.48,53.48
7996,2023-08-17,27.0,27.25,26.35,27.049999,27.049999,16649837.0,1928.HK,HKD,1.36036,1.55892,146.332993,1.087488,1,7.75,3.490322,3.490322
7998,2023-08-18,26.700001,26.75,25.75,26.049999,26.049999,22248016.0,1928.HK,HKD,1.35736,1.5604,145.714005,1.087429,1,7.75,3.36129,3.36129
7997,2023-08-18,52.799999,53.720001,52.459999,53.34,53.34,3210000.0,LVS,USD,1.35736,1.5604,145.714005,1.087429,1,7.75,53.34,53.34


# Format Data Files

## Convert to Datetime

In [12]:
#Convert date to datetime : Stocks File
stock_prices_df['Date'] = pd.to_datetime(stock_prices_df['Date'], format='%Y-%m-%d')
stock_prices_df.sort_values(['Ticker', 'Date'], inplace=True)

## Delete Un-necessary columns

In [13]:
stock_prices_df


Unnamed: 0,Date,Open,High,Low,Stock Close,Stock Adj Close,Volume,Ticker,Currency,SGD,AUD,JPY,EUR,USD,HKD,Stock USD Close,Stock USD Adj Close
1249,2009-11-30,9.350000,9.400000,8.780000,9.310000,6.099968,339316200.0,1928.HK,HKD,1.38200,1.089800,86.250000,1.503895,1,7.75,1.201290,0.787093
1251,2009-12-01,9.540000,9.690000,9.360000,9.540000,6.250666,94339200.0,1928.HK,HKD,1.38010,1.080400,86.650002,1.509001,1,7.75,1.230968,0.806538
1253,2009-12-02,10.020000,10.260000,9.880000,10.180000,6.669999,84198300.0,1928.HK,HKD,1.37990,1.077800,87.559998,1.506977,1,7.75,1.313548,0.860645
1255,2009-12-03,10.220000,10.320000,10.120000,10.180000,6.669999,41502200.0,1928.HK,HKD,1.38130,1.081800,88.259003,1.506546,1,7.75,1.313548,0.860645
1257,2009-12-04,10.300000,10.320000,10.140000,10.280000,6.735518,25748200.0,1928.HK,HKD,1.38880,1.092500,90.519997,1.485707,1,7.75,1.326452,0.869099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7989,2023-08-14,55.660000,56.259998,55.400002,55.799999,55.799999,3311000.0,LVS,USD,1.35317,1.539550,144.817001,1.094439,1,7.75,55.799999,55.799999
7991,2023-08-15,55.290001,55.389999,54.580002,54.639999,54.639999,4004400.0,LVS,USD,1.35562,1.541212,145.436996,1.090988,1,7.75,54.639999,54.639999
7993,2023-08-16,54.279999,54.750000,53.590000,53.630001,53.630001,3013300.0,LVS,USD,1.35815,1.548490,145.608002,1.090417,1,7.75,53.630001,53.630001
7995,2023-08-17,53.830002,54.209999,53.119999,53.480000,53.480000,3824300.0,LVS,USD,1.36036,1.558920,146.332993,1.087488,1,7.75,53.480000,53.480000


In [14]:
stock_prices_df.drop(columns=['Open', 'High', 'Low', 
                              #'Stock Close',
                              'Volume'], inplace=True)


In [15]:
stock_prices_df.tail()

Unnamed: 0,Date,Stock Close,Stock Adj Close,Ticker,Currency,SGD,AUD,JPY,EUR,USD,HKD,Stock USD Close,Stock USD Adj Close
7989,2023-08-14,55.799999,55.799999,LVS,USD,1.35317,1.53955,144.817001,1.094439,1,7.75,55.799999,55.799999
7991,2023-08-15,54.639999,54.639999,LVS,USD,1.35562,1.541212,145.436996,1.090988,1,7.75,54.639999,54.639999
7993,2023-08-16,53.630001,53.630001,LVS,USD,1.35815,1.54849,145.608002,1.090417,1,7.75,53.630001,53.630001
7995,2023-08-17,53.48,53.48,LVS,USD,1.36036,1.55892,146.332993,1.087488,1,7.75,53.48,53.48
7997,2023-08-18,53.34,53.34,LVS,USD,1.35736,1.5604,145.714005,1.087429,1,7.75,53.34,53.34


# CoIntegration Between Individual Stocks

### Limit Date Range

In [16]:
start_date = '1960-01-01'
end_date = '2030-12-31'


In [17]:
ticker_df = stock_prices_df.copy()
ticker_df = ticker_df[ticker_df['Date'] > start_date].copy()
ticker_df = ticker_df[ticker_df['Date'] < end_date].copy()

In [18]:
ticker_df.set_index('Date', inplace=True)

## Set Historical Vol Period and Stop Loss Limit

In [19]:
vol_period = 252
loss_limit = 10

## Add Daily Returns

In [20]:
#Add daily returns & drop Closing Price Column

#Stocks
ticker_df['Stock Daily Return'] = \
ticker_df.groupby('Ticker')['Stock USD Adj Close'].pct_change()*100

In [21]:
ticker_df = ticker_df[ticker_df['Stock Daily Return'] < 50]
ticker_df = ticker_df[ticker_df['Stock Daily Return'] > -50]
ticker_df

Unnamed: 0_level_0,Stock Close,Stock Adj Close,Ticker,Currency,SGD,AUD,JPY,EUR,USD,HKD,Stock USD Close,Stock USD Adj Close,Stock Daily Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2009-12-01,9.540000,6.250666,1928.HK,HKD,1.38010,1.080400,86.650002,1.509001,1,7.75,1.230968,0.806538,2.470459
2009-12-02,10.180000,6.669999,1928.HK,HKD,1.37990,1.077800,87.559998,1.506977,1,7.75,1.313548,0.860645,6.708621
2009-12-03,10.180000,6.669999,1928.HK,HKD,1.38130,1.081800,88.259003,1.506546,1,7.75,1.313548,0.860645,0.000000
2009-12-04,10.280000,6.735518,1928.HK,HKD,1.38880,1.092500,90.519997,1.485707,1,7.75,1.326452,0.869099,0.982292
2009-12-07,10.100000,6.617581,1928.HK,HKD,1.38880,1.092700,89.290001,1.484120,1,7.75,1.303226,0.853881,-1.750973
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-14,55.799999,55.799999,LVS,USD,1.35317,1.539550,144.817001,1.094439,1,7.75,55.799999,55.799999,-1.081370
2023-08-15,54.639999,54.639999,LVS,USD,1.35562,1.541212,145.436996,1.090988,1,7.75,54.639999,54.639999,-2.078853
2023-08-16,53.630001,53.630001,LVS,USD,1.35815,1.548490,145.608002,1.090417,1,7.75,53.630001,53.630001,-1.848460
2023-08-17,53.480000,53.480000,LVS,USD,1.36036,1.558920,146.332993,1.087488,1,7.75,53.480000,53.480000,-0.279697


## Create Price Series

In [22]:
### Creating Price Series1 and Price Series2 
def create_series(ticker1, ticker2, ticker_df):

    series1 = ticker_df.loc[ticker_df['Ticker'] == ticker1, 'Stock USD Close']
    series2 = ticker_df.loc[ticker_df['Ticker'] == ticker2, 'Stock USD Close']
   
    ## If one series has a new date, duplicate the last price for the shorter series
    # This helps us get the current day status of the pair
    if series1.index[-1] > series2.index[-1]:
        final_val = pd.Series( [series2[-1]], index= [series1.index[-1]])
        series2 = pd.concat([series2,final_val]) 
        series2.name = ticker2
    elif series1.index[-1] < series2.index[-1]:
        final_val = pd.Series( [series1[-1]], index= [series2.index[-1]])
        series1 = pd.concat([series1,final_val])
        series1.name = ticker1
    ##
    

    #This section is to ensure the two series are the same length
    # Deals with public holidays etc for one market during the timeframe
    sample1_df = pd.DataFrame(data=series1)
    sample1_df.rename(columns={'Stock USD Close': ticker1}, inplace=True)

    sample2_df = pd.DataFrame(data=series2)
    sample2_df.rename(columns={'Stock USD Close': ticker2}, inplace=True)

    sample_df = pd.merge(sample1_df, sample2_df, left_index=True, right_index=True)

    sample_df = sample_df.dropna()
    series1 = sample_df.iloc[:,0]
    series2 = sample_df.iloc[:,1]
    ##

    return series1, series2


## Apply Noise Filter / Wavelet Transform

In [23]:
def wavelet_transform_func(s1, s2):
    
    wavelet_len = pywt.Wavelet(wavelet).dec_len #This will be 2 * wavelet number (EG 2 * 8 for sym8)
    desired_len = wavelet_len * (2 ** level)  # Desired length of series

    noisy_ratio = s1/s2

    # Trimming the start of the series if necessary
    if len(noisy_ratio) % desired_len != 0:
        excess_size = len(noisy_ratio) % desired_len
        trimmed_noisy_ratio = noisy_ratio[excess_size:]  # Remove excess values from the start
        
        trimmed_s1 = s1[excess_size:]
        trimmed_s2 = s2[excess_size:]
    else:
        trimmed_noisy_ratio = noisy_ratio
        trimmed_s1 = s1
        trimmed_s2 = s2
        excess_size = 0
        
    print(f"\nWAVELET: {wavelet}  LEVEL: {level}\n")
    print(f"Number of records trimmed: {excess_size}")
    
    if len(trimmed_noisy_ratio)>0:
        
        coeffs = pywt.swt(trimmed_noisy_ratio, wavelet=wavelet, level=level, trim_approx=True, norm=True)
        coeffs_s1 = pywt.swt(trimmed_s1, wavelet=wavelet, level=level, trim_approx=True, norm=True)
        coeffs_s2 = pywt.swt(trimmed_s2, wavelet=wavelet, level=level, trim_approx=True, norm=True)

        # Extract the approximation coefficients 
        filtered_ratio = coeffs[0]  
        filtered_ratio = pd.Series(filtered_ratio, index=trimmed_noisy_ratio.index)  # Convert back to a Series
        #print(f"Filtered Ratio {filtered_ratio[:5]}")

        filtered_s1 = coeffs_s1[0] 
        filtered_s1 = pd.Series(filtered_s1, index=trimmed_s1.index, name=s1.name)  # Convert back to a Series

        filtered_s2 = coeffs_s2[0] 
        filtered_s2 = pd.Series(filtered_s2, index=trimmed_s1.index, name=s2.name)  # Convert back to a Series
    else:
        print('Filter NOT Applied.  Insufficient Data')
        filtered_ratio = noisy_ratio
        filtered_s1 = s1
        filtered_s2 = s2

    return filtered_ratio, filtered_s1, filtered_s2



In the context of Symlet wavelets (denoted as 'symN' where N is the order of the wavelet), the number following 'sym' represents the order of the wavelet, and it directly affects the length of the wavelet filter (which is 2N for Symlets) and the number of vanishing moments.

Length of the Wavelet Filter: Higher order wavelets like 'sym8' have longer filters, which means they use more data points in the calculation of each wavelet coefficient. This results in smoother and more precise representations of the data, especially for larger scale features, but can lead to edge effects and less precise localization of smaller scale features.

Number of Vanishing Moments: The order of the Symlet wavelet also corresponds to the number of vanishing moments, which is a measure of the ability of the wavelet to compactly represent polynomial trends in the data. A higher order Symlet wavelet can represent higher order polynomial trends with fewer coefficients, leading to a more efficient representation.

As for the maximum and minimum wavelet orders for Symlets in the PyWavelets library, as of my knowledge cutoff in September 2021, Symlets are available from order 2 ('sym2') to order 20 ('sym20'). The availability of specific wavelet types and orders might be dependent on the specific wavelet library and its version that you're using.

It's worth noting that while higher order wavelets can provide more precise and efficient representations of the data, they also require more computation and may be more susceptible to noise and other small-scale features. Therefore, the choice of wavelet order should be informed by the specific characteristics and requirements of your data and analysis task.

## Calculate Number of Crosses

In [24]:
#Calculate number of times ratio passes through median ratio value

def median_cross_count(series1,series2, series_median):
  series_df = pd.DataFrame(data=series1/series2, columns=['Ratio'])

  #If no median value is "force fed", use the median for the current series
  # We force feed the median from the train set into the test set
  if series_median == 0:
    series_median = np.median(series1/series2)
  series_df['Above Below'] = 'B'
  series_df.loc[series_df['Ratio'] > series_median, 'Above Below'] = "A"

  series_df['Prior Above Below'] = 'B'
  # Need to make the first row NA given no prior to reference
  series_df.loc[series_df.index[0], 'Prior Above Below'] = "NA"
  series_df.loc[series_df['Ratio'].shift(1) > series_median, 'Prior Above Below'] = "A"

  series_df['Counter'] = 0
  series_df.loc[series_df[ 'Above Below'] != series_df['Prior Above Below'], 'Counter'] = 1
  # Need to make the first row NA given no prior to reference
  series_df.loc[series_df.index[0], 'Counter'] = 0

  #Make Data A Column Instead of the Index
  series_df.reset_index(inplace=True)

  median_count = series_df['Counter'].sum()
  #print('The number of times passing through Median Value: ', median_count)

  #When was the last median cross

  last_cross = series_df.groupby('Counter').last()
  first_cross = series_df.groupby('Counter').first()
  last_date = series_df.loc[series_df.index[-1],'Date']

  if median_count != 0:
    last_cross = last_cross.loc[1,'Date']
    first_cross = first_cross.loc[1,'Date']

    last_cross = pd.to_datetime(last_cross, format='%Y-%m-%d')
    first_cross = pd.to_datetime(first_cross, format='%Y-%m-%d')
    last_date = pd.to_datetime(last_date, format='%Y-%m-%d')

    days_last_cross = (last_date - last_cross).days

  else:
    days_last_cross = 10000
    last_cross = pd.to_datetime('2000-01-01', format='%Y-%m-%d')
    first_cross = pd.to_datetime('1990-01-01', format='%Y-%m-%d')

  #print('Train Set Series Median: ', series_median)

  return series_df, median_count, first_cross, last_cross, series_median, days_last_cross

## Function for Calculating Regularity of Crossing

In [25]:
def regularity_of_crossing_func(series1, series2, series_median):
    
    ratio = series1 / series2
    total_records = len(ratio)
   
    #Split the term into periods
    num_periods = 6    
    mini_records = int(total_records/ num_periods)
    
    period_cross_count = 0
    
    for i in range(num_periods):
       
        mini_ratio = ratio[i*mini_records:(i+1)*mini_records]
        
        # If initial value is below median but max is above median, cross has occurred
        if mini_ratio[0] < series_median:
            if mini_ratio.max() >= series_median:
                mini_cross = 1
            else:
                mini_cross = 0
        # If initial value is above median but min is below median, cross has occurred
        if mini_ratio[0] > series_median:
            if mini_ratio.min() <= series_median:
                mini_cross = 1
            else:
                mini_cross = 0
        
        period_cross_count = period_cross_count + mini_cross
    
    period_cross_frequency_perc = (period_cross_count / num_periods) * 100
    
    return period_cross_count, period_cross_frequency_perc
        

## Function for Building Crossing Results DF

In [26]:
def prob_crossing_again(ticker_df,
                        ticker1, ticker2,
                        train_start_date,train_end_date,
                        median_duration):

    ticker_df = ticker_df.reset_index()
    
    #create additional train start dates to look back further for checking long term medium & correlation levels
    train_start_date_2yr = train_start_date - timedelta(days = 365*2) 
    train_start_date_3yr = train_start_date - timedelta(days = 365*3) 
    train_start_date_5yr = train_start_date - timedelta(days = 365*5) 
    train_start_date_10yr = train_start_date - timedelta(days = 365*10) 
    
    #summary_array = np.empty((0, 16))
  
    #Create train df
    train_ticker_df = ticker_df[(ticker_df['Date'] > train_start_date) & (ticker_df['Date'] < train_end_date)]

    #Create Longer Term train dfs for checking long term medium & correlation levels
    train_ticker_df_2yr = ticker_df[(ticker_df['Date'] > train_start_date_2yr) & (ticker_df['Date'] < train_end_date)]
    train_ticker_df_3yr = ticker_df[(ticker_df['Date'] > train_start_date_3yr) & (ticker_df['Date'] < train_end_date)]
    train_ticker_df_5yr = ticker_df[(ticker_df['Date'] > train_start_date_5yr) & (ticker_df['Date'] < train_end_date)]
    train_ticker_df_10yr = ticker_df[(ticker_df['Date'] > train_start_date_10yr) & (ticker_df['Date'] < train_end_date)]

    train_ticker_df.set_index('Date', inplace=True)
    train_ticker_df_2yr.set_index('Date', inplace=True)
    train_ticker_df_3yr.set_index('Date', inplace=True)
    train_ticker_df_5yr.set_index('Date', inplace=True)
    train_ticker_df_10yr.set_index('Date', inplace=True)
       
    # create prices series
    train_series1, train_series2 = create_series(ticker1, ticker2, train_ticker_df)
    # create longer term price serieses for checking long term medium & correlation levels
    train_series1_2yr, train_series2_2yr = create_series(ticker1, ticker2, train_ticker_df_2yr)
    train_series1_3yr, train_series2_3yr = create_series(ticker1, ticker2, train_ticker_df_3yr)
    train_series1_5yr, train_series2_5yr = create_series(ticker1, ticker2, train_ticker_df_5yr)
    train_series1_10yr, train_series2_10yr = create_series(ticker1, ticker2, train_ticker_df_10yr)
    
    
    # If we are applying a noise filter, filter the price series data
    if apply_noise_filter:
        filtered_ratio, train_series1, train_series2 = wavelet_transform_func(train_series1, train_series2)
        filtered_ratio_2yr, train_series1_2yr, train_series2_2yr = wavelet_transform_func(train_series1_2yr, train_series2_2yr)
        filtered_ratio_3yr, train_series1_3yr, train_series2_3yr = wavelet_transform_func(train_series1_3yr, train_series2_3yr)
        filtered_ratio_5yr, train_series1_5yr, train_series2_5yr = wavelet_transform_func(train_series1_5yr, train_series2_5yr)
        filtered_ratio_10yr, train_series1_10yr, train_series2_10yr = wavelet_transform_func(train_series1_10yr, train_series2_10yr)
    """
    """
     
    
    # Calculate Series Median
    series_median_1yr = np.median(train_series1/train_series2)

    #Calculate Longer Term Series Medians
    series_median_2yr = np.median(train_series1_2yr/train_series2_2yr)
    series_median_3yr = np.median(train_series1_3yr/train_series2_3yr)
    series_median_5yr = np.median(train_series1_5yr/train_series2_5yr)
    series_median_10yr = np.median(train_series1_10yr/train_series2_10yr)

    # Set the median to use based on median_duration variable
    if median_duration == 1:
        series_median = series_median_1yr
    if median_duration == 2:
        series_median = series_median_2yr
    if median_duration == 3:
        series_median = series_median_3yr
    if median_duration == 5:
        series_median = series_median_5yr
    if median_duration == 10:
        series_median = series_median_10yr
        
    #Calculate the size of the devn from the median
    current_ratio = train_series1[-1]/ train_series2[-1]
    devn_from_median = (current_ratio - series_median)/ series_median * 100
    # Calculate deviatin from long term median
    devn_from_5yr_median = (current_ratio - series_median_5yr)/ series_median_5yr * 100
    
    #### Calculate Cross Statistics for 'Training' Set ####
    train_series_df, train_median_count, train_first_cross, train_last_cross, series_median_train, train_days_last_cross = \
    median_cross_count(train_series1,train_series2, series_median)

    ### Calculate correlation across the multiple timeframes
    ###     Only use time series up to last crossing date for purposes of correlation calculation
    ##      Do this as that way the recent divergence does not impact on the correlation calc
    
    pair_corr_1yr = train_series1.corr(train_series2)
    pair_corr_2yr = train_series1_2yr.corr(train_series2_2yr)
    pair_corr_3yr = train_series1_3yr.corr(train_series2_3yr)
    pair_corr_5yr = train_series1_5yr.corr(train_series2_5yr)
    pair_corr_10yr = train_series1_10yr.corr(train_series2_10yr)
    
    # Calculate pvalue across multiple timeframes
    pscore_1yr, pvalue_1yr, _ = coint(train_series1,train_series2)
    pscore_2yr, pvalue_2yr, _ = coint(train_series1_2yr,train_series2_2yr)
    pscore_3yr, pvalue_3yr, _ = coint(train_series1_3yr,train_series2_3yr)
    pscore_5yr, pvalue_5yr, _ = coint(train_series1_5yr,train_series2_5yr)
    pscore_10yr, pvalue_10yr, _ = coint(train_series1_10yr,train_series2_10yr)

    #Calculate Regularity of Crosses
    period_cross_count, period_cross_frequency_perc = \
    regularity_of_crossing_func(train_series1_5yr, train_series2_5yr, series_median_5yr)
    
    # Build Results Array
    results_array = [[ticker1, ticker2, 
                            train_median_count, 
                            train_days_last_cross, 
                            current_ratio, 
                            series_median, devn_from_median, devn_from_5yr_median,
                            train_start_date, train_end_date,
                            pair_corr_1yr, pair_corr_2yr, pair_corr_3yr, pair_corr_5yr, pair_corr_10yr,
                            pvalue_1yr, pvalue_2yr, pvalue_3yr, pvalue_5yr, pvalue_10yr,
                            period_cross_count, period_cross_frequency_perc,
                            ]]
      
    results_df = pd.DataFrame(data=results_array, columns=['Ticker1', 'Ticker2', 'Train Median Cross Count',
                                   'Days Since Last TRAIN Cross', 
                                   'Current Ratio', 
                                   'Median Ratio', '% Deviation From Median', '% Deviation From 5Yr Median',
                                   'Train Start Date', 'Train End Date',
                                   '1 Yr Corr', '2 Yr Corr', '3 Yr Corr', '5 Yr Corr', '10 Yr Corr',
                                   '1 Yr P-Val', '2 Yr P-Val', '3 Yr P-Val', '5 Yr P-Val', '10 Yr P-Val',                                                            
                                    'Cross Freq Count', 'Cross Freq %'])

    return results_df


## Calculate Vol and Number of Std Devns

In [27]:
def pvalue_calc(series1,series2):
  score, pvalue, _ = coint(series1,series2)
  #print('CoInt score:', score, 'pvalue: ', pvalue)
  
  return score, pvalue

In [28]:
#Calculate the volatility of returns of the price series
# Vol period is the number of days to utilize to calculate the historical vol

def calculate_vol(series1, series2, vol_period, loss_limit):
    
    ratio = series1/ series2
    vol_period_ratio = ratio[-vol_period:]
    ratio_df = pd.DataFrame(data=vol_period_ratio, columns=['Ratio'])
    ratio_df['Ratio Return'] = ratio_df['Ratio'].pct_change() * 100
    ratio_df.dropna(inplace=True)
    daily_std_dev  = statistics.stdev(ratio_df['Ratio Return'])
    ratio_std_dev = statistics.stdev(ratio_df['Ratio'])
    sd_asa_perc_median = ratio_std_dev/ ratio.median() * 100


    #annualize the standard dev of returns
    vol = daily_std_dev * math.sqrt(252)  #annualized vol
    period_vol = daily_std_dev * math.sqrt(vol_period) #std devn for the period length chosen

    current_ratio = ratio[-1]
    median_ratio = ratio.median()
    devn_from_median = ((current_ratio - median_ratio) / median_ratio) * 100

    ##### Number of Standard Deviations and Z Score Probability ####
    #num_std_devs = (current_ratio - series_median) / vol #Calculating from Median Not Mean
    num_std_devs = devn_from_median / vol #Calculating from Median Not Mean
    prob_z_score = 100 - st.norm.cdf(abs(num_std_devs)) * 100

    #### Probability of Exceeding Set Loss %
    annual_stop_loss_sds = loss_limit / vol  #the number of standard deviations to reach the stop loss limit
    annual_prob_stop_loss = 100 - st.norm.cdf(abs(annual_stop_loss_sds)) * 100
    period_stop_loss_sds = loss_limit / period_vol  #the number of standard deviations to reach the stop loss limit
    period_prob_stop_loss = 100 - st.norm.cdf(abs(period_stop_loss_sds)) * 100

    #### Position Size Adjustment to Limit Dollar Loss to Target Stop Loss with 82% Confidence
    # 1 Std deviation = 68% Confidence = 1- 18% = 82% Confidence given only worried about one side of the curve
    annual_size_adjustment_multiple = loss_limit / vol
    period_size_adjustment_multiple = loss_limit / period_vol

    return vol, num_std_devs, sd_asa_perc_median, prob_z_score, loss_limit, annual_prob_stop_loss, \
    annual_size_adjustment_multiple, period_prob_stop_loss, period_size_adjustment_multiple

## Function to Plot Price Ratios vs Mean

In [29]:
##Plotting Price of One ticker divided by Price of Second Ticker vs the average of that
# Shows whether the ratio is mean reverting.
def plot_ratios_v_mean(series1, series2, filtered_ratio):

    start_date = str(series1.index[0].day) + "-" + str(series1.index[0].month)  + "-" + str(series1.index[0].year)  
    end_date = str(series1.index[-1].day) + "-" + str(series1.index[-1].month) + "-" + str(series1.index[-1].year)
    
    ratio = series1/series2

    medn = np.round((series1/series2).median(),4)
    minimum = np.round((series1/series2).min(),4)
    maximum = np.round((series1/series2).max(),4)
    
    current = np.round(series1[-1]/ series2[-1],4)
    
    trace1 = go.Scatter(x=list(series1.index),
                   y=list(ratio),
                   mode='lines',
                   text=ratio,
                   name='Price Ratio')

    trace2 = go.Scatter(x=list(series1.index),
                   y=list(ratio/ratio * medn),
                   mode='lines',
                   text=ratio,
                   name='Median Ratio')

    trace3 = go.Scatter(x=list(series1.index),
                   y=list(ratio/ratio * current),
                   mode='lines',
                   text=ratio,
                   name='Current Ratio')

    trace4 = go.Scatter(x=list(series1.index),
                   y=list(ratio/ratio * minimum),
                   mode='lines',
                   text=ratio,
                   name='Min Ratio')
    
    trace5 = go.Scatter(x=list(series1.index),
                   y=list(ratio/ratio * maximum),
                   mode='lines',
                   text=ratio,
                   name='Max Ratio') 
    
    data=[trace1, trace2, trace3, trace4, trace5]
    
    layout = go.Layout(title=series1.name + " vs " + series2.name + "   "  + start_date + " to " + end_date )

    figure = go.Figure(data=data, layout=layout)
    
    if apply_noise_filter:
        
        figure.add_trace(go.Scatter(x=list(filtered_ratio.index), y=list(filtered_ratio), mode='lines', name='Filtered Ratio',
                                    text=filtered_ratio))

    f1 = go.FigureWidget(figure)
    f1.show()
    

# Run Functions

In [30]:
ticker1 = ticker_list[0][0]
ticker2 = ticker_list[1][0]

In [31]:
#Use the Close or the Adj Close
close_or_adj_close = 'Adj Close'
      
if close_or_adj_close == 'Close':
  price_to_use = 'Stock USD Close'
if close_or_adj_close == 'Adj Close':
  price_to_use = 'Stock USD Adj Close'

## Calculate Number of Crosses

In [32]:
last_train_date = datetime.today().date()
#last_train_date = '2019-12-31'
last_train_date = pd.to_datetime(last_train_date, format='%Y-%m-%d')
train_period_length = timedelta(days = 365)

# Training Period
train_start_date = last_train_date - timedelta(days = 365)
train_end_date = last_train_date

print('Training Period: ',train_start_date, 'to', train_end_date)

Training Period:  2022-08-19 00:00:00 to 2023-08-19 00:00:00


In [33]:
median_duration = 2 #Use the 1, 2, 5 or 10 year median to check deviations against and set as trading reversion point

#Run Prob of Crossing Again Function
results_df = prob_crossing_again(ticker_df,
                      ticker1, ticker2,
                      train_start_date, train_end_date,
                      median_duration)

results_df['Av Corr'] = (results_df['1 Yr Corr'] + results_df['2 Yr Corr'] + results_df['5 Yr Corr'] + 
                         results_df['10 Yr Corr'])  / 4

results_df['Median P-Val'] = results_df.loc[:,'1 Yr P-Val':'10 Yr P-Val'].median(axis=1)

results_df


WAVELET: sym8  LEVEL: 1

Number of records trimmed: 14

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 14

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 0

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 0

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 16


Unnamed: 0,Ticker1,Ticker2,Train Median Cross Count,Days Since Last TRAIN Cross,Current Ratio,Median Ratio,% Deviation From Median,% Deviation From 5Yr Median,Train Start Date,Train End Date,...,10 Yr Corr,1 Yr P-Val,2 Yr P-Val,3 Yr P-Val,5 Yr P-Val,10 Yr P-Val,Cross Freq Count,Cross Freq %,Av Corr,Median P-Val
0,LVS,1928.HK,18,1,16.363493,15.468538,5.785643,24.635485,2022-08-19,2023-08-19,...,0.894188,0.032634,0.479161,0.434909,0.302499,0.067833,5,83.333333,0.905335,0.302499


## Run Graphs

In [34]:
start_dates = ['2002-07-01', '2010-01-01','2015-01-01', 
               '2019-01-01','2022-01-01', '2023-01-01']

start_dates = ['2005-01-01', '2010-01-01','2021-05-01', 
               '2022-01-01','2022-05-01', '2023-01-01']

end_date = '2030-01-01'
#end_date = '2018-04-01'

In [35]:
#results_df.columns

In [36]:
"""
results_df[['Ticker1', 'Ticker2', 'Train Median Cross Count',
       'Days Since Last TRAIN Cross', 'Current Ratio', 'Median Ratio',
       '% Deviation From Median', '% Deviation From 5Yr Median',
       'Train Start Date', 'Train End Date', 
       'Cross Freq %', 'Av Corr', 'Median P-Val',
        #'1 Yr Corr', '2 Yr Corr','3 Yr Corr', '5 Yr Corr', '10 Yr Corr', 
        '1 Yr P-Val', '2 Yr P-Val','3 Yr P-Val', '5 Yr P-Val', '10 Yr P-Val', 
        'Cross Freq Count']]
"""

"\nresults_df[['Ticker1', 'Ticker2', 'Train Median Cross Count',\n       'Days Since Last TRAIN Cross', 'Current Ratio', 'Median Ratio',\n       '% Deviation From Median', '% Deviation From 5Yr Median',\n       'Train Start Date', 'Train End Date', \n       'Cross Freq %', 'Av Corr', 'Median P-Val',\n        #'1 Yr Corr', '2 Yr Corr','3 Yr Corr', '5 Yr Corr', '10 Yr Corr', \n        '1 Yr P-Val', '2 Yr P-Val','3 Yr P-Val', '5 Yr P-Val', '10 Yr P-Val', \n        'Cross Freq Count']]\n"

In [37]:
print(stock_prices_df[stock_prices_df['Ticker']==ticker1]['Stock Adj Close'].values[-1])
print(stock_prices_df[stock_prices_df['Ticker']==ticker2]['Stock Adj Close'].values[-1])


53.34000015258789
26.049999237060547


In [38]:
stock_prices_df.columns

Index(['Date', 'Stock Close', 'Stock Adj Close', 'Ticker', 'Currency', 'SGD',
       'AUD', 'JPY', 'EUR', 'USD', 'HKD', 'Stock USD Close',
       'Stock USD Adj Close'],
      dtype='object')

In [39]:
for start_date in start_dates:
    
    print("\n----------------------------------------")

    indiv_df = stock_prices_df[stock_prices_df['Date'] > start_date].copy()
    indiv_df = indiv_df[indiv_df['Date'] < end_date].copy()
    indiv_df.set_index('Date', inplace=True)

    series1, series2 = create_series(ticker1, ticker2, indiv_df)
        
    if apply_noise_filter:
        
        filtered_ratio, filtered_s1, filtered_s2 = wavelet_transform_func(series1, series2)
        print(f"Unfiltered Median: {(series1/series2).median():0.2f} Filtered: {filtered_ratio.median():0.2f}")
        print(f"Unfiltered Max: {(series1/series2).max():0.2f} Filtered: {filtered_ratio.max():0.2f}")
        
    else:
        filtered_ratio = series1/ series2 
        print('Filter Not Applied')

    plot_ratios_v_mean(series1, series2, filtered_ratio) 

    # print the pvalue & correlation
    score, pvalue = pvalue_calc(series1,series2)
    pair_corr = series1.corr(series2)
    print('pvalue: ', np.round(pvalue,4), 'Correlation: ', np.round(pair_corr,4))
    if apply_noise_filter:
        filtered_score, filtered_pvalue = pvalue_calc(filtered_s1,filtered_s2)
        filtered_corr = filtered_s1.corr(filtered_s2)
        print('filtered_pvalue: ', np.round(filtered_pvalue,4), 'Correlation: ', np.round(filtered_corr,4))

    
    
    # print the results from the adfuller stationarity test
    ratio_series = series1/ series2
    adf = adfuller(ratio_series, maxlag=1)
    print('ADF Results')
    print(adf)

    current_ratio = series1[-1]/ series2[-1]
    median_ratio = (series1/series2).median()
    diff = (current_ratio-median_ratio) / median_ratio * 100

    print('Median: ', np.round(median_ratio,4))
    print('Current Ratio: ', np.round(current_ratio,4))
    print('Current Diff Vs Median: ',  np.round(diff,3), '%')
    
    last_price_1 = stock_prices_df[stock_prices_df['Ticker']==ticker1][price_to_use].values[-1].round(2)
    last_price_2 = stock_prices_df[stock_prices_df['Ticker']==ticker2][price_to_use].values[-1].round(2)
    print(f"Last prices used in USD Terms: {ticker1} {last_price_1} {ticker2} {last_price_2}") 

    #print the vol and number of standard deviations divergence
    vol, num_std_devs, sd_asa_perc_median, prob_z_score, loss_limit, annual_prob_stop_loss, annual_size_adjustment_multiple, \
    period_prob_stop_loss, period_size_adjustment_multiple = \
    calculate_vol(series1, series2, vol_period, loss_limit)

    print(vol_period, 'Day Historical Volality', np.round(vol,2),'%')
    print(f"Std Devn of Ratio as a % of the Ratio Median: {sd_asa_perc_median:0.2f}%")
    print('Probability of stop loss (', loss_limit,'%) being exceeded within', vol_period,'days', np.round(period_prob_stop_loss,2),'%')
    print('Size Adjustment Multiple for 82% Confidence at Stop Loss Level of', loss_limit,'% =', np.round(period_size_adjustment_multiple,3))

results_df   #display crossing results


----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 2
Unfiltered Median: 13.01 Filtered: 13.00
Unfiltered Max: 24.27 Filtered: 24.26


pvalue:  0.0078 Correlation:  0.909
filtered_pvalue:  0.0018 Correlation:  0.9098
ADF Results
(-4.1480949422049544, 0.0008056551413004422, 1, 3296, {'1%': -3.4323355582984356, '5%': -2.8624173022673167, '10%': -2.5672370061423377}, 3941.5710999730736)
Median:  13.0124
Current Ratio:  15.8689
Current Diff Vs Median:  21.952 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 51.42 %
Std Devn of Ratio as a % of the Ratio Median: 10.36%
Probability of stop loss ( 10 %) being exceeded within 252 days 42.29 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.194

----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 11
Unfiltered Median: 13.02 Filtered: 13.01
Unfiltered Max: 24.27 Filtered: 24.26


pvalue:  0.0071 Correlation:  0.9082
filtered_pvalue:  0.0009 Correlation:  0.9088
ADF Results
(-4.126958432151621, 0.0008737843610341803, 1, 3273, {'1%': -3.43234952216411, '5%': -2.862423470012863, '10%': -2.567240289714035}, 3918.9006003996865)
Median:  13.0211
Current Ratio:  15.8689
Current Diff Vs Median:  21.87 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 51.42 %
Std Devn of Ratio as a % of the Ratio Median: 10.35%
Probability of stop loss ( 10 %) being exceeded within 252 days 42.29 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.194

----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 6
Unfiltered Median: 15.93 Filtered: 16.01
Unfiltered Max: 21.91 Filtered: 21.62


pvalue:  0.3214 Correlation:  0.8802
filtered_pvalue:  0.2392 Correlation:  0.8813
ADF Results
(-3.8314810563878656, 0.0026011560882547025, 1, 548, {'1%': -3.442339408568417, '5%': -2.866828612425054, '10%': -2.5695866531248335}, 1023.0816036010577)
Median:  15.9318
Current Ratio:  15.8689
Current Diff Vs Median:  -0.395 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 51.42 %
Std Devn of Ratio as a % of the Ratio Median: 8.46%
Probability of stop loss ( 10 %) being exceeded within 252 days 42.29 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.194

----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 4
Unfiltered Median: 16.16 Filtered: 16.12
Unfiltered Max: 21.91 Filtered: 21.62


pvalue:  0.0318 Correlation:  0.9535
filtered_pvalue:  0.0089 Correlation:  0.9553
ADF Results
(-4.415900617156722, 0.0002781788599180132, 1, 386, {'1%': -3.447405233596701, '5%': -2.8690569369014605, '10%': -2.5707743450830893}, 683.5107309787713)
Median:  16.1609
Current Ratio:  15.8689
Current Diff Vs Median:  -1.807 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 51.42 %
Std Devn of Ratio as a % of the Ratio Median: 8.34%
Probability of stop loss ( 10 %) being exceeded within 252 days 42.29 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.194

----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 23
Unfiltered Median: 16.21 Filtered: 16.14
Unfiltered Max: 21.91 Filtered: 21.62


pvalue:  0.0921 Correlation:  0.951
filtered_pvalue:  0.1234 Correlation:  0.9464
ADF Results
(-3.929405637846494, 0.0018287857958748188, 0, 310, {'1%': -3.451621854687657, '5%': -2.870908950689806, '10%': -2.571761810613944}, 521.1079507316439)
Median:  16.2117
Current Ratio:  15.8689
Current Diff Vs Median:  -2.115 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 51.42 %
Std Devn of Ratio as a % of the Ratio Median: 8.31%
Probability of stop loss ( 10 %) being exceeded within 252 days 42.29 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.194

----------------------------------------

WAVELET: sym8  LEVEL: 1

Number of records trimmed: 21
Unfiltered Median: 15.94 Filtered: 16.11
Unfiltered Max: 19.26 Filtered: 19.06


pvalue:  0.0527 Correlation:  0.1469
filtered_pvalue:  0.3912 Correlation:  0.234
ADF Results
(-2.690646828342483, 0.07567080783715649, 1, 147, {'1%': -3.4756368462466662, '5%': -2.8814104466172608, '10%': -2.5773652982553568}, 161.68530310200748)
Median:  15.9388
Current Ratio:  15.8689
Current Diff Vs Median:  -0.438 %
Last prices used in USD Terms: LVS 53.34 1928.HK 3.36
252 Day Historical Volality 42.24 %
Std Devn of Ratio as a % of the Ratio Median: 6.04%
Probability of stop loss ( 10 %) being exceeded within 252 days 40.64 %
Size Adjustment Multiple for 82% Confidence at Stop Loss Level of 10 % = 0.237


Unnamed: 0,Ticker1,Ticker2,Train Median Cross Count,Days Since Last TRAIN Cross,Current Ratio,Median Ratio,% Deviation From Median,% Deviation From 5Yr Median,Train Start Date,Train End Date,...,10 Yr Corr,1 Yr P-Val,2 Yr P-Val,3 Yr P-Val,5 Yr P-Val,10 Yr P-Val,Cross Freq Count,Cross Freq %,Av Corr,Median P-Val
0,LVS,1928.HK,18,1,16.363493,15.468538,5.785643,24.635485,2022-08-19,2023-08-19,...,0.894188,0.032634,0.479161,0.434909,0.302499,0.067833,5,83.333333,0.905335,0.302499


# SIZE OF TRADE

In [40]:
series1[-1]


53.34000015258789

In [41]:
#Input Desired Position Dollar Size
pair_size = 10000

In [42]:
#Set Direction
if results_df.loc[0,'% Deviation From Median'] < 0:
    ticker1_direction = 'BUY'
    ticker2_direction = 'SELL'
else:
    ticker1_direction = 'SELL'
    ticker2_direction = 'BUY'   
    

In [43]:
ticker1_price = series1[-1]
ticker2_price = series2[-1]

ticker1_qty = pair_size /ticker1_price
ticker2_qty = pair_size /ticker2_price

print(ticker1_direction, int(np.round(ticker1_qty,0)), 'shares of', ticker1 )
print(ticker2_direction, int(np.round(ticker2_qty,0)), 'shares of', ticker2 )


SELL 187 shares of LVS
BUY 2975 shares of 1928.HK
