In [1]:
# install packages and libraries
%matplotlib inline
%matplotlib widget

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#plt.rcParams["figure.figsize"] = (11, 5)  #set default figure size

plt.rcParams["figure.figsize"] = (16, 9) #set default figure size (w, h) 
plt.style.use("ggplot")
import numpy.matlib
import copy
import scipy.sparse as sparse
from numpy.random import default_rng

In [2]:
import sys # importing sys
  
# adding Latest_scripts to the system path
sys.path.insert(0, '../../')
sys.path.insert(0, '../../SteveMorseCode/hawkes-master/')

import HP_scripts as HP # import module containing functions for the Masters project
import MHP as MHP # import module containing EM functions from Steve Morse for the Masters project

In [11]:
from scipy import stats

In [4]:
# Load the price table: a DataFrame with one 'Date' column plus one column per ticker.
df = pd.read_csv('../financial_data/dataset-djia-2018-subset2.csv')

# Detach the 'Date' column from the frame and keep it as a datetime Series,
# so that df holds price columns only.
dates = pd.to_datetime(df.pop('Date'))

# Fill missing values by carrying the last available value forward.
df = df.ffill()
df

Unnamed: 0,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
0,40.91,10.68,47.58,52.58,70.44,57.80,17.45,59.08,24.40,35.37,...,26.84,10.74,23.78,58.78,45.99,61.73,56.53,30.38,46.23,58.47
1,40.97,10.71,47.25,51.95,71.17,59.27,17.85,58.91,23.99,35.32,...,26.97,10.69,24.55,58.89,46.50,61.88,56.19,31.27,46.32,58.57
2,41.53,10.63,47.65,52.50,70.33,59.27,18.35,58.19,24.41,35.23,...,26.99,10.76,24.58,58.70,46.95,61.69,55.98,31.63,45.69,58.28
3,43.21,10.90,47.87,52.68,69.35,60.45,18.77,59.25,24.74,35.47,...,26.91,10.72,24.85,58.64,47.21,62.90,56.16,31.35,45.88,59.43
4,43.42,10.86,47.08,53.99,68.77,61.55,19.06,58.95,25.00,35.38,...,26.86,10.88,24.85,59.08,47.23,61.40,56.80,31.48,45.71,59.40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3015,71.58,175.01,1168.36,98.74,295.10,155.75,38.55,124.98,108.67,17.50,...,85.51,63.29,36.14,92.13,134.39,220.00,127.23,53.19,98.21,83.97
3016,69.86,170.57,1176.76,98.57,295.36,156.44,38.48,125.98,108.12,17.43,...,85.40,63.65,36.21,92.48,134.78,219.60,127.14,53.22,99.16,83.98
3017,70.06,170.60,1182.26,99.13,295.62,157.52,38.56,125.55,107.64,17.38,...,85.71,62.95,36.33,92.10,134.77,220.42,127.58,53.28,99.26,83.90
3018,69.82,171.08,1186.10,99.70,296.35,158.42,38.59,125.58,107.77,17.36,...,85.72,62.95,36.37,92.07,135.66,222.77,128.12,53.43,99.40,84.02


In [5]:
# Google and amazon are not in DJIA list
ten_companies = ['AAPL','MSFT', 'JPM', 'GS', 'PFE', 'MRK','NKE', 'HD',  'GOOGL','AMZN']

# Bookkeeping for the selected stocks. The integer marker of a stock is its
# position in ten_companies (0 -> 'AAPL', 1 -> 'MSFT', ...).
ticker = list(df[ten_companies])      # ticker symbols, in ten_companies order
ticker_id = list(range(len(ticker)))  # integer marker of each stock
ticker_dict = {symbol: marker for marker, symbol in enumerate(ticker)}  # ticker symbol -> integer marker

### Collect relevant data such as event times (largest $10\%$ price drops, i.e. only include days where the return was below the 10% quantile), markers, ...

In [13]:
# Collect event times

# Date from which all event times are measured.
origin = pd.Timestamp(2006,1,3)

t_i = []  # event times of all stocks pooled together
u_i = []  # integer marker of the stock that produced each event time

ticker_dict = {}  # ticker symbol -> integer marker (position in ten_companies)

for u, col in enumerate(df[ten_companies]):
    Tdiff = df[col].diff()  # row-to-row change of this stock's price column
    # An event is a row whose price change is at or below the stock's own 10% quantile.
    timestamps = dates[Tdiff <= Tdiff.quantile(0.1)]
    t_i.extend((timestamps - origin).dt.days.astype(float))  # whole days elapsed since origin
    u_i.extend([u] * len(timestamps))
    ticker_dict[col] = u

ticker = list(ticker_dict)              # ticker symbols in marker order
ticker_id = list(ticker_dict.values())  # the integer markers themselves

# Seed NumPy's global RNG. The only consumer visible in this cell is the
# disabled jitter line below.
seed = 1000
np.random.seed(seed)

t_i = np.array(t_i) / 30  # rescale from days to 30-day units
#t_i=np.array(t_i)+np.random.rand(len(t_i))
u_i = np.array(u_i)

# Order the pooled events chronologically, keeping markers aligned with times.
# NOTE(review): np.argsort is not stable by default, so the relative order of
# events sharing the same timestamp is arbitrary -- confirm this is acceptable.
perm = np.argsort(t_i)
t_i, u_i = t_i[perm], u_i[perm]  # superposed timestamps and their markers

In [6]:
tstamps_ten_univ_dates = []  # per stock: calendar dates of its events
tstamps_ten_univ = []        # per stock: event times as float days since 2006-01-03

for symbol in ticker:
    price_change = df[symbol].diff()
    # Same event rule as for the pooled series: change at or below the 10% quantile.
    event_dates = dates[price_change <= price_change.quantile(0.1)]
    tstamps_ten_univ_dates.append(event_dates)
    # Times are left in days; no days-to-months rescaling is applied here.
    # NOTE(review): the pooled t_i is divided by 30 -- confirm days are intended here.
    tstamps_ten_univ.append((event_dates - pd.Timestamp(2006,1,3)).dt.days.astype(float))

In [7]:
# One DataFrame per stock, pairing each event's calendar date with its numeric event time.
ts_i_with_dates = [
    pd.DataFrame({
        'Date': tstamps_ten_univ_dates[k].values,
        'event time': tstamps_ten_univ[k].values,
    })
    for k in range(len(ten_companies))
]

# Optional CSV export, left disabled:
# for k, frame in enumerate(ts_i_with_dates):
#     frame.to_csv('ten_stocks_event_times/'+ten_companies[k]+'_ts_with_dates.csv', index=None)

## Use the Kolmogorov-Smirnov test to check whether each of the 10 univariate event sequences follows a homogeneous Poisson process

In [14]:
# For each stock, test whether its rescaled event times look uniformly distributed,
# as they would under a homogeneous Poisson process.
print("Kolmogorov-Smirnov test to check if the 10 univariates follow homogenous Poisson process")

stats_res = []  # one K-S test result per stock, in `ticker` order

for i in ticker:
    # Event times belonging to this stock, taken from the pooled (sorted) series.
    delta_t = np.diff(t_i[u_i == ticker_dict[i]])  # waiting times between consecutive events
    t = np.cumsum(delta_t)  # time elapsed since the stock's first event
    # Rescale by the largest elapsed time so the values can be compared with Uniform(0, 1).
    # NOTE(review): the last value is always exactly 1.0 by construction, so it is
    # not a random draw -- consider whether it should be excluded from the test.
    sample = np.array(t / t.max(), dtype=float)
    result = stats.kstest(sample, 'uniform')
    stats_res.append(result)
    print("")
    print("K-S test result of "+i+':')
    print('p-value =', result.pvalue)

Kolmogorov-Smirnov test to check if the 10 univariates follow homogenous Poisson process

K-S test result of AAPL:
p-value = 1.8534496185598036e-16

K-S test result of MSFT:
p-value = 3.6652955202297536e-05

K-S test result of JPM:
p-value = 4.532156439816381e-08

K-S test result of GS:
p-value = 1.6148943786216034e-16

K-S test result of PFE:
p-value = 0.00889706899897158

K-S test result of MRK:
p-value = 0.0019518544859520599

K-S test result of NKE:
p-value = 1.227447209122274e-24

K-S test result of HD:
p-value = 1.6394194151157448e-21

K-S test result of GOOGL:
p-value = 5.0440849372493556e-17

K-S test result of AMZN:
p-value = 8.769669112830127e-35


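## The results (i.e., very small p-values) lead us to reject, for every stock, the hypothesis that the timestamps come from a homogeneous Poisson process. Note that this rejection alone does not establish that they come from non-homogeneous Poisson processes; other point processes (e.g. self-exciting ones) are equally consistent with it.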