In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the price table from disk (presumably CAC 40 quotes, judging by the file name);
# later cells use its "Close" column.
df: pd.DataFrame = pd.read_csv(filepath_or_buffer='cac40.csv')
# Last expression of the cell: display the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Quick look at the "Close" column as a line chart, drawn against the frame's index.
df.loc[:, "Close"].plot(kind="line")
plt.show()

In [None]:
# Flag large one-step drops of the closing price and overlay them on the price curve.
DROP_THRESHOLD = -150  # a step-to-step change in "Close" below this value counts as an event

diff = np.diff(df["Close"])  # diff[i] = Close[i + 1] - Close[i]
# Positions in `diff` where the change is below the threshold, i.e. the row right before
# each drop. They are cast to float because later cells feed them to the Hawkes learner
# as event timestamps (presumably it requires floating-point arrays -- confirm).
events = np.argwhere(diff < DROP_THRESHOLD).ravel().astype(float)

fig, ax = plt.subplots(figsize=(20, 5))
# Series.plot draws against the index and ignores an `x=` keyword, so none is passed;
# the event positions above are expressed on that same index scale.
df["Close"].plot(ax=ax)
ax.vlines(events, 0, df["Close"].max(), color="red", zorder=10)
ax.set(
    xlabel="Row index",
    ylabel="Close",
    title=f"Close with one-step changes below {DROP_THRESHOLD} marked in red",
)
plt.show()

In [None]:
from tick.hawkes import HawkesExpKern

In [None]:
# Grid-search the decay parameter of an exponential-kernel Hawkes model: fit one model
# per candidate value on the same event sequence and record the score of each fit.
betas = np.logspace(-4, 0, 100)
scores = []
for beta in betas:
    process = HawkesExpKern(beta).fit([events])
    scores.append(process.score([events]))

fig, ax = plt.subplots()
ax.semilogx(betas, scores)
ax.set(xlabel="beta (decay)", ylabel="score", title="Score of the fitted model per decay")
plt.show()

# NOTE: despite its name, `best_score` is the *index* of the highest score in `scores`
# (argmax, so a higher score is treated as better); the name is kept for compatibility.
best_score = np.argmax(scores)
print(f"Best beta: {betas[best_score]}")

# Fit once with the selected decay. `fit` returns the learner itself (its result is
# used that way in the loop above), so a second `process.fit([events])` call would
# only repeat the same optimisation and is therefore omitted.
process = HawkesExpKern(betas[best_score]).fit([events])
print(f"Best mu and alpha/beta: {process.coeffs[0], process.coeffs[1]}")
print(process.score([events]))

# Sample the fitted intensity with a step of 1. The first return value is indexed with
# [0] below (presumably one array per node, of which there is a single one here).
intens, x_steps = process.estimated_intensity([events], 1)
intens = intens[0]
print(intens.shape, x_steps.shape)

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(x_steps, intens)
ax.vlines(events, 0, intens.max(), color="red", alpha=.1, zorder=10)
ax.set(xlabel="time", ylabel="estimated intensity", title="Estimated intensity with events in red")
plt.show()

In [None]:
# Visual goodness-of-fit check, following https://pat-laub.github.io/pdfs/honours_thesis.pdf
# Running sum of the sampled intensity values. This stands in for the integrated
# (cumulative) intensity -- assumes the samples in `intens` are one time unit apart.
cum_intens = np.cumsum(intens)

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(x_steps, cum_intens)
# Faint red markers show where the events fall along the cumulative curve.
ax.vlines(events, 0, cum_intens.max(), color="red", alpha=0.1, zorder=10)
plt.show()

In [None]:
from scipy.stats import expon


def cum_intens_func(t):
    """Look up the cumulative intensity for time(s) ``t``.

    ``t`` may be a scalar or an array. Each value is shifted back by one, located in
    ``x_steps`` with ``np.searchsorted``, and the entry of ``cum_intens`` at that
    position is returned.
    """
    # NOTE(review): the one-unit shift presumably compensates for the running sum
    # already including the sample at the current step -- confirm.
    return cum_intens[np.searchsorted(x_steps, t-1)]


def _lagged_uniform_pairs(times):
    """Return (current, previous) consecutive gaps of ``times`` mapped through the Exp(1) CDF.

    Slicing is used instead of ``np.roll``: rolling wraps around and makes the first
    "previous" gap equal to ``times[0] - times[-1]``, a negative value whose CDF is 0,
    which would add a spurious point to the scatter plot.
    """
    uniforms = expon.cdf(np.diff(times))
    return uniforms[1:], uniforms[:-1]


events_transformed = cum_intens_func(events)  # this should be a Poisson process with intensity 1
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20,5))
ax1.scatter(events_transformed, np.arange(1, len(events_transformed)+1))
# The points should align on the line y=x (goodness of fit).
ax1.plot([0, events_transformed.max()], [0, events_transformed.max()], color="red", linestyle="dashed")
# The pairs should be spread uniformly over the unit square (independence of arrival times).
ax2.scatter(*_lagged_uniform_pairs(events_transformed))
plt.show()

# Same two plots for the raw, untransformed event times, to show that they do not fit
# a unit-rate Poisson process directly.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20,5))
ax1.scatter(events, np.arange(1, len(events)+1))
ax2.scatter(*_lagged_uniform_pairs(events))
plt.show()