In [15]:
import numpy as np
from sklearn.cluster import KMeans
%run finance_data.ipynb
from datetime import date

In [16]:
class HMM:
    def __init__(self, N=4, K=50, M=5, D=10, H=50, RANGE=4.0/100, T=100, max_iterations=100, eps=1e-4):
        self.N = N
        self.K = K
        self.M = M
        self.D = D
        self.H = H
        self.RANGE = RANGE
        self.T = T
        self.max_iterations = max_iterations
        self.eps = eps

        self.Pi = np.random.randint(100, size=self.N)
        self.Pi = self.Pi / sum(self.Pi)
        self.A = np.array([np.random.randint(100, size=self.N) for i in range(self.N)]) / 99
        self.A = np.array([row / sum(row) for row in self.A])
        self.B = np.zeros([self.N, self.H])
        self.alpha = np.zeros([self.N, self.T])
        self.beta = np.zeros([self.N, self.T])
        self.gamma = np.zeros([self.N, self.T])
        self.xi = np.zeros([self.N, self.N, self.T])

        self.Data = np.zeros(self.T)
        self.O = np.zeros(self.T)
        self.O_test = self.O[-self.D:]

        self.deltas = np.ones(self.max_iterations)
        self.posteriors = np.zeros(self.H)

    def FracChange(self, index_of_frac_change):
        return (index_of_frac_change-(self.H/2))/(self.H/2)*self.RANGE

    def index(self, y):
        return np.clip(int(y/(self.RANGE)*(self.H/2) + (self.H/2)), 0, self.H-1)

    def indicatorK(self, y, k):
        return int(y==k)

    def multivariate3_normal(self, mu, cov, X):
        diff = (X - mu)
        numerator = np.exp(-(1/2) * diff.dot(np.linalg.inv(cov)).dot(diff))
        denominator = np.sqrt((2 * np.pi)**3 * np.abs(np.linalg.det(cov)))
        return numerator / denominator

    def normal(self, mu, std, X):
        diff = X - mu
        numerator = np.exp(-(1/2) * np.square(diff) / np.square(std))
        denominator = np.sqrt(2 * np.pi * np.square(std))
        return numerator / denominator

    def loadData(self, data, mode="intra", reinitialize=True):
        if mode=="inter":
            self.Data = data.getOInterDay()
        else:
            self.Data = data.getO()
        self.O = self.Data
        self.T = len(self.O)
        if reinitialize:
            self.initializeB()

    def initializeB(self):
        self.O = self.O * 100
        kmeans = KMeans(n_clusters=self.M, random_state=0, n_init="auto")
        kmeans.fit(np.array([[frac_change] for frac_change in self.O]))
        clusters = [ [y[1] for y in zip(kmeans.labels_, self.O) if y[0]==m] for m in range(self.M)]
        mu = [np.mean(cluster, axis=0) for cluster in clusters]
        i = 0
        while i < self.M:
            if len(clusters[i]) == 1:
                index_min = np.argmin([np.abs(mu[i] - clusters[i])])
                clusters[index_min].append(clusters.pop(i)[0])
                mu.pop(i)
                i -= 1
                self.M -= 1
            i += 1
        cov = [np.std(cluster) for cluster in clusters]

        self.B = np.array([\
                    [\
                        (1/(100 * self.M)) * sum( [self.normal(mu[m], cov[m], self.FracChange(frac_change))\
                                                    for m in range(self.M)] )
                    for frac_change in range(50)\
                ] for i in range(self.N)])

        self.O = self.O / 100
        self.clusters = clusters
        self.mu = mu
        self.cov = cov
        

    def forwards(self):
        self.alpha = np.zeros([self.N, self.T])
        self.alpha[:, 0] = [self.Pi[i] * self.B[i][self.index(self.O[0])] for i in range(self.N)]
        #print(f"forwards first self.alpha column: {self.alpha[:, 0]}")
        for t in range(1, self.T):
            temp = np.array([self.B[i][self.index(self.O[t])] * np.dot(self.alpha[:, t-1], self.A[:, i]) for i in range(self.N)])
            #print(f"self.B[0][self.index(self.O[t])]: {self.B[0][self.index(self.O[t])]}")
            #print(f"self.alpha[:, t-1]: {self.alpha[:, t-1]}")
            #print(f"self.A[:, 0]: {self.A[:, 0]}")
            #print(f"self.alpha column at time {t}: {temp}")
            self.alpha[:, t] = temp
        return self.alpha

    def backwards(self):
        self.beta = np.zeros([self.N, self.T])
        self.beta[:, self.T-1] = np.ones(self.N)
        for t in range(self.T-2, -1, -1):
            self.beta[:, t] = np.array([np.dot(np.multiply(self.beta[:, t+1], self.A[i, :]), self.B[:, self.index(self.O[t+1])]) for i in range(self.N)])
        return self.beta

    def calculateGamma(self):
        #print(f"numerators: {[[self.alpha[i][t] * self.beta[i][t] for t in range(self.T)] for i in range(self.N)]}")
        #print(f"np.dot thing: {[np.dot(self.alpha[:, t], self.beta[:, t]) for t in range(self.T)]}")
        return np.array( [[self.alpha[i][t] * self.beta[i][t] / np.dot(self.alpha[:, t], self.beta[:, t]) for t in range(self.T)] for i in range(self.N)] )

    def calculateXi(self):
        return np.array( [np.array( [self.calculateXiAtIJ(i, j) for j in range(self.N)] ) for i in range(self.N)] )

    def calculateXiAtIJ(self, i, j):
        return np.array( [self.alpha[i][t] * self.A[i][j] * self.beta[j][t+1] * self.B[j][self.index(self.O[t+1])] / self.doubleSum(t) for t in range(self.T-1)] )

    def doubleSum(self, t):
        return np.sum( [[self.alpha[k][t] * self.A[k][w] * self.beta[w][t+1] * self.B[w][self.index(self.O[t+1])] for w in range(self.N)] for k in range(self.N)] )

    def iterateA(self):
        temp = np.array( [ [sum(self.xi[i, j, :]) / sum(self.gamma[i, :-1]) for j in range(self.N)] for i in range(self.N)] )
        return temp

    def iterateB(self):
        #print(f"numerator: {[[self.indicatorK(self.index(self.O[t]), frac_change) * self.gamma[0][t] for t in range(self.T)] for frac_change in range(50)]} ")
        return np.array(\
            [[sum([self.indicatorK(self.index(self.O[t]), frac_change) * self.gamma[i][t] for t in range(self.T)]) / sum(self.gamma[i, :])\
                    for frac_change in range(self.H)]\
                    for i in range(self.N)]\
            )

    def BaumWelch(self):
        self.O = self.Data
        self.T = len(self.O)
        self.alpha = self.forwards()
        self.beta = self.backwards()
        self.gamma = self.calculateGamma()
        self.xi = self.calculateXi()

        iteration = 1
        deltas = np.ones(self.max_iterations)
        deltas[0] = np.inf
        while iteration < self.max_iterations and deltas[iteration-1] > self.eps:
            #print(f"iteration: {iteration}")
            #print(f"deltas: {deltas}")

            #if iteration % 10 == 0:
                #print(f"self.A: {self.A}")
                #print(f"self.B: {self.B}")
                #print(f"self.gamma: {self.gamma}")
                #print(f"self.xi: {self.xi}")
        
            Pi_star = self.gamma[:, 0]
            A_star = self.iterateA()
            B_star = self.iterateB()

            delta = max(np.linalg.norm(A_star - self.A), np.linalg.norm(B_star - self.B))
            #print(f"np.linalg.norm(A_star - self.A): {np.linalg.norm(A_star - self.A)}")
            #print(f"np.linalg.norm(B_star - self.B): {np.linalg.norm(B_star - self.B)}")
            self.Pi  = Pi_star
            self.A = A_star
            self.B = B_star

            self.alpha = self.forwards()
            self.beta = self.backwards()
            self.gamma = self.calculateGamma()
            self.xi = self.calculateXi()

            deltas[iteration] = delta
            iteration += 1

        return (Pi_star, A_star, B_star, self.gamma)

    def calculatePosterior(self):
        self.alpha = self.forwards()
        return sum(self.alpha[:, -1])

    def MAP(self, O_test=None):
        if O_test is None:
            self.O_test = np.copy(self.Data[-self.D:])
        else:
            self.O_test = np.copy(O_test)
        self.O = self.O_test
        #print(f"MAP data: {self.O}")
        self.O = np.append(self.O, [0])
        self.T = len(self.O)
        best = 0
        best_posterior = -1
        for frac_change in range(self.H):
            self.O[-1] = self.FracChange(frac_change)
            posterior = self.calculatePosterior()
            self.posteriors[frac_change] = posterior
            #print(f"self.FracChange(frac_change)/100: {self.FracChange(frac_change)}")
            #print(f"posterior: {posterior}")
            if posterior > best_posterior:
                best = frac_change
                best_posterior = posterior

        return (self.FracChange(best), best_posterior)


In [17]:
# Make two models. One "inter" day and one "intra" day. Get MAP for both to get the percentage change estimates.
# The "inter" day gives the opening price for next day
# The "intra" day gives the closing price for next day

In [18]:
def train():
    max_iterations = 15
    avg_1 = 0
    avg_2 = 0
    data = Polygon()
    data.getPrices(end_date="2023-04-21")
    print(f"last 10: {data.response[-10:]}")
    models_1 = []
    models_2 = []
    while max_iterations > 0:
        model_1 = HMM()
        model_2 = HMM()
        model_1.loadData(data, mode="inter")
        model_2.loadData(data)
        model_1.BaumWelch()
        model_2.BaumWelch()
        pred_1 = model_1.MAP()
        pred_2 = model_2.MAP()
        if pred_1[1] != 0:
            avg_1 += pred_1[0]
            models_1.append(model_1)
        if pred_2[1] != 0:
            avg_2 += pred_2[0]
            models_2.append(model_2)
        max_iterations -= 1

    #print([model.MAP() for model in models_1])
    
    avg_1 /= len(models_1)
    avg_2 /= len(models_2)
    return {"models_1": models_1, "models_2": models_2, "avg_1": avg_1, "avg_2": avg_2}

def __train(start_date="2023-01-09", end_date="2023-04-21"):
    max_iterations = 15
    best_1 = 0.0
    best_2 = 0.0
    best_model_1 = None
    best_model_2 = None
    data = Polygon()
    data.getPrices(start_date=start_date, end_date=end_date)
    while max_iterations > 0:
        model_1 = HMM()
        model_2 = HMM()
        model_1.loadData(data, mode="inter")
        model_2.loadData(data)
        model_1.BaumWelch()
        model_2.BaumWelch()
        pred_1 = model_1.MAP()
        pred_2 = model_2.MAP()
        
        if pred_1[1] != 0 and best_1 < pred_1[1]:
            best_1 = pred_1[1]
            best_model_1 = model_1
        if pred_2[1] != 0 and best_2 < pred_2[1]:
            best_2 = pred_2[1]
            best_model_2 = model_2
        
        max_iterations -= 1

    return (best_model_1, best_model_2)

In [19]:
def test(models_1, models_2, first="2023-04-22", last="2023-05-22"):
    if len(models_1) == 0 or len(models_2) == 0:
        return
    data = Polygon()
    data.getPrices(start_date=first, end_date=last)
    if len(data.response) < 11:
        return
    delta = data.getT()
    Y_hat = [(0,0) for _ in range(delta - 10)]

    #[model.loadData(data, mode="inter") for model in models_1]
    #[model.loadData(data) for model in models_2]

    i = 0
    while i < data.getT() - 10:
        pred_1 = sum([model.MAP(data.getOInterDay()[i: i+10])[0] for model in models_1])/len(models_1)
        pred_2 = sum([model.MAP(data.getO()[i+1: i+11])[0] for model in models_2])/len(models_2)

        print(f"pred_1: {pred_1}")
        print(f"pred_2: {pred_2}")

        pred_open = data.response[i+10][1]*(1 + pred_1)
        pred_close = pred_open*(1 + pred_2)
        Y_hat[i] = (pred_open, pred_close)
        i += 1

    return (Y_hat, data)

In [20]:
result = train()

[(130.465, 130.15), (130.26, 130.73), (131.25, 133.49), (133.88, 133.41), (132.03, 134.76), (134.83, 135.94), (136.815, 135.21), (134.08, 135.27), (135.28, 137.87), (138.12, 141.11), (140.305, 142.53), (140.89, 141.86), (143.17, 143.96), (143.155, 145.93), (144.955, 143), (142.7, 144.29), (143.97, 145.43), (148.9, 150.82), (148.03, 154.5), (152.575, 151.73), (150.64, 154.65), (153.88, 151.92), (153.775, 150.87), (149.46, 151.01), (150.952, 153.85), (152.12, 153.2), (153.11, 155.33), (153.51, 153.71), (152.35, 152.55), (150.2, 148.48), (148.87, 148.91), (150.09, 149.4), (147.11, 146.71), (147.71, 147.92), (147.05, 147.41), (146.83, 145.31), (144.38, 145.91), (148.045, 151.03), (153.785, 153.83), (153.7, 151.6), (152.81, 152.87), (153.559, 150.59), (150.21, 148.5), (147.805, 150.47), (151.28, 152.59), (151.19, 152.99), (152.16, 155.85), (156.08, 155), (155.07, 157.4), (157.32, 159.28), (159.3, 157.83), (158.83, 158.93), (158.86, 160.25), (159.94, 158.28), (157.97, 157.65), (159.37, 160.7

In [21]:
result

{'models_1': [<__main__.HMM at 0x1072d5070>,
  <__main__.HMM at 0x1549175f0>,
  <__main__.HMM at 0x1472e0cb0>,
  <__main__.HMM at 0x1075ac110>,
  <__main__.HMM at 0x1543ad3a0>,
  <__main__.HMM at 0x15494ee40>,
  <__main__.HMM at 0x154970ef0>,
  <__main__.HMM at 0x14695c650>,
  <__main__.HMM at 0x154971430>,
  <__main__.HMM at 0x154971220>,
  <__main__.HMM at 0x1549725a0>,
  <__main__.HMM at 0x154970770>],
 'models_2': [<__main__.HMM at 0x154972ba0>,
  <__main__.HMM at 0x1549704a0>,
  <__main__.HMM at 0x154973290>,
  <__main__.HMM at 0x154972f00>],
 'avg_1': -0.00026666666666666673,
 'avg_2': 0.006}

In [22]:
preds = test(result["models_1"], result["models_2"], first="2023-04-06", last="2023-04-21")

[(162.43, 164.66), (161.42, 162.03), (162.35, 160.8), (161.22, 160.1), (161.63, 165.56), (164.59, 165.21), (165.09, 165.23), (166.1, 166.47), (165.8, 167.63), (166.09, 166.65), (165.05, 165.02)]
pred_1: -0.0002666666666666667
pred_2: 0.006


In [23]:
preds

([(164.97599466666668, 165.96585063466668)], <__main__.Polygon at 0x154972b40>)

In [199]:
preds[1].getPrices(start_date="2023-04-24", end_date="2023-05-22")

[(165, 165.33), (165.19, 163.77), (163.055, 163.76), (165.19, 168.41), (168.49, 169.68), (169.28, 169.59), (170.09, 168.54), (169.5, 167.45), (164.89, 165.79), (170.975, 173.57), (172.48, 173.5), (173.05, 171.77), (173.02, 173.555), (173.85, 173.75), (173.62, 172.57), (173.16, 172.07), (171.99, 172.07), (171.71, 172.69), (173, 175.05), (176.39, 175.16), (173.98, 174.2)]


In [194]:
data = Polygon()
data.getPrices(start_date="2023-04-24", end_date="2023-05-22")

[(165, 165.33), (165.19, 163.77), (163.055, 163.76), (165.19, 168.41), (168.49, 169.68), (169.28, 169.59), (170.09, 168.54), (169.5, 167.45), (164.89, 165.79), (170.975, 173.57), (172.48, 173.5), (173.05, 171.77), (173.02, 173.555), (173.85, 173.75), (173.62, 172.57), (173.16, 172.07), (171.99, 172.07), (171.71, 172.69), (173, 175.05), (176.39, 175.16), (173.98, 174.2)]


In [221]:
[model.MAP(preds[1].getOInterDay()[0: 0+10])[0] for model in result["models_1"]]

self.FracChange(frac_change)/100: -0.04
posterior: 0.0
self.FracChange(frac_change)/100: -0.0384
posterior: 0.0
self.FracChange(frac_change)/100: -0.0368
posterior: 0.0
self.FracChange(frac_change)/100: -0.0352
posterior: 0.0
self.FracChange(frac_change)/100: -0.0336
posterior: 0.0
self.FracChange(frac_change)/100: -0.032
posterior: 0.0
self.FracChange(frac_change)/100: -0.0304
posterior: 0.0
self.FracChange(frac_change)/100: -0.0288
posterior: 0.0
self.FracChange(frac_change)/100: -0.027200000000000002
posterior: 0.0
self.FracChange(frac_change)/100: -0.0256
posterior: 0.0
self.FracChange(frac_change)/100: -0.024
posterior: 0.0
self.FracChange(frac_change)/100: -0.022400000000000003
posterior: 0.0
self.FracChange(frac_change)/100: -0.020800000000000003
posterior: 0.0
self.FracChange(frac_change)/100: -0.0192
posterior: 0.0
self.FracChange(frac_change)/100: -0.0176
posterior: 0.0
self.FracChange(frac_change)/100: -0.016
posterior: 0.0
self.FracChange(frac_change)/100: -0.0144
posterior

[-0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04,
 -0.04]

In [206]:
preds[1].getOInterDay()[0: 0+10]

array([-0.00084679, -0.00436588,  0.00873229,  0.00047503, -0.00235738,
        0.00294829,  0.00569598, -0.01528815,  0.0312745 , -0.00627989])