In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import random
pd.set_option('display.max_columns', 50)

# Load the JPM daily price history. The first CSV column becomes the index and
# is parsed as dates. `pd.read_csv(..., index_col=0, parse_dates=True)` is the
# documented replacement for `pd.DataFrame.from_csv`, which was deprecated in
# pandas 0.21 and removed in pandas 1.0.
jpm = pd.read_csv('../TradingBot/JPM.csv', index_col=0, parse_dates=True)
display(jpm.head())
display(jpm.tail())

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-09-27,65.410004,66.410004,65.110001,66.360001,13580600,66.360001
2016-09-26,66.599998,66.800003,65.540001,65.779999,16408100,65.779999
2016-09-23,67.389999,67.900002,67.18,67.25,13967400,67.25
2016-09-22,66.989998,67.419998,66.839996,67.389999,12781700,67.389999
2016-09-21,66.839996,67.129997,66.309998,66.839996,14116800,66.839996


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1984-01-06,46.875014,47.375008,46.375021,46.875014,194400,2.772681
1984-01-05,46.812508,47.375008,46.250008,47.375008,344100,2.802256
1984-01-04,44.843758,45.874979,44.249986,45.874979,292500,2.713529
1984-01-03,43.937506,44.249986,43.624979,44.000008,385500,2.602623
1983-12-30,44.000008,44.500006,43.500014,44.000008,211500,2.602623


In [18]:
# Bounds of the daily calendar index the price history is aligned to.
start_date, end_date = '1983-12-30', '2016-09-27'
date_range = pd.date_range(start=start_date, end=end_date)

# Left-join the prices onto the daily calendar index, then drop every row that
# contains a missing value (calendar days that have no matching row in `jpm`
# come out of the join as NaN and are removed here).
dfJPM = (
    pd.DataFrame(index=date_range)
    .join(jpm, how='left')
    .dropna()
)
dfJPM.head()

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close
1983-12-30,44.000008,44.500006,43.500014,44.000008,211500.0,2.602623
1984-01-03,43.937506,44.249986,43.624979,44.000008,385500.0,2.602623
1984-01-04,44.843758,45.874979,44.249986,45.874979,292500.0,2.713529
1984-01-05,46.812508,47.375008,46.250008,47.375008,344100.0,2.802256
1984-01-06,46.875014,47.375008,46.375021,46.875014,194400.0,2.772681


In [2]:
from copy import deepcopy

def adjust_price_volume(df):
    '''
    Rescale the price and volume columns by the adjusted-close ratio.

    Adds an 'adj_factor' column equal to 'Adj Close' / 'Close', multiplies
    'Open', 'High' and 'Low' by it and divides 'Volume' by it.
    'Close' and 'Adj Close' are left untouched.

    The frame is modified in place; nothing is returned.
    '''
    ratio = df['Adj Close'] / df['Close']
    df['adj_factor'] = ratio

    # Prices scale with the factor ...
    for price_col in ('Open', 'High', 'Low'):
        df[price_col] = df[price_col] * ratio

    # ... while volume scales inversely.
    df['Volume'] = df['Volume'] / ratio

In [19]:
def create_label_column(df):
    '''
    Add the prediction label column 'adj_close_label'.

    Each row's label is the 'Adj Close' value of the row that follows it
    (a shift by -1), so the last row's label is NaN.

    The frame is modified in place; nothing is returned.
    '''
    following_adj_close = df['Adj Close'].shift(periods=-1)
    df['adj_close_label'] = following_adj_close

In [20]:
def create_same_day_open(df):
    '''
    Add the 'same_day_open' measurement column.

    Each row receives the 'Open' value of the row that follows it (a shift
    by -1), i.e. the open belonging to the same row that a -1 shifted label
    is taken from. The last row's value is NaN.

    The frame is modified in place; nothing is returned.
    '''
    following_open = df['Open'].shift(periods=-1)
    df['same_day_open'] = following_open

In [5]:
def create_diffs(df):
    '''
    Add measurement-error columns: each measurement minus the label.

    Requires the columns 'High', 'Low', 'Adj Close', 'same_day_open' and
    'adj_close_label' to be present. Adds:

        high_diff  = High          - adj_close_label
        low_diff   = Low           - adj_close_label
        close_diff = Adj Close     - adj_close_label
        open_diff  = same_day_open - adj_close_label

    The frame is modified in place; nothing is returned.
    '''
    label = df['adj_close_label']

    df['high_diff'] = df['High'] - label
    df['low_diff'] = df['Low'] - label
    df['close_diff'] = df['Adj Close'] - label
    # The shifted-open column written by create_same_day_open() is named
    # 'same_day_open'; no 'Open shift' column is created anywhere in this
    # notebook, so reading that name raised KeyError.
    df['open_diff'] = df['same_day_open'] - label

In [6]:
def preprocess(dataframe):
    '''
    Build the feature/label frame used downstream.

    Works on a deep copy, so the input `dataframe` is not modified. Runs, in
    order: adjust_price_volume, create_label_column, create_same_day_open and
    create_diffs, then drops every row that contains a NaN.

    Returns the processed copy.
    '''
    # DataFrame.copy() is a deep copy by default (same effect as deepcopy).
    df = dataframe.copy()

    adjust_price_volume(df)
    create_label_column(df)
    # The shifted-open helper defined in this notebook is create_same_day_open;
    # the previously called create_next_day_open is not defined in the visible
    # cells and would raise NameError on a fresh kernel.
    create_same_day_open(df)
    create_diffs(df)

    return df.dropna()

In [7]:
from numpy.linalg import inv

class Kalman(object):
    '''
    Linear Kalman filter with a two-element state [price, price_rate] and
    scalar measurements of the price component.

    The per-sensor measurement noise values R_h, R_l, R_c and R_o start as
    None and must be assigned by the caller before update() is used with the
    corresponding sensor type.
    '''

    def __init__(self, init_price, noise=1):
        '''
        init_price: initial value of the price component of the state.
        noise: value placed on the diagonal of the process-noise matrix Q.
        '''
        self.dt = 1  # time scale
        self.noise = noise

        # State vector [price, price_rate], shape (2,)
        self.x = np.array([init_price, 0], dtype=float)
        # Uncertainty covariance matrix (2x2)
        self.P = np.eye(2)
        # Prediction (state transition) matrix (2x2)
        self.F = np.array([[1, self.dt], [0, 1]], dtype=float)
        # Unpredictable external factor noise covariance matrix (2x2)
        self.Q = np.array([[noise, 0], [0, noise]], dtype=float)
        # Measurement mapping: picks the price component of the state, shape (2,)
        self.H = np.array([1, 0], dtype=float)

        # Alternative three-state model [price, price_rate, price_rate2] that
        # was sketched here previously:
        #   x = [init_price, 0, 0], P = I(3), Q = noise * I(3), H = [1, 0, 0]
        #   F = [[1, dt, 0.5 * dt**2], [0, 1, dt], [0, 0, 1]]

        # Sensor noise covariances (scalars), selected in update() by
        # sensor_type: 'high' -> R_h, 'low' -> R_l, 'close' -> R_c,
        # anything else -> R_o.
        self.R_h = None
        self.R_l = None
        self.R_c = None
        self.R_o = None

        self.S = None  # Innovation covariance (scalar)
        self.y = None  # Measurement residual (scalar)
        self.K = None  # Kalman gain, shape (2,)

    def predict(self):
        '''
        Advance the state and its covariance by one step:
        x <- F x,  P <- F P F^T + Q.
        '''
        self.x = np.matmul(self.F, self.x)
        self.P = np.matmul(np.matmul(self.F, self.P), self.F.T) + self.Q

    def update(self, measurement, sensor_type):
        '''
        Fold one scalar measurement into the state estimate.

        measurement: observed value compared against H x.
        sensor_type: 'high', 'low' or 'close' select R_h, R_l or R_c;
            any other value selects R_o.

        Raises TypeError if the selected sensor noise has not been set.
        '''
        noise_by_sensor = {'high': self.R_h, 'low': self.R_l, 'close': self.R_c}
        R = noise_by_sensor.get(sensor_type, self.R_o)
        if R is None:
            raise TypeError(
                'sensor noise for sensor_type %r is None; assign '
                'R_h / R_l / R_c / R_o before calling update()' % (sensor_type,)
            )

        # Residual between the measurement and the predicted price
        self.y = measurement - np.matmul(self.H, self.x)
        # Innovation covariance H P H^T + R (scalar)
        self.S = np.matmul(np.matmul(self.H, self.P), self.H.T) + R
        # Kalman gain P H^T / S, same length as the state
        self.K = np.matmul(self.P, self.H.T) * (1 / self.S)

        # Update x and P
        self.x = self.x + self.K * self.y
        # K and H are both 1-D, so matmul(K, H) would collapse to a scalar
        # inner product; the covariance update needs the outer product K H.
        # The identity is sized from the state so other state sizes also work.
        identity = np.eye(len(self.x))
        self.P = np.matmul(identity - np.outer(self.K, self.H), self.P)