# Define class StockCracker

In [30]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)
#---- matplotlib, my old favorite...
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(style='ticks')
%matplotlib inline
#---- for Bokeh, a ploter
import bokeh.plotting as bp
from bokeh.layouts import gridplot, column
from bokeh.models import LinearAxis, DataRange1d, HoverTool, BoxSelectTool
TOOLS = ["pan,wheel_zoom,box_zoom,reset,crosshair, hover, box_select"]
WIDTH = 990
HEIGHT = 280
bp.output_notebook()
#---- for color, style print in iPython
from IPython.display import HTML, display
import datetime
import os
#---- for colorful print
from colorama import init
init(autoreset=True)
from colorama import Fore, Back, Style
#---- for statistics
from scipy import stats

class LabelManager:
    def __init__(self):
        self.DATE = 'Date'
        self.OPEN = 'Open'
        self.HIGH = 'High'
        self.LOW  = 'Low'
        self.CLOSE = 'Close'
        self.VOLUME = 'Volume'
label = LabelManager()
        
class StockCracker:
    def __init__(self):
        self.rootPath = './'
        self.USDJPYPath = './fx/USDJPY/'
        self.mazdaPath = './Mazda/'
        self.nikkeiPath = './Nikkei/'
        self.nikkeiYahooPath = './Nikkei_Yahoo/N225.csv'
        self.panasonicPath = './Panasonic_Yahoo/6752.T.csv'
        label = LabelManager()
        print("Initializing StockCracker instance")
        

    def readOneCsv_Yahoo(self, filePath):
        data = pd.read_csv(filePath, encoding='UTF-8', header=0)
        data[label.DATE] = [datetime.datetime.strptime(d, '%Y-%m-%d').date() for d in data[label.DATE]]
        data[label.OPEN] = pd.to_numeric(data[label.OPEN], errors='coerce')
        data[label.HIGH] = pd.to_numeric(data[label.HIGH], errors='coerce')
        data[label.LOW] = pd.to_numeric(data[label.LOW], errors='coerce')
        data[label.CLOSE] = pd.to_numeric(data[label.CLOSE], errors='coerce')
        data[label.VOLUME] = pd.to_numeric(data[label.VOLUME], errors='coerce')
        
        return data
        
    # read one csv file
    def readOneCsv(self, filePath, isReversed):
        '''
        Obsolete, do NOT use this API
        '''
        data = pd.read_csv(filePath, encoding='shift-JIS', header=0)
        if isReversed is True:
            data = data.reindex(index=data.index[::-1])
            data.index = range(len(data))
        #print(Fore.YELLOW + data['日付'][0] + '~' + data['日付'][len(data)-1])
        return data

    # read one folder, recurssively
    def readOneFolder(self, folderPath, isReversed=True):
        '''
        Obsolete, do NOT use this API
        '''
        for root, subdirs, files in os.walk(folderPath):
            #print('----------------')
            #print(Back.CYAN + Fore.RED + Style.BRIGHT + 
            #      'In Folder:'+root)
            #---- sorted() here sorts files 
            for oneCsv in sorted(files):
                #print(Back.CYAN +"    "+ oneCsv)
                filePath = folderPath+oneCsv
                data = self.readOneCsv(filePath, isReversed)
                try:
                    allData
                except NameError:   # allData not exist
                    allData = data
                else:   # allData exist
                    # concatenate 2 pandas dataframes
                    allData = pd.concat([allData, data])
        allData.index = range(len(allData))
        #print(allData) # output all data of target stock
        return allData

    # calculate Beta coefficient, 
    def calcBetaCoefficient(self, stock, index_Nikkei):
        #---- 
#### following example gives a good intuition about the quantitative  
#        >>> a = np.array([1,3,9,6,5])
#        >>> b = a + 100
#        >>> b
#        array([101, 103, 109, 106, 105])
#        >>> np.cov(a,b,bias=1)
#        array([[ 7.36,  7.36],
#                [ 7.36,  7.36]])
#        >>> b = a * 10
#        >>> np.cov(a,b,bias=1)
#        array([[   7.36,   73.6 ],
#               [  73.6 ,  736.  ]])
        dailyChange_stock = np.array([(stock[label.CLOSE][idx+1] - stock[label.CLOSE][idx]) / stock[label.CLOSE][idx] for idx in range(len(stock) - 1)]) # from 0 ~ len(stock)-1
        dailyChange_index = np.array([(index_Nikkei[label.CLOSE][idx+1] - index_Nikkei[label.CLOSE][idx]) / index_Nikkei[label.CLOSE][idx] for idx in range(len(index_Nikkei) - 1)]) # from 0 ~ len(index)-1
        # covariance matrix(a, b) = 
        # [[var(a)    cov(a, b)]
        #  [cov(b, a) var(b)   ]]
        #
        matrix = np.cov(dailyChange_stock, dailyChange_index, bias=1)        
        beta = matrix[0][1] / matrix[1][1]
        print("Beta = {}".format(beta))
        if beta >= 1:
            print("Target Stock is MORE volitale than the index, target stock is RISKY")
        else:
            print("Target Stock is LESS volitale than the index, target stock is NOT risky")
        return beta, dailyChange_stock, dailyChange_index
    
    # On-balance volume, need 出来高 of the target stock
    def keyIndicator_OBV(self, stock):
        OBV = np.zeros(len(stock))
        OBV[0] = 0 
        for idx in range(len(stock)):
            if idx == 0:
                pass
            if stock['終値'].iloc[idx] > stock['終値'].iloc[idx - 1]:
                OBV[idx] = OBV[idx - 1] + stock['出来高'].iloc[idx]
            elif stock['終値'].iloc[idx] < stock['終値'].iloc[idx - 1]:
                OBV[idx] = OBV[idx - 1] - stock['出来高'].iloc[idx]
            else:
                OBV[idx] = OBV[idx - 1]
        return OBV
    #-- MACD
    #----- Simple moving average
    def calc_SMA(self, stock, N):
        SMA = np.zeros(len(stock))
        for i in range(len(stock)):
            if i > N-2:
                SMA[i] = stock['終値'][i-N+1:i].sum() / N
        return SMA
    #----- Exponential moving average
    def calc_EMA(self, stock, N):
        SMA = self.calc_SMA(stock, N)
        EMA = np.zeros(len(stock))
        EMA[N-1] = SMA[N-1]
        multiplier = 2 / (N + 1)
        for i in range(len(stock)):
            if i > N - 1:
                EMA[i] = multiplier * (stock['終値'].iloc[i] - EMA[i-1]) + EMA[i-1]
        return EMA
    #----- MACD and signal
    def keyIndicator_MACD(self, stock, N1, N2, sigN):
        EMA_N1 = self.calc_EMA(stock, N1)
        EMA_N2 = self.calc_EMA(stock, N2)
        MACD   = EMA_N1 - EMA_N2
        #--- zerolize all necessary index
        if N1 < N2:
            border = N2 - 1
        else:
            border = N1 - 1
        for idx in range(border):
            MACD[idx] = 0
        #--- calc Signal line
        multiplier = 2/(sigN + 1)
        signal = np.zeros(len(stock))
        signal[border+sigN-1] = np.average( MACD[border:border+sigN-1] )
        for idx in range(border+sigN, len(stock)):
            signal[idx] = MACD[idx] * multiplier + signal[idx-1] * (1 - multiplier)
        return MACD, signal
    #-------------
    # 我需要分析index以及之前N天，之后M天的数据～
    #-------------
    def focusOn(self, stock, index, formerN, laterM):
        start = index - formerN
        if start < 0:
            start = 0
            
        end = index + laterM
        if end > stock.size:
            end = len(stock)
        return stock[start:end]
    #-------------
    # 计算LinearRegression
    #-------------
    def calcSlope(self, data):
        return
    
sc = StockCracker()

Initializing StockCracker instance


In [29]:
# read data
data_Nikkei = sc.readOneCsv_Yahoo(sc.nikkeiYahooPath)
data_Panasonic = sc.readOneCsv_Yahoo(sc.panasonicPath)
# plot data
fig1 = bp.figure(x_axis_type="datetime", tools=TOOLS, plot_width=WIDTH, plot_height=HEIGHT, toolbar_location="above", title="Nikkei & Panasonic")
## Panasonic
fig1.extra_y_ranges = {"Panasonic":DataRange1d(start=data_Panasonic[label.OPEN].min()-100, 
                                           end=data_Panasonic[label.OPEN].max()+100)}
fig1.add_layout(LinearAxis(y_range_name = "Panasonic"), 'right')
fig1.line(data_Panasonic[label.DATE], data_Panasonic[label.OPEN], line_width=1, color="blue", y_range_name="Panasonic", legend="Panasonic")
## Nikkei
fig1.line(data_Nikkei[label.DATE], data_Nikkei[label.OPEN], line_width=1, color="red", legend="Nikkei")
fig1.legend.location="top_left"



bp.show(fig1)

In [32]:
#---------------
# 计算Beta系数
#---------------
# calculate Beta Coefficient, the input data must have the same length
beta, dailyChange_panasonic, dailyChange_nikkei = sc.calcBetaCoefficient(data_Panasonic, data_Nikkei)
##--- another version of beta, refer to ([ref] Beta Coefficient.pdf )
slope, intercept, r_value, p_value, std_err = stats.linregress(dailyChange_nikkei, dailyChange_mazda)
print("y = {}*x + {}".format(slope, intercept)) #line = slope*xi+intercept

ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [21]:
# plot Nikkei and Mazda
fig1 = bp.figure(x_axis_type="datetime", tools=TOOLS, plot_width=WIDTH, plot_height=HEIGHT, toolbar_location="above", title="Nikkei & Mazda")
fig1.extra_y_ranges = {"Mazda":DataRange1d(start=data_Mazda['始値'].min()-100, 
                                           end=data_Mazda['始値'].max()+100)}
fig1.add_layout(LinearAxis(y_range_name = "Mazda"), 'right')
fig1.line(data_Nikkei['日付'], data_Nikkei['始値'], line_width=1, color="red", legend="Nikkei")
fig1.line(data_Mazda['日付'], data_Mazda['始値'], line_width=2, color="blue", y_range_name="Mazda", legend="Mazda")
fig1.legend.location="top_center"

# plot FX - USDJPY
fig2 = bp.figure(x_axis_type="datetime", tools=TOOLS, plot_width=WIDTH, plot_height=HEIGHT, toolbar_location="above", title="USDJYP")
fig2.line(data_fx['日付'], data_fx['始値'])

grid = gridplot([fig1, fig2], ncols=1)

#bp.show(column(fig1, fig2))
bp.show(grid)

In [5]:
#---- calculate several Key Indicators
#----- OBV
#--------------
# 计算OBV，配合成交Volume
#--------------
# data_Mazda[['出来高','終値']] # extract 2 specific columns from original dataframe
OBV_Mazda = sc.keyIndicator_OBV(data_Mazda)
fig_OBV = bp.figure( x_axis_type = "datetime", tools=TOOLS, 
                    plot_width=WIDTH, plot_height=HEIGHT + 300, 
                    toolbar_location="above", title="On-balance volume & Volume Bar of Mazda")
### plot volume
index_red = data_Mazda.loc[data_Mazda['終値'] >= data_Mazda['始値']].index
index_blk = data_Mazda.loc[data_Mazda['終値'] <  data_Mazda['始値']].index
fig_OBV.vbar(x=data_Mazda['日付'][index_red], bottom=0,
           top=data_Mazda['出来高'][index_red], color="red", width=0.0, alpha=0.3, line_width=3.0)
fig_OBV.vbar(x=data_Mazda['日付'][index_blk], bottom=0,
           top=data_Mazda['出来高'][index_blk], color="green", width=0.0, alpha=0.3, line_width=3.0)
### plot OBV
scaler = 0.1 # the trajectory/trend of OBV is crucial, the value is not
fig_OBV.line(data_Mazda['日付'], OBV_Mazda * scaler, color="black", alpha=1.5)
### plot price 始値
fig_OBV.extra_y_ranges = {"Mazda":DataRange1d(start=data_Mazda['始値'].min()-6000, end=data_Mazda['始値'].max()+100)}
fig_OBV.add_layout(LinearAxis(y_range_name = "Mazda"), 'right')
fig_OBV.line(data_Mazda['日付'], data_Mazda['始値'], line_width=2, color="blue", y_range_name="Mazda", alpha=0.3)

bp.show(fig_OBV)

In [6]:
#----------------------------------
# 线性回归，用来用OBV计算走势
#----------------------------------
# Sliding window to calculate Trend of OBV
windowSize = 100
slope, intercept, r_value, p_value, std_err = stats.linregress( range(0, windowSize), data_Mazda['始値'][0:windowSize] )
print(slope)
slope, intercept, r_value, p_value, std_err = stats.linregress( range(0, windowSize), OBV_Mazda[0:windowSize] )
print(slope)


-9.60189018902
-1191390.69907


In [7]:
#-------------------------#
# plot conventional MACD(12, 26, 9)
#-------------------------#
MACD, signal = sc.keyIndicator_MACD(data_Mazda, 12, 26, 9)

fig_MACD = bp.figure(x_axis_type = "datetime", tools=TOOLS, 
                    plot_width=WIDTH, plot_height=HEIGHT + 300, 
                    toolbar_location="above", title="MACD of Mazda")
fig_MACD.line(data_Mazda['日付'], MACD, color='red', legend="MACD")
fig_MACD.line(data_Mazda['日付'], signal, legend="signal")
#fig_MACD.line(data_Mazda['日付'], MACD - signal, legend="MACD-Signal")
### plot price 終値
fig_MACD.line(data_Mazda['日付'], data_Mazda['終値']/10, line_width=2, color="purple", alpha=0.3)
bp.show(fig_MACD)

In [42]:
#----------------------
# 看MACD-Signal，
#  >0:红色 
#  <0:绿色
#  Cyan cross: 红绿变化点～
#----------------------
MACD, signal = sc.keyIndicator_MACD(data_Mazda, 12, 26, 9)

data = MACD - signal
index_up    = []
index_down  = []
index_cross = []
for idx in range(len(data)):
    if data[idx] > 0:
        index_up.append(idx)
    else:
        index_down.append(idx)
for idx in range(len(data) - 1):
    if data[idx] < 0 and data[idx+1] > 0:
        index_cross.append(idx)
        
#------- linear Regression
slope_cross = []
#for ele in index_cross:
for idx in range(len(data_Mazda)):
    oneSlice = sc.focusOn(data_Mazda, idx, 2, 2)
    slope, intercept, r_value, p_value, std_err = stats.linregress([i for i in range(len(oneSlice))], oneSlice['終値'])
    slope_cross.append(slope)
    
#------- Plot
fig_MACD = bp.figure(x_axis_type = "datetime", tools=TOOLS, 
                    plot_width=WIDTH, plot_height=HEIGHT + 300, 
                    toolbar_location="above", title="MACD of Mazda")
### plot price 終値
fig_MACD.line(data_Mazda['日付'], data_Mazda['終値'], line_width=2, color="purple", alpha=0.3, legend="Price")
fig_MACD.scatter(data_Mazda['日付'][index_up], data_Mazda['終値'][index_up], 
                 line_width=0.1, color="red", alpha=0.8, legend="Buy?")
fig_MACD.scatter(data_Mazda['日付'][index_down], data_Mazda['終値'][index_down], 
                 line_width=0.1, color="green", alpha=0.8, legend="Sell?")
### MACD - Signal
fig_MACD.extra_y_ranges = {"MACD of Mazda":DataRange1d(start=-100, end=100)}
fig_MACD.add_layout(LinearAxis(y_range_name = "MACD of Mazda"), 'right')
fig_MACD.line(data_Mazda['日付'], MACD - signal, legend="MACD-Signal", y_range_name="MACD of Mazda", alpha=0.3)
### crossover
fig_MACD.circle_cross(data_Mazda['日付'][index_cross], data_Mazda['終値'][index_cross], line_width=1, fill_alpha=0.2,
                      color="cyan", size=10, legend="cross(buy)")
### slope
fig_MACD.vbar(x=data_Mazda['日付'], bottom=0,
           top=slope_cross, color="red", width=0.0, alpha=0.3, line_width=3.0)

bp.show(fig_MACD)

  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)
