# Define class StockCracker

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)
#---- matplotlib, my old favorite...
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
sns.set(style='ticks')
%matplotlib inline
#---- for Bokeh, a ploter
import bokeh.plotting as bp
from bokeh.layouts import gridplot, column
from bokeh import palettes, charts
from bokeh.models import LinearAxis, DataRange1d, HoverTool, BoxSelectTool
from bokeh.charts import Histogram, Bar
TOOLS = ["pan,wheel_zoom,box_zoom,reset,crosshair, hover, box_select"]
WIDTH = 990
HEIGHT = 280
bp.output_notebook()
#---- for color, style print in iPython
from IPython.display import HTML, display
import datetime
import os
#---- for colorful print
from colorama import init
init(autoreset=True)
from colorama import Fore, Back, Style
#---- for statics
from scipy import stats

class StockCracker:
    def __init__(self):
        self.rootPath = './'
        self.USDJPYPath = './fx/USDJPY/'
        self.mazdaPath = './Mazda/'
        self.nikkeiPath = './Nikkei/'
        print("Initializing StockCracker instance")

    # read one csv file
    def readOneCsv(self, filePath, isReversed):
        data = pd.read_csv(filePath, encoding='shift-JIS', header=0)
        if isReversed is True:
            data = data.reindex(index=data.index[::-1])
            data.index = range(len(data))
        print(Fore.YELLOW + data['日付'][0] + '~' + data['日付'][len(data)-1])
        return data

    # read one folder, recurssively
    def readOneFolder(self, folderPath, isReversed=True):
        for root, subdirs, files in os.walk(folderPath):
            print('----------------')
            print(Back.CYAN + Fore.RED + Style.BRIGHT + 
                  'In Folder:'+root)
            for oneCsv in files:
                print(Back.CYAN + oneCsv)
                filePath = folderPath+oneCsv
                data = self.readOneCsv(filePath, isReversed)
                try:
                    allData
                except NameError:   # allData not exist
                    allData = data
                else:   # allData exist
                    # concatenate 2 pandas dataframes
                    allData = pd.concat([allData, data])
        allData.index = range(len(allData))
        #print(allData) # output all data of target stock
        return allData

    # calculate Beta coefficient, 
    def calcBetaCoefficient(self, stock, index):
        #---- 
#### following example gives a good intuition about the quantitative  
#        >>> a = np.array([1,3,9,6,5])
#        >>> b = a + 100
#        >>> b
#        array([101, 103, 109, 106, 105])
#        >>> np.cov(a,b,bias=1)
#        array([[ 7.36,  7.36],
#                [ 7.36,  7.36]])
#        >>> b = a * 10
#        >>> np.cov(a,b,bias=1)
#        array([[   7.36,   73.6 ],
#               [  73.6 ,  736.  ]])
        dailyChange_stock = np.array([(stock['終値'][idx+1] - stock['終値'][idx]) / stock['終値'][idx] for idx in range(len(stock) - 1)]) # from 0 ~ len(stock)-1
        dailyChange_index = np.array([(index['終値'][idx+1] - index['終値'][idx]) / index['終値'][idx] for idx in range(len(index) - 1)]) # from 0 ~ len(index)-1
        # covariance matrix(a, b) = 
        # [[var(a)    cov(a, b)]
        #  [cov(b, a) var(b)   ]]
        #
        matrix = np.cov(dailyChange_stock, dailyChange_index, bias=1)        
        beta = matrix[0][1] / matrix[1][1]
        print("Beta = {}".format(beta))
        if beta >= 1:
            print("Target Stock is MORE volitale than the index, target stock is RISKY")
        else:
            print("Target Stock is LESS volitale than the index, target stock is NOT risky")
        return beta, dailyChange_stock, dailyChange_index
    
    # On-balance volume, need 出来高 of the target stock
    def keyIndicator_OBV(self, stock):
        OBV = np.zeros(len(stock))
        OBV[0] = 0 
        for idx in range(len(stock)):
            if idx == 0:
                pass
            if stock['終値'].iloc[idx] > stock['終値'].iloc[idx - 1]:
                OBV[idx] = OBV[idx - 1] + stock['出来高'].iloc[idx]
            elif stock['終値'].iloc[idx] < stock['終値'].iloc[idx - 1]:
                OBV[idx] = OBV[idx - 1] - stock['出来高'].iloc[idx]
            else:
                OBV[idx] = OBV[idx - 1]
        return OBV
    
sc = StockCracker()

Initializing StockCracker instance


In [2]:
# reading data ------------------------------------
data_Nikkei = sc.readOneFolder(sc.nikkeiPath)
data_Mazda  = sc.readOneFolder(sc.mazdaPath)
data_fx     = sc.readOneFolder(sc.USDJPYPath, False)
## convert date
data_Mazda['日付']    = [datetime.datetime.strptime(d, '%Y-%m-%d').date() for d in data_Mazda['日付']]
data_Nikkei['日付']   = [datetime.datetime.strptime(d, '%Y-%m-%d').date() for d in data_Nikkei['日付']]
data_fx['日付']       = [datetime.datetime.strptime(d, '%Y/%m/%d').date() for d in data_fx['日付']]

### for Mazda, 2014-07-29 株併合:1株 -> 0.2株
#index = data_Mazda.loc[data_Mazda['日付'] == '2014-07-28'].index[0] # old version
index = data_Mazda.loc[data_Mazda['日付'] == datetime.datetime.strptime("2014-07-28", '%Y-%m-%d').date()].index[0]
data_Mazda['始値'][0:(index+1)] = data_Mazda['始値'][0:(index+1)] * 5
data_Mazda['高値'][0:(index+1)] = data_Mazda['高値'][0:(index+1)] * 5
data_Mazda['安値'][0:(index+1)] = data_Mazda['安値'][0:(index+1)] * 5
data_Mazda['終値'][0:(index+1)] = data_Mazda['終値'][0:(index+1)] * 5

----------------
In Folder:./Nikkei/
indices_I101_1d_2007.csv
2007-01-04~2007-12-28
indices_I101_1d_2008.csv
2008-01-04~2008-12-30
indices_I101_1d_2009.csv
2009-01-05~2009-12-30
indices_I101_1d_2010.csv
2010-01-04~2010-12-30
indices_I101_1d_2011.csv
2011-01-04~2011-12-30
indices_I101_1d_2012.csv
2012-01-04~2012-12-28
indices_I101_1d_2013.csv
2013-01-04~2013-12-30
indices_I101_1d_2014.csv
2014-01-06~2014-12-30
indices_I101_1d_2015.csv
2015-01-05~2015-12-30
indices_I101_1d_2016.csv
2016-01-04~2016-12-30
indices_I101_1d_2017.csv
2017-01-04~2017-02-07
----------------
In Folder:./Mazda/
stocks_7261-T_1d_2007.csv
2007-01-04~2007-12-28
stocks_7261-T_1d_2008.csv
2008-01-04~2008-12-30
stocks_7261-T_1d_2009.csv
2009-01-05~2009-12-30
stocks_7261-T_1d_2010.csv
2010-01-04~2010-12-30
stocks_7261-T_1d_2011.csv
2011-01-04~2011-12-30
stocks_7261-T_1d_2012.csv
2012-01-04~2012-12-28
stocks_7261-T_1d_2013.csv
2013-01-04~2013-12-30
stocks_7261-T_1d_2014.csv
2014-01-06~2014-12-30
stocks_7261-T_1d_2015.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [3]:
#---- plot data, Matplotlib
#fig, (ax, bx) = plt.subplots(2, sharex = True)
## Nikkei
#ax.plot(date_Nikkei, data_Nikkei['始値'][:], c='r')
#ax.set_ylabel('Nikkei', color='r', fontsize=20)
## Mazda
#ax1 = ax.twinx()
#ax1.plot(date_Mazda, data_Mazda['始値'][:], c='b')
#ax1.set_ylabel('Mazda', color='b', fontsize=20)
## fx USDJPY
#bx.plot(date_fx, data_fx['始値'][:], c='c')
#bx.set_ylabel('USDJPY', color='c', fontsize=20)
#plt.show()

# calculate Beta Coefficient
beta, dailyChange_mazda, dailyChange_nikkei = sc.calcBetaCoefficient(data_Mazda, data_Nikkei)
##--- another version of beta, refer to ([ref] Beta Coefficient.pdf )
slope, intercept, r_value, p_value, std_err = stats.linregress(dailyChange_nikkei, dailyChange_mazda)
print("y = {}*x + {}".format(slope, intercept)) #line = slope*xi+intercept

Beta = 1.3158815949766904
Target Stock is MORE volitale than the index, target stock is RISKY
y = 1.3158815949766904*x + -0.00015806818560150078


In [4]:
# plot Nikkei and Mazda
fig1 = bp.figure(x_axis_type="datetime", tools=TOOLS, plot_width=WIDTH, plot_height=HEIGHT, toolbar_location="above", title="Nikkei & Mazda")
fig1.extra_y_ranges = {"Mazda":DataRange1d(start=data_Mazda['始値'].min()-100, 
                                           end=data_Mazda['始値'].max()+100)}
fig1.add_layout(LinearAxis(y_range_name = "Mazda"), 'right')
fig1.line(data_Nikkei['日付'], data_Nikkei['始値'], line_width=1, color="red", legend="Nikkei")
fig1.line(data_Mazda['日付'], data_Mazda['始値'], line_width=2, color="blue", y_range_name="Mazda", legend="Mazda")
fig1.legend.location="top_center"

# plot FX - USDJPY
fig2 = bp.figure(x_axis_type="datetime", tools=TOOLS, plot_width=WIDTH, plot_height=HEIGHT, toolbar_location="above", title="USDJYP")
fig2.line(data_fx['日付'], data_fx['始値'])

grid = gridplot([fig1, fig2], ncols=1)

#bp.show(column(fig1, fig2))
bp.show(grid)

In [26]:
#---- calculate several Key Indicators
#----- OBV
# data_Mazda[['出来高','終値']] # extract 2 specific columns from original dataframe
OBV_Mazda = sc.keyIndicator_OBV(data_Mazda)
fig_OBV = bp.figure( x_axis_type = "datetime", tools=TOOLS, 
                    plot_width=WIDTH, plot_height=HEIGHT + 300, 
                    toolbar_location="above", title="On-balance volume & Volume Bar of Mazda")
### plot volume
index_red = data_Mazda.loc[data_Mazda['終値'] >= data_Mazda['始値']].index
index_blk = data_Mazda.loc[data_Mazda['終値'] <  data_Mazda['始値']].index
fig_OBV.vbar(x=data_Mazda['日付'][index_red], bottom=0,
           top=data_Mazda['出来高'][index_red], color="red", width=0.0, alpha=0.3, line_width=3.0)
fig_OBV.vbar(x=data_Mazda['日付'][index_blk], bottom=0,
           top=data_Mazda['出来高'][index_blk], color="green", width=0.0, alpha=0.3, line_width=3.0)
### plot OBV
scaler = 0.1 # the trajectory/trend of OBV is crucial, the value is not
fig_OBV.line(data_Mazda['日付'], OBV_Mazda * scaler, color="black", alpha=1.5)
### plot price 始値
fig_OBV.extra_y_ranges = {"Mazda":DataRange1d(start=data_Mazda['始値'].min()-6000, end=data_Mazda['始値'].max()+100)}
fig_OBV.add_layout(LinearAxis(y_range_name = "Mazda"), 'right')
fig_OBV.line(data_Mazda['日付'], data_Mazda['始値'], line_width=2, color="blue", y_range_name="Mazda", alpha=0.3)

bp.show(fig_OBV)

In [23]:
# Sliding window to calculate Trend of OBV
windowSize = 100
slope, intercept, r_value, p_value, std_err = stats.linregress( range(0, windowSize), data_Mazda['始値'][0:windowSize] )
print(slope)
slope, intercept, r_value, p_value, std_err = stats.linregress( range(0, windowSize), OBV_Mazda[0:windowSize] )
print(slope)


-9.60189018902
-1191390.69907
