In [1]:
import pandas as pd
import datetime as dt
import nsepy as nse
import pickle as pk
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import matplotlib.dates as mdates
%matplotlib inline
import mpl_finance as mf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
import zigzag as zg

In [2]:
def create_analysis_windows(window_size,sliding_window_size):
    analysis_windows = []
    windows = ((end_date - start_date).days/sliding_window_size)
    first_window_size = (1+windows-int(windows))*sliding_window_size - sliding_window_size + window_size
    first_window_size = round(first_window_size)
    first_window_size = dt.timedelta(first_window_size)
    first_window_size
    all_other_windows = dt.timedelta(30)
    windows = int(windows)
    for window in range(windows):
        if window == 0:
            window_start_date = start_date
            window_end_date = start_date + first_window_size
        else:
            window_start_date = window_start_date+dt.timedelta(sliding_window_size)
            window_end_date = window_start_date+dt.timedelta(window_size)
        analysis_windows.append((window_start_date, window_end_date))
        #print('window: ','start_date:',window_start_date,' end_date:', window_end_date)
    return analysis_windows

In [3]:
def get_downtrend_length(X,y,pivots,inverse_method_used=False):
    count_of_tops=1
    tops = y.values[pivots == 1]
    for i in range(len(tops)-1):
        ix = -1*(i+1)
        if tops[ix] < tops[ix-1]:
            count_of_tops = count_of_tops+1
        else:
            break
    print('downtrend ON from ',count_of_tops,' tops from end.')
    if inverse_method_used == False:
        length = X[-1][0] - X[np.where(pivots == 1)[0][-1*count_of_tops]][0]
    else:
        length = X[-1][0] - X[np.where(pivots != 0)[0][-3]][0]
    return length, count_of_tops

def get_uptrend_length(X,y,pivots,inverse_method_used=False):
    count_of_bottoms=1
    bottoms = y.values[pivots == -1]
    for i in range(len(bottoms)-1):
        ix = -1*(i+1)
        if bottoms[ix] > bottoms[ix-1]:
            count_of_bottoms = count_of_bottoms+1
        else:
            break
    print('uptrend ON from ',count_of_bottoms,' bottoms from end.')
    if inverse_method_used == False:
        length = X[-1][0] - X[np.where(pivots == -1)[0][-1*count_of_bottoms]][0]
    else:
        length = X[-1][0] - X[np.where(pivots != 0)[0][-3]][0]
    return length, count_of_bottoms

def get_turning_points_for_trend(pivots,trend_length, pvt, max_points_for_trend = 4, inverse_method_used = False):
    if inverse_method_used:
        turning_points = 2
        return turning_points
    
    trend_started_from = len(pivots) - trend_length
    turning_points = len(np.where(np.where(pivots==pvt)[0] >= trend_started_from)[0])
    if turning_points > max_points_for_trend:
        turning_points = max_points_for_trend
    return turning_points

def get_trend_line(X,y,turning_points_for_trend,pivots,pvt, inverse_method_used = False):
    y_for_reg = y.values[pivots==pvt][-1*turning_points_for_trend:]
    X_for_reg = X[np.where(pivots==pvt)[0][-1*turning_points_for_trend:]]
    reg = LinearRegression().fit(X_for_reg, y_for_reg)
    return reg.coef_

In [4]:
def plot_pivots(X, y, pivots):
    plt.xlim(X.min(), X.max())
    plt.ylim(y.min()*0.99, y.max()*1.01)
    plt.plot(X, y, 'k:', alpha=0.5)
    plt.plot(X[pivots != 0], y[pivots != 0], 'k-')
    plt.scatter(X[pivots == 1], y[pivots == 1], color='g')
    plt.scatter(X[pivots == -1], y[pivots == -1], color='r')

In [5]:
def calc_dis_from_sma(d,y,data):
    last_pos = y.index[-1]
    start_pos = last_pos - d
    if start_pos < 0:
        print('SMA period longer than data available. Consider choosing value of d smaller than the window size.')
        return None
    
    #data dataframe contains the whole data series of the scrip (and not just the window data). 
    #In case the d is longer than window length, then the data for SMA needs to be brought from
    #the whole series i.e. data
    
    sma = (data['Close'].iloc[start_pos:last_pos].mean()/data['Close'].iloc[last_pos])*1000
    dist_from_sma = 1000 - sma
    return dist_from_sma

In [6]:
def create_features_for_window(window,data,analysis_windows,zig_zag_percentage, max_points_for_trend):
    df = data[(data.Date >= analysis_windows[window][0]) & (data.Date <= analysis_windows[window][1])]
    
    #replace dates for window by numeric series starting from 0. 
    #Scale the stock price to a scale of 1000 (set the closing of the last day to 1000) and 
    #proportionately adjust the stock price for all other days in the window.
    #This is required to get comparable slope values irrespective of the window time period & 
    #absolute values of the stock price.
    df['Matplotlib_Date'] = df['Date'].apply(date2num)
    df['x_axis'] = df['Matplotlib_Date'] - min(df['Matplotlib_Date'])
    df['y_axis'] = (df['Close']/df['Close'].values[-1])*1000
    
    X = df.x_axis.values.reshape(df.x_axis.shape[0],1)
    y = df.y_axis
    
    np.random.seed(1997)
    
    #get the pivot points where the trend reverses after achieving at least the configured % points as 
    #defined as inputs for the following function. Pivot = 1 means top is formed, and 
    #pivot = -1 means bottom is formed, at all other places the pivot value is 0.
    pivots = zg.peak_valley_pivots(y.values, zig_zag_percentage/100, -1*zig_zag_percentage/100)
    
    uptrend = False
    downtrend = False
    inverse_method_used = False  #Uptrend is determined when there are higher bottoms and
                                 #Downtrend is determined when there are lower tops.
                                 #But if there are not enough points to determine the trend, inverse method is used.
                                 #i.e. by looking at the lower bottoms for downtrend or higher tops for uptrend

    check = y.values[pivots == 1][-2:]
    print('check:',check)
    if check[1] < check[0]:
        downtrend = True

    check = y.values[pivots == -1][-2:]
    print('check:',check)
    if check[1] > check[0]:
        uptrend = True

    if uptrend or downtrend:
        pass
    else:
        y1 = y.values[pivots != 0][-3]
        y2 = y.values[pivots != 0][-1]
        if y1 < y2:
            uptrend = True
        else:
            downtrend = True
        inverse_method_used = True

    print('Inverse:',inverse_method_used)
    up_trend_length = None
    count_of_bottoms = None
    up_trend_bottoms_slope = None
    up_trend_slope = None
    down_trend_length = None
    count_of_tops = None
    down_trend_tops_slope = None
    down_trend_slope = None
        
    
    if uptrend:
        up_trend_length, count_of_bottoms = get_uptrend_length(X,y,pivots,inverse_method_used)
        pvt = -1
        if inverse_method_used:
            pvt = pvt*-1
        turning_points_for_trend = get_turning_points_for_trend(pivots,up_trend_length, pvt, max_points_for_trend, inverse_method_used)
        up_trend_bottoms_slope = get_trend_line(X,y,turning_points_for_trend,pivots,pvt, inverse_method_used)
        X_trend = X[int(-1*(up_trend_length-1)):]
        y_trend = y[int(-1*(up_trend_length-1)):]
        reg = LinearRegression().fit(X_trend, y_trend)
        up_trend_slope = reg.coef_

        
    if downtrend:
        down_trend_length, count_of_tops = get_downtrend_length(X,y,pivots,inverse_method_used)
        pvt = 1
        if inverse_method_used:
            pvt = pvt*-1
        turning_points_for_trend = get_turning_points_for_trend(pivots,down_trend_length, pvt, max_points_for_trend, inverse_method_used)
        down_trend_tops_slope = get_trend_line(X,y,turning_points_for_trend,pivots,pvt, inverse_method_used)
        X_trend = X[int(-1*(down_trend_length-1)):]
        y_trend = y[int(-1*(down_trend_length-1)):]
        reg = LinearRegression().fit(X_trend, y_trend)
        down_trend_slope = reg.coef_

    dist_from_sma20 = abs(calc_dis_from_sma(20,y,data))
    dist_from_sma50 = abs(calc_dis_from_sma(50,y,data))
    
    return uptrend, up_trend_length, count_of_bottoms, up_trend_bottoms_slope, up_trend_slope, downtrend, down_trend_length, count_of_tops, down_trend_tops_slope, down_trend_slope, dist_from_sma20, dist_from_sma50

In [7]:
start_date = dt.date(2001,1,1)
end_date = dt.date(2019,1,31)

#data = nse.get_history(symbol="NIFTY 50", start=start_date, end=end_date, index=True)
#pickle_dump = open('nifty_data.pkl', 'ab')
#pk.dump(data,pickle_dump)
#pickle_dump.close()
#data.head()

In [9]:
pickle_dump = open('nifty_data.pkl', 'rb')
data = pk.load(pickle_dump)
pickle_dump.close()
data = data.reset_index()

window_size = 60
sliding_window_size = 30 #gap between start dates of two consecutive windows
analysis_windows = create_analysis_windows(window_size,sliding_window_size)
window=2
zig_zag_percentage = 0.5
max_points_for_trend = 4

create_features_for_window(window,data,analysis_windows,zig_zag_percentage, max_points_for_trend)

check: [1026.7496112 1000.       ]
check: [910.81981782 978.71584092]
Inverse: False
uptrend ON from  2  bottoms from end.
downtrend ON from  2  tops from end.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


(True,
 18.0,
 2,
 array([4.52640154]),
 array([1.26699427]),
 True,
 5.0,
 2,
 array([-5.34992224]),
 array([-4.99571524]),
 3.3370362141746455,
 76.30304376805134)