In [None]:
#import required packages
from datetime import date,timedelta
from nsepy import get_history
import numpy as np
import pandas as pd
from pandas import datetime

In [None]:
# Fetch one year of daily history (starting 2015-01-01) for each stock via nsepy

number_of_days = 365
strt = date(2015, 1, 1)
ends = strt + timedelta(days=number_of_days)
# Same date window for every symbol; the symbol strings are passed exactly as written
SBIN, TCS, INFY = (
    get_history(symbol=ticker, start=strt, end=ends)
    for ticker in ('sbin', 'TCS', 'INFY')
)

In [None]:
# Preview the first rows of the SBIN frame returned by get_history
SBIN.head()

In [None]:
# Preview the first rows of the TCS frame returned by get_history
TCS.head()

In [None]:
# Preview the first rows of the INFY frame returned by get_history
INFY.head()

In [None]:
# Move the index of every data set back into a regular column
SBIN, TCS, INFY = (frame.reset_index() for frame in (SBIN, TCS, INFY))
# Parse the "Date" column of each frame into pandas datetimes
SBIN["Date"], TCS["Date"], INFY["Date"] = (
    pd.to_datetime(frame["Date"]) for frame in (SBIN, TCS, INFY)
)
# Tag each frame with its ticker as a plain attribute (read later for plot titles)
SBIN.name, TCS.name, INFY.name = 'SBIN', 'TCS', 'INFY'
stock_data = [SBIN, TCS, INFY]

In [None]:
def assign_index(stock):
    """Index ``stock`` by its ``Date`` column, in place, and hand the same frame back.

    The ``Date`` column itself is left in the frame, so the dates are available
    both as the index and as a regular column afterwards.
    """
    date_values = stock['Date']
    stock.index = date_values
    return stock

In [None]:
# Index every frame by its "Date" column via assign_index
SBIN, TCS, INFY = (assign_index(frame) for frame in (SBIN, TCS, INFY))

In [None]:
# Collect the three stock frames in a single list
stocks = [SBIN,TCS,INFY]

In [None]:
#importing plot library

import matplotlib.pyplot as plt
%matplotlib inline
# Control the default size of figures in this Jupyter notebook
%pylab inline
pylab.rcParams['figure.figsize'] = (20, 12)

# Part -1 
### 1. Create 4, 16, ..., 52 week moving averages (closing price) for each stock and index. This should happen through a function.

In [None]:
#Moving average implementation

def moving_average(values,size):
    """Return the simple moving average of ``values`` over a window of ``size`` items.

    Parameters
    ----------
    values : sequence of numbers (list, numpy array or pandas Series)
        The observations, in order.
    size : int
        Number of consecutive observations averaged per output value.

    Returns
    -------
    numpy.ndarray
        ``len(values) - size + 1`` averages, one per fully covered window.
        Empty when the window is longer than the data.

    Raises
    ------
    ValueError
        If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    series = np.asarray(values, dtype=float)
    # np.convolve(..., 'valid') swaps its operands when the kernel is longer than
    # the data and would return meaningless numbers, so handle that case here.
    if size > series.size:
        return np.empty(0, dtype=float)
    weights = np.repeat(1.0, size) / size
    return np.convolve(series, weights, 'valid')

In [None]:
def moving_average_PLOT(stock, size_arr=(4, 16, 28, 40, 52)):
    """Plot the closing price with its moving averages and store them on ``stock``.

    For every window size a column ``"<size>_moving_avg"`` is added to ``stock``
    (in place), printed, and drawn on the current matplotlib figure.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must have ``"Date"`` and ``"Close"`` columns and a ``name`` attribute
        used in the plot title.
    size_arr : iterable of int, optional
        Window sizes. Defaults to the sizes previously hard-coded here.

    Notes
    -----
    NOTE(review): the labels say "weeks", but each window counts rows of
    ``stock`` — confirm whether a conversion from weeks to rows is intended.
    """
    moving_avg = {}
    n_rows = len(stock)

    # Line chart for each graph
    plt.title("Moving average for " + stock.name, fontsize=20)
    # Original closing price as -- line
    plt.plot(stock["Date"], stock["Close"], label="Closing pricing", linestyle='--', linewidth=2)

    for size in size_arr:
        # Left-pad with NaN (not None) so the column stays float and lines up
        # with the original rows; the first size-1 rows have no full window.
        padded = np.full(n_rows, np.nan)
        if size <= n_rows:
            padded[size - 1:] = moving_average(stock["Close"], size)
        # A window longer than the data leaves the whole column NaN.
        moving_avg[size] = padded

        column = str(size) + "_moving_avg"
        stock[column] = padded

        name = "Moving average for " + str(size) + " weeks"
        print(name + " is as follow :")
        print(stock[column])
        plt.plot(stock["Date"], padded, label=name, linewidth=2)

    plt.legend(title="Legends", loc=3, prop={'size': 12})
    plt.show()

In [None]:
# Compute, store and plot the moving averages for SBIN
moving_average_PLOT(SBIN)
    

In [None]:
# Compute, store and plot the moving averages for TCS
moving_average_PLOT(TCS)

In [None]:
# Compute, store and plot the moving averages for INFY
moving_average_PLOT(INFY)

### 2. Create a rolling window of size 10 on each stock/index. Handle unequal time series due to stock market holidays. You should look to increase your rolling window size to 75 and see how the data looks. Remember that larger windows will put stress on your laptop's RAM.

In [None]:
def rolling_window(stock, rolling_size=(10, 25, 50, 75)):
    """Plot the closing price with pandas rolling means and store them on ``stock``.

    For every window size a column ``"<size> rolling window"`` holding the
    rolling mean of ``"Close"`` (rounded to 2 decimals) is added to ``stock``
    in place, printed, and drawn on the current matplotlib figure.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must have a ``"Close"`` column and a ``name`` attribute used in the title.
    rolling_size : iterable of int (or int-like strings), optional
        Window sizes in rows. Defaults to the sizes previously hard-coded here.

    Notes
    -----
    Windows count rows of ``stock``, so days that have no row are not counted.
    """
    plt.title("Moving average with inbuilt function on " + stock.name, fontsize=15)
    plt.plot(stock["Close"], label="Original closing Price")
    for window in rolling_size:
        window = int(window)
        temp_name = str(window) + " rolling window"
        stock[temp_name] = stock["Close"].rolling(window=window, center=False).mean().round(2)
        text = "Rolling window of size : " + str(window)
        print(text)
        print(stock[temp_name])
        plt.plot(stock[temp_name], label=text)
    plt.legend(title="Legends", loc=3, prop={'size': 12})
    plt.show()

In [None]:
rolling_window(SBIN)
# The first (window - 1) values of each rolling column are NaN because a mean is
# only produced once a full window of rows is available.

 ### 3.1 Volume shocks

#### 0/1 boolean time series for shock

In [None]:
# Add the previous row's traded volume next to each row so consecutive days can be compared
SBIN["prev_day"] = SBIN["Volume"].shift(periods=1)
TCS["prev_day"] = TCS["Volume"].shift(periods=1)
INFY["prev_day"] = INFY["Volume"].shift(periods=1)

In [None]:
# Calculating volume shock: 1 when the volume moved by more than 10% relative to
# the previous day's volume, else 0.
# The change is divided by the previous day's volume (the base of a day-over-day
# change), not by the current day's volume. The first row has no previous day, so
# its comparison against NaN is False and the flag is 0.
SBIN["Volume_Shock"] = (
    ((SBIN["Volume"] - SBIN["prev_day"]).abs() / SBIN["prev_day"] * 100) > 10
).astype(int)
print(SBIN["Volume_Shock"])

In [None]:
# Volume shock: 1 when the volume moved by more than 10% relative to the previous
# day's volume (the base of a day-over-day change), else 0. The first row has no
# previous day, so its comparison against NaN is False and the flag is 0.
TCS["Volume_Shock"] = (
    ((TCS["Volume"] - TCS["prev_day"]).abs() / TCS["prev_day"] * 100) > 10
).astype(int)
print(TCS["Volume_Shock"])

In [None]:
# Volume shock: 1 when the volume moved by more than 10% relative to the previous
# day's volume (the base of a day-over-day change), else 0. The first row has no
# previous day, so its comparison against NaN is False and the flag is 0.
INFY["Volume_Shock"] = (
    ((INFY["Volume"] - INFY["prev_day"]).abs() / INFY["prev_day"] * 100) > 10
).astype(int)
print(INFY["Volume_Shock"])

#### 0/1 dummy-coded time series for direction of shock

In [None]:
# Calculating direction volume shock for each share
def direction_shock(stock_name):
    """Return the direction of a volume shock for a single row.

    Parameters
    ----------
    stock_name : mapping-like row (e.g. a row passed by ``DataFrame.apply(axis=1)``)
        Must provide ``"Volume_Shock"``, ``"Volume"`` and ``"prev_day"``.

    Returns
    -------
    int or float
        ``1`` when the row is a shock and the volume rose versus ``prev_day``,
        ``0`` when the row is a shock and the volume did not rise,
        ``NaN`` (a real missing value, not the string ``"NaN"``) when the row
        is not a shock.
    """
    if stock_name["Volume_Shock"] != 1:
        # No shock on this row: there is no direction to report.
        return np.nan
    return 1 if stock_name["Volume"] - stock_name["prev_day"] > 0 else 0

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_shock
SBIN["dir_shock"] = 'NaN'
SBIN["dir_shock"] = SBIN.apply(direction_shock, axis="columns")
print(SBIN.dir_shock)

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_shock
TCS["dir_shock"] = 'NaN'
TCS["dir_shock"] = TCS.apply(direction_shock, axis="columns")
print(TCS.dir_shock)

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_shock
INFY["dir_shock"] = 'NaN'
INFY["dir_shock"] = INFY.apply(direction_shock, axis="columns")
print(INFY.dir_shock)

 ### 3.2 Price shocks and price black swan (same)

#### 0/1 boolean time series for shock

In [None]:
# Extra column holding the previous day's closing price.
# shift(1) moves every close down one row, so row t sees the close of row t-1.
# (shift(-1) would pull in the NEXT row's close, i.e. look ahead in time and flip
# the sign of "Close - prev_day_close".)
SBIN["prev_day_close"] = SBIN.Close.shift(1)
TCS["prev_day_close"] = TCS.Close.shift(1)
INFY["prev_day_close"] = INFY.Close.shift(1)


In [None]:
# 1 when |prev_day_close - Close| exceeds 2% of the same-row Close, else 0
# (rows with a missing prev_day_close compare False and yield 0).
# NOTE(review): the change is normalised by the same-row close — confirm the intended base.
SBIN["Close_price_shock"] = (
    (SBIN["prev_day_close"] - SBIN["Close"]).abs().div(SBIN["Close"]).mul(100).gt(2)
).astype(int)
print(SBIN.Close_price_shock)

In [None]:
# 1 when |prev_day_close - Close| exceeds 2% of the same-row Close, else 0
# (rows with a missing prev_day_close compare False and yield 0).
# NOTE(review): the change is normalised by the same-row close — confirm the intended base.
TCS["Close_price_shock"] = (
    (TCS["prev_day_close"] - TCS["Close"]).abs().div(TCS["Close"]).mul(100).gt(2)
).astype(int)
print(TCS.Close_price_shock)

In [None]:
# 1 when |prev_day_close - Close| exceeds 2% of the same-row Close, else 0
# (rows with a missing prev_day_close compare False and yield 0).
# NOTE(review): the change is normalised by the same-row close — confirm the intended base.
INFY["Close_price_shock"] = (
    (INFY["prev_day_close"] - INFY["Close"]).abs().div(INFY["Close"]).mul(100).gt(2)
).astype(int)
print(INFY.Close_price_shock)

#### 0/1 dummy-coded time series for direction of shock

In [None]:
def direction_close_shock(stock_name):
    """Return the direction of a closing-price shock for a single row.

    Parameters
    ----------
    stock_name : mapping-like row (e.g. a row passed by ``DataFrame.apply(axis=1)``)
        Must provide ``"Close_price_shock"``, ``"Close"`` and ``"prev_day_close"``.

    Returns
    -------
    int or float
        ``1`` when the row is a shock and ``Close`` is above ``prev_day_close``,
        ``0`` when the row is a shock and it is not,
        ``NaN`` (a real missing value, not the string ``"Nan"``) when the row
        is not a shock.
    """
    if stock_name["Close_price_shock"] != 1:
        # No shock on this row: there is no direction to report.
        return np.nan
    return 1 if stock_name["Close"] - stock_name["prev_day_close"] > 0 else 0

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_close_shock
SBIN["dir_shock_price"] = 'Nan'
SBIN["dir_shock_price"] = SBIN.apply(direction_close_shock, axis="columns")
print(SBIN.dir_shock_price)

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_close_shock
TCS["dir_shock_price"] = 'Nan'
TCS["dir_shock_price"] = TCS.apply(direction_close_shock, axis="columns")
print(TCS.dir_shock_price)

In [None]:
# Seed the column with a placeholder, then replace it with the row-wise
# result of direction_close_shock
INFY["dir_shock_price"] = 'Nan'
INFY["dir_shock_price"] = INFY.apply(direction_close_shock, axis="columns")
print(INFY.dir_shock_price)

### Pricing shock without volume shock

In [None]:
# 1 on days without a volume shock, else 0
SBIN["notVolShock"] = (SBIN["Volume_Shock"] == 0).astype(int)
# Price shock without volume shock: the price-shock FLAG must be combined with
# notVolShock. The direction column is not a shock indicator (it is 0 for a
# downward shock and holds a non-numeric marker on non-shock days).
SBIN["Pshock_w/o_volShock"] = (
    (SBIN["Close_price_shock"] == 1) & (SBIN["notVolShock"] == 1)
).astype(int)
print(SBIN["Pshock_w/o_volShock"])

In [None]:
# 1 on days without a volume shock, else 0
TCS["notVolShock"] = (TCS["Volume_Shock"] == 0).astype(int)
# Price shock without volume shock: the price-shock FLAG must be combined with
# notVolShock. The direction column is not a shock indicator (it is 0 for a
# downward shock and holds a non-numeric marker on non-shock days).
TCS["Pshock_w/o_volShock"] = (
    (TCS["Close_price_shock"] == 1) & (TCS["notVolShock"] == 1)
).astype(int)
print(TCS["Pshock_w/o_volShock"])

In [None]:
# 1 on days without a volume shock, else 0
INFY["notVolShock"] = (INFY["Volume_Shock"] == 0).astype(int)
# Price shock without volume shock: the price-shock FLAG must be combined with
# notVolShock. The direction column is not a shock indicator (it is 0 for a
# downward shock and holds a non-numeric marker on non-shock days).
INFY["Pshock_w/o_volShock"] = (
    (INFY["Close_price_shock"] == 1) & (INFY["notVolShock"] == 1)
).astype(int)
print(INFY["Pshock_w/o_volShock"])

# Part 2 (data visualization):

In [None]:
# Importing plotting libraries
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.palettes import Spectral11, colorblind, Inferno, BuGn, brewer,GnBu,Blues
from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource,LinearColorMapper,BasicTicker, PrintfTickFormatter, ColorBar


In [None]:
# Direct bokeh output to the notebook so show() renders figures inline
output_notebook()

In [None]:
def bokeh_visuals(stock):
    """Show the closing price as a line, marking price shocks without a volume shock.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must have a ``"Close"`` column and a ``"Pshock_w/o_volShock"`` 0/1
        column; the frame's index supplies the x values of the datetime axis.
    """
    fig = figure(x_axis_type="datetime")
    fig.line(stock.index, stock['Close'], color='blue', alpha=0.5)

    # Draw markers only on flagged days, at their real closing price. Multiplying
    # Close by the 0/1 flag would place every non-shock day as a circle at y=0.
    shock_days = stock[stock["Pshock_w/o_volShock"] == 1]
    # NOTE(review): newer bokeh releases spell this keyword `legend_label`; `legend`
    # is kept to match the bokeh API already used in this notebook — confirm version.
    fig.circle(shock_days.index, shock_days["Close"], size=4, legend='price shock without vol shock')
    show(fig)

In [None]:
# Render the close-price chart with shock markers for SBIN
bokeh_visuals(SBIN)

In [None]:
# Render the close-price chart with shock markers for TCS
bokeh_visuals(TCS)

In [None]:
# Render the close-price chart with shock markers for INFY
bokeh_visuals(INFY)

In [None]:
from statsmodels.tsa.stattools import acf, pacf

def draw_pacf(stock, lags=50):
    """Show a bar chart of the partial autocorrelation of ``stock["Close"]``.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must have a ``"Close"`` column. Its ``name`` attribute, when present,
        is used in the chart title.
    lags : int, optional
        Highest lag passed to ``pacf`` as ``nlags`` (default 50).
    """
    partial_autocorr = pacf(stock["Close"], nlags=lags)
    # pacf returns one value per lag from 0 through `lags` (lags + 1 values), so
    # the x positions are derived from the result to keep both columns the same length.
    x = list(range(len(partial_autocorr)))

    # Title follows the frame being plotted instead of a fixed ticker.
    title = "Partial Autocorrelation Plot {}".format(getattr(stock, "name", ""))
    p = figure(plot_height=500, title=title)
    p.vbar(x=x, top=partial_autocorr, width=0.9)
    show(p)

In [None]:
# Partial autocorrelation bar chart of the SBIN closing prices
draw_pacf(SBIN)

In [None]:
# Partial autocorrelation bar chart of the TCS closing prices
draw_pacf(TCS)

In [None]:
# Partial autocorrelation bar chart of the INFY closing prices
draw_pacf(INFY)