In [163]:
import pandas as pd
import numpy as np
from datetime import date
from nsepy import get_history
import warnings
warnings.filterwarnings('ignore')

In [164]:
# lodaing dataset
def load_dataset(name, start_d, end_d):
    df = get_history(symbol = name,
                     start = start_d,
                     end = end_d)
    return df

In [165]:
# filiing/adding holiday dates
# assuming that the market doesn't change during holidays
def fill_holiday(df, start_d, end_d):
    idx = pd.date_range(start_d, end_d)
    df = df.reindex(idx, fill_value=0)
    df['Is_Holiday'] = df['Symbol'].apply(lambda x: 1 if(x==0) else 0)
    temp_d = 0
    in_p = int(df.shape[1]-1)
    for i in idx:
        if df.loc[i].Symbol != 0:
            temp_d = i
        else:
            df.loc[i,:in_p] = df.loc[temp_d]
    return df

In [166]:
# Part 1.1: moving average(closing price)
def moving_avg(df, week_list):
    for i in week_list:
        df['MA_'+str(i)] = df.Close.rolling(i*7).mean()
    return df

In [167]:
# Part 1.2: rolling window(closing price)
def roll_window(df, window_size_list):
    for i in window_size_list:
        df['roll_'+str(i)] = df.Close.shift(i)
    return df

In [168]:
# Part 1.3: dummy time series
def add_dummy(df):
    df['volume_shift_1'] = df.Volume.shift(1)
    # 0-if(10% higher or 10% lower), 1
    df['Volume_shocks'] = df.apply(lambda x: 0 if((x.Volume==1.1*x.volume_shift_1)or(x.Volume==0.9*x.volume_shift_1)) else 1, axis='columns')
    # 0-upward, 1-downward
    df['Volume_shocks_direction'] = df.apply(lambda x: 0 if(x.Volume>=x.volume_shift_1) else 1, axis='columns')
    
    df['Close_shift_1'] = df.Close.shift(-1)
    # 0->2%, 1
    df['Price_shocks'] = df.apply(lambda x: 0 if(abs(x.Close-x.Close_shift_1)>0.02*x.Close) else 1, axis='columns')
    # 0-upward, 1-downward
    df['Price_shocks_direction'] = df.apply(lambda x: 0 if(x.Close<x.Close_shift_1) else 1, axis='columns')
    
    # 0->2%, 1
    df['Pricing_black_swan'] = df.apply(lambda x: 0 if(abs(x.Close-x.Close_shift_1)>0.02*x.Close) else 1, axis='columns')
    # 0-upward, 1-downward
    df['Pricing_black_swan_direction'] = df.apply(lambda x: 0 if(x.Close<x.Close_shift_1) else 1, axis='columns')
    
    df['Pricing_shock_without_volume_shock'] = df.apply(lambda x: 0 if(x.Volume_shocks==0 and x.Price_shocks==0) else 1, axis='columns')
    return df

In [178]:
# financial year date
start_date = date(2015,4,1)
end_date = date(2016,3,31)

In [179]:
# 'INFY'
infy = load_dataset('INFY', start_date, end_date)
infy.head()

Unnamed: 0_level_0,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-04-01,INFY,EQ,2218.35,2198.9,2199.5,2157.7,2171.0,2173.95,2171.15,1684622,365756000000000.0,90934,1187902,0.7051
2015-04-06,INFY,EQ,2173.95,2179.0,2184.7,2160.4,2179.95,2178.2,2171.24,1935438,420229400000000.0,54198,1529651,0.7903
2015-04-07,INFY,EQ,2178.2,2175.0,2182.8,2138.0,2161.0,2159.5,2160.92,2135567,461478600000000.0,71104,1610234,0.754
2015-04-08,INFY,EQ,2159.5,2182.5,2204.9,2173.05,2200.0,2200.55,2191.4,2546394,558017900000000.0,84366,2009389,0.7891
2015-04-09,INFY,EQ,2200.55,2211.4,2218.9,2184.4,2212.0,2215.0,2202.96,2645387,582767200000000.0,65171,2202202,0.8325


In [180]:
# handel holidays
# closing and previous close are same
infy = fill_holiday(infy, start_date, end_date)

In [183]:
# moving avg
weeks = [4,16,28,40,52]
infy_ma = moving_avg(infy[['Is_Holiday','Close']], weeks)
infy_ma

Unnamed: 0,Is_Holiday,Close,MA_4,MA_16,MA_28,MA_40,MA_52
2015-04-01,0,2173.95,,,,,
2015-04-02,1,2173.95,,,,,
2015-04-03,1,2173.95,,,,,
2015-04-04,1,2173.95,,,,,
2015-04-05,1,2173.95,,,,,
2015-04-06,0,2178.20,,,,,
2015-04-07,0,2159.50,,,,,
2015-04-08,0,2200.55,,,,,
2015-04-09,0,2215.00,,,,,
2015-04-10,0,2233.95,,,,,


In [185]:
# roll window
window_size = [10,20,30,40,50,60,70,75]
infy_rw = roll_window(infy[['Is_Holiday','Close']],window_size)
infy_rw

Unnamed: 0,Is_Holiday,Close,roll_10,roll_20,roll_30,roll_40,roll_50,roll_60,roll_70,roll_75
2015-04-01,0,2173.95,,,,,,,,
2015-04-02,1,2173.95,,,,,,,,
2015-04-03,1,2173.95,,,,,,,,
2015-04-04,1,2173.95,,,,,,,,
2015-04-05,1,2173.95,,,,,,,,
2015-04-06,0,2178.20,,,,,,,,
2015-04-07,0,2159.50,,,,,,,,
2015-04-08,0,2200.55,,,,,,,,
2015-04-09,0,2215.00,,,,,,,,
2015-04-10,0,2233.95,,,,,,,,


In [186]:
# create dummy
infy_dummy = add_dummy(infy[['Volume','Close']])
infy_dummy

Unnamed: 0,Volume,Close,volume_shift_1,Volume_shocks,Volume_shocks_direction,Close_shift_1,Price_shocks,Price_shocks_direction,Pricing_black_swan,Pricing_black_swan_direction,Pricing_shock_without_volume_shock
2015-04-01,1684622,2173.95,,1,1,2173.95,1,1,1,1,1
2015-04-02,1684622,2173.95,1684622.0,1,0,2173.95,1,1,1,1,1
2015-04-03,1684622,2173.95,1684622.0,1,0,2173.95,1,1,1,1,1
2015-04-04,1684622,2173.95,1684622.0,1,0,2173.95,1,1,1,1,1
2015-04-05,1684622,2173.95,1684622.0,1,0,2178.20,1,0,1,0,1
2015-04-06,1935438,2178.20,1684622.0,1,0,2159.50,1,1,1,1,1
2015-04-07,2135567,2159.50,1935438.0,1,0,2200.55,1,0,1,0,1
2015-04-08,2546394,2200.55,2135567.0,1,0,2215.00,1,0,1,0,1
2015-04-09,2645387,2215.00,2546394.0,1,0,2233.95,1,0,1,0,1
2015-04-10,1780944,2233.95,2645387.0,1,1,2233.95,1,1,1,1,1


In [171]:
# similarly for other datasets

In [188]:
# TCS
tcs = load_dataset('TCS', start_date, end_date)

# handel holidays
# closing and previous close are same
tcs = fill_holiday(tcs, start_date, end_date)

# moving avg
weeks = [4,16,28,40,52]
tcs_ma = moving_avg(tcs[['Is_Holiday','Close']], weeks)

# roll window
window_size = [10,20,30,40,50,60,70,75]
tcs_rw = roll_window(tcs[['Is_Holiday','Close']],window_size)

# create dummy
tcs_dummy = add_dummy(tcs[['Volume','Close']])