In [112]:
import pandas as pd
import numpy as np
from sklearn import svm
import sklearn.preprocessing as skp

In [113]:
# Creates a lagged series.
# Goes through a series and generates a (lag+1)-column pandas DataFrame that has each of the previous lag time units
# as a column and the current one as the last column.
# Input: pandas Series
# Output: (lag+1)-column pandas DataFrame

def timeseriesLagged(data, lag=60):
    """Build a sliding-window frame of ``lag + 1`` consecutive observations.

    Each output row is one window of ``data``. Column ``"1"`` holds the
    oldest value of the window and column ``str(lag + 1)`` the newest, so
    newer data sits further to the right.

    The windows are assembled from ``shift(1)`` .. ``shift(lag + 1)``; the
    unshifted series is not part of the result, which means the final
    observation of ``data`` never appears in the output.

    Parameters
    ----------
    data : pandas.Series
        Series to window.
    lag : int, default 60
        Number of columns in the result minus one.

    Returns
    -------
    pandas.DataFrame
        Frame with string column labels ``"1"`` .. ``str(lag + 1)`` and a
        fresh 0-based index. Missing values are replaced by 0 before the
        first ``lag + 1`` rows are discarded.
    """
    width = lag + 1
    # Oldest first: shift(width) feeds column "1", shift(1) feeds column str(width).
    shifted = [data.shift(steps) for steps in range(width, 0, -1)]
    windows = pd.concat(shifted, axis=1).fillna(0)
    windows.columns = [str(position) for position in range(1, width + 1)]
    # The first `width` rows are the ones padded by the shifts; drop them.
    return windows.iloc[width:].reset_index(drop=True)

# Binarizes the last column into 1, 0, -1. 1 = buy, 0 = do nothing, -1 = sell.
# rate is the percent increase or decrease that should trigger a buy or a sell.
# lookahead is the number of time units to look ahead.
# atleast is the fraction of the lookahead columns that need to be at least the same or greater than flat+rate.
# Input: lagged pandas DataFrame, double rate, uint lookahead, double flat, double atleast between 0 and 1
# Output: pandas DataFrame with the last column binarized
def binarizeTime(resLagged,rate = 0,lookahead = 0, flat = 0,atleast = 0.5):
    """Replace the trailing look-ahead columns by a single buy/hold/sell label.

    The frame is expected to have string column labels ``"1"`` .. ``"N"``
    (as produced by ``timeseriesLagged``). The last ``lookahead`` columns
    are compared against the column just before them, and are then replaced
    by one label column named ``str(N - lookahead + 1)``:

    *  1 (buy)  - rises outnumber falls and the number of rises is strictly
      greater than ``atleast * lookahead``
    * -1 (sell) - falls outnumber rises and the number of falls is strictly
      greater than ``atleast * lookahead``
    *  0 (hold) - everything else

    A look-ahead value counts as a rise when it exceeds the reference by at
    least ``flat`` and as a fall when it is below it by at least ``flat``.

    Parameters
    ----------
    resLagged : pandas.DataFrame
        Lagged frame; it is copied, never modified.
    rate : float, default 0
        Currently unused; kept so existing callers keep working.
    lookahead : int, default 0
        Number of trailing columns to look ahead over. Must be >= 1, so the
        default always has to be overridden.
    flat : float, default 0
        Minimum absolute change that counts as a rise or a fall.
    atleast : float, default 0.5
        Fraction of the look-ahead columns that must be exceeded by the
        number of rises (or falls).

    Returns
    -------
    pandas.DataFrame
        Copy of the input without the look-ahead columns and with the float
        label column appended.

    Raises
    ------
    ValueError
        If ``lookahead`` is not positive.
    """
    if lookahead <= 0 :
        raise ValueError("lookahead Must be 1 or higher!")
    resLagged = resLagged.copy() # work on a copy so the caller's frame is untouched

    # Label of the reference column: the last column before the look-ahead span.
    last = np.shape(resLagged)[1] - lookahead
    colsLookAhead = list(resLagged.loc[:,str(last+1):str(last + lookahead)])
    colsLast = resLagged[str(last)]

    # Change of every look-ahead column relative to the reference column.
    diffs = resLagged[colsLookAhead].subtract(colsLast,axis=0)
    greater = (diffs >= flat).values.sum(axis=1)   # rises per row
    lesser = (diffs <= -flat).values.sum(axis=1)   # falls per row

    # Parenthesised on purpose: `&` binds tighter than `>`, so the unparenthesised
    # form compared `greater` against `lesser & mask` instead of and-ing two tests.
    threshold = atleast * lookahead
    changeToBuy = (greater > lesser) & (greater > threshold)
    changeToSell = (lesser > greater) & (lesser > threshold)

    # `columns=` keyword: the positional axis argument is rejected by pandas 2.x.
    resLagged = resLagged.drop(columns=colsLookAhead)
    # Buy and sell are mutually exclusive; every remaining row is a hold.
    resLagged[str(last+1)] = np.select([changeToBuy, changeToSell], [1.0, -1.0], default=0.0)
    return resLagged

In [114]:
# Nifty. Reading and Cleaning
def _read_quotes(csv_path):
    """Read a quotes CSV, reverse its row order and convert the text columns to floats.

    Price/Open/High/Low have their thousands separators removed. In Vol.,
    separators are removed, an "M"/"K" suffix becomes an exponent and "-"
    becomes "0" before the conversion to float.
    NOTE(review): presumably the export is newest-first and "-" marks a
    missing volume - confirm against the CSV files.
    """
    quotes = pd.read_csv(csv_path).iloc[::-1].reset_index(drop=True)
    for column in ("Price", "Open", "High", "Low"):
        quotes[column] = quotes[column].str.replace(",", "").astype(np.double)
    volume = quotes["Vol."].str.replace(",", "")
    for text, substitute in (("M", "e6"), ("-", "0"), ("K", "e3")):
        volume = volume.str.replace(text, substitute)
    quotes["Vol."] = volume.astype(np.double)
    return quotes


fut = _read_quotes("Nifty50FUT.csv")
nifty = _read_quotes("Nifty.csv")

# Keep only the dates that appear in both tables.
# niftyDrop: dates present in fut but missing from nifty (these rows are removed from fut).
# futDrop:   dates present in nifty but missing from fut (these rows are removed from nifty).
niftyDrop = np.setdiff1d(fut["Date"].values, nifty["Date"].values)
futDrop = np.setdiff1d(nifty["Date"].values, fut["Date"].values)
nifty = nifty[~nifty["Date"].isin(futDrop)]
fut = fut[~fut["Date"].isin(niftyDrop)]

In [115]:
def _first_differences(frame):
    """Return the row-to-row differences of Vol., Open, High, Low and Price (in that order), NaNs dropped."""
    return tuple(
        frame[name].diff().dropna()
        for name in ('Vol.', 'Open', 'High', 'Low', 'Price')
    )


# Nifty Base
volNifty, openNifty, highNifty, lowNifty, dataNifty = _first_differences(nifty)

# Nifty Futs
volFut, openFut, highFut, lowFut, dataFut = _first_differences(fut)

#Future Premium
# Positional (not index-aligned) difference between the futures and the base price changes.
prems = pd.Series(dataFut.values - dataNifty.values)

In [116]:
lag=14
lookahead = 1
flat = 1


def _scaled_lag_features(series):
    """Window `series`, drop column str(lag+1) and standardise each remaining row."""
    windows = timeseriesLagged(series, lag + lookahead - 1)
    # NOTE(review): only column str(lag+1) is dropped; with lookahead > 1 the
    # columns after it would remain among the features - confirm that is intended.
    return skp.scale(windows.drop(str(lag+1), axis=1), axis=1)


# First N predicts N+1th. closeNifty keeps column str(lag+1); it becomes the label later on.
closeNifty = timeseriesLagged(dataNifty, lag + lookahead - 1)

# Open/high/low are correlated with the close series, so they are ignored for now
# (for both the base index and the futures). They can be added with
# _scaled_lag_features(openNifty) etc. if needed.
volNifty = _scaled_lag_features(volNifty)

# Same windowing for the futures close, the futures volume and the premium.
closeFut = _scaled_lag_features(dataFut)
volFut = _scaled_lag_features(volFut)
prems = _scaled_lag_features(prems)





In [117]:
# Turn the newest column of every window (the raw price change) into a class label:
#    1 -> change greater than +flat
#   -1 -> change less than -flat
#    0 -> change within [-flat, +flat]
# Both masks are evaluated against the raw values before anything is written.
# Assigning the labels one after another in place lets later comparisons see
# earlier labels: with flat = 1 every freshly written 1 matches `== flat` and is
# reset to 0, and `< flat` marks small positive moves as sells.
# NOTE: this overwrites the raw changes, so rebuild closeNifty before re-running this cell.
_rawChange = closeNifty[str(lag+1)]
closeNifty[str(lag+1)] = np.select([_rawChange > flat, _rawChange < -flat], [1.0, -1.0], default=0.0)


In [118]:

# Column str(lag+1) carries the labels; every other column is a feature.
buySeriesLabs = closeNifty[str(lag+1)] # labels
# Features as a plain array, each row standardised on its own (axis=1).
buySeriesFeats = skp.scale(closeNifty.drop(columns=[str(lag+1)]).values, axis=1)

In [120]:
# Sanity check: (rows, columns) of the feature matrix.
np.shape(buySeriesFeats)

(2626, 14)

In [169]:
def _ohlcv_rows(frame):
    """Stack Open, High, Low, Price and Vol. (in that order) as the rows of one float array."""
    return np.array([
        frame[name].values.astype(np.double)
        for name in ('Open', 'High', 'Low', 'Price', 'Vol.')
    ])


futVals = _ohlcv_rows(fut)
niftyVals = _ohlcv_rows(nifty)

In [176]:
# Futures minus base index for the first four rows of each array (volume, the fifth row, is left out).
premiums = np.subtract(futVals[:4], niftyVals[:4])

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,6179.40,6178.55,6274.30,6279.10,6287.85,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30
1,6178.55,6274.30,6279.10,6287.85,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40
2,6274.30,6279.10,6287.85,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45
3,6279.10,6287.85,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35
4,6287.85,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10
5,6272.00,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10,5280.80
6,6156.95,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10,5280.80,5167.60
7,6200.10,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10,5280.80,5167.60,5137.45
8,6206.80,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10,5280.80,5167.60,5137.45,5317.25
9,6074.25,5935.75,5913.20,5705.30,5208.80,4899.30,5203.40,5033.45,5383.35,5274.10,5280.80,5167.60,5137.45,5317.25,5463.50


Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Jan 02, 2008",6179.40,6144.70,6197.00,6060.85,110.35M,0.57%
1,"Jan 03, 2008",6178.55,6184.25,6230.15,6126.40,156.36M,-0.01%
2,"Jan 04, 2008",6274.30,6179.10,6300.05,6179.10,147.63M,1.55%
3,"Jan 07, 2008",6279.10,6271.00,6289.80,6193.35,139.61M,0.08%
4,"Jan 08, 2008",6287.85,6282.45,6357.10,6221.60,162.56M,0.14%
5,"Jan 09, 2008",6272.00,6287.55,6338.30,6231.25,120.85M,-0.25%
6,"Jan 10, 2008",6156.95,6278.10,6347.00,6142.90,133.66M,-1.83%
7,"Jan 11, 2008",6200.10,6166.65,6224.20,6112.55,125.14M,0.70%
8,"Jan 14, 2008",6206.80,6208.80,6244.15,6172.00,110.91M,0.11%
9,"Jan 15, 2008",6074.25,6226.35,6260.45,6053.30,117.68M,-2.14%
