In [112]:
import pandas as pd
import numpy as np
from sklearn import svm
import sklearn.preprocessing as skp

In [122]:
#Creates Lagged series
#Goes through a series and generates an lag+1 dimensional pandas DataFrame that has each previous lag timeunit
#as a column and current as the last cobilumn
#Input: Pandas Series
#Output: lag+1 dimensional DataFrame

def timeseriesLagged(data, lag=60):
    df = data
    columns = [df.shift(i) for i in range(1, lag+2)] 
    df = pd.concat(columns,axis=1)
    df.fillna(0, inplace=True)
    df.columns = [str(lag+2-x) for x in range(1,lag+2)]
    # df.reset_index(inplace=True,drop=False)
    df = df[df.columns[::-1]] #Flip because we want newer data on the right
    df= df.iloc[lag+1:] # drop the first 'lag' columns because zeroes.
    df.reset_index(drop=True,inplace=True)
    return df

# Binarizes the last column into 1, 0, -1. 1 = buy 0 = do nothing -1 = sell
# Rate is the percent increase or decrease that should trigger a buy or a sell
# lag is the time unit of lag. 
# atleast is how many of the lookahead need to be atleast the same or greater than flat+rat
# Input: lagged pandas DataFrame, uint lag, double dif, double flat, double atleast between 0 and 1
# Output : Pandas Dataframe with last column binarized
def binarizeTime(resLagged,rate = 0,lookahead = 0, flat = 0,atleast = 0.5):
    if lookahead <= 0 :
        raise Exception("lookahead Must be 1 or higher!")
    resLagged = resLagged.copy() # Make a deep copy
    last = np.shape(resLagged)[1] # find the length of the data 
    last = last-lookahead # convert it to string for loc
    colsLookAhead = list(resLagged.loc[:,str(last+1):str(last + lookahead)])
    colsLast = resLagged[str(last)]
    diffs = resLagged[colsLookAhead].subtract(colsLast,axis=0)
#     print(diffs)
    greater = diffs>=flat  # all the times the price changed higer than flat
    greater = np.count_nonzero(greater,axis=1).reshape((1,-1))
    lesser = diffs<=-flat # all the times the price fell lower than fat
    lesser = np.count_nonzero(lesser,axis=1).reshape((1,-1))
#     return greater,lesser
#     print(greater)
    greater = greater.reshape(1,-1)
    changeToBuy = np.any(greater > lesser & np.greater(greater,atleast*lookahead),axis=0) # make sure more rises than falls and atleast half rises
    changeToSell = np.any(lesser > greater & np.greater(lesser,atleast*lookahead),axis=0)      # make sure more falls than rises and atleast half rises
    changeToHold = ~changeToBuy & ~changeToSell
    resLagged = resLagged.drop(colsLookAhead,1)
    resLagged.loc[changeToSell,str(last+1)] = -1 # Set sell to -1
    resLagged.loc[changeToBuy,str(last+1)] = 1 # Set buy to 1
    resLagged.loc[changeToHold,str(last+1)] = 0 # Set to 0
    return resLagged

In [123]:
# Nifty. Reading and Cleaning
fut = pd.read_csv("Nifty50FUT.csv")
fut['Price']= fut['Price'].str.replace(",","").astype(np.double)
fut['Open']= fut['Open'].str.replace(",","").astype(np.double)
fut['High']= fut['High'].str.replace(",","").astype(np.double)
fut['Low']= fut['Low'].str.replace(",","").astype(np.double)
fut['Vol.']= fut['Vol.'].str.replace(",","").str.replace("M","e6").str.replace("-","0").str.replace("K","e3").astype(np.double)
fut = fut[::-1]
fut.reset_index(inplace=True, drop = True)

nifty = pd.read_csv("Nifty.csv")
nifty = nifty[::-1]
nifty.reset_index(inplace=True, drop = True)
nifty['Price']= nifty['Price'].str.replace(",","").astype(np.double)
nifty['Open']= nifty['Open'].str.replace(",","").astype(np.double)
nifty['High']= nifty['High'].str.replace(",","").astype(np.double)
nifty['Low']= nifty['Low'].str.replace(",","").astype(np.double)
nifty['Vol.']= nifty['Vol.'].str.replace(",","").str.replace("M","e6").str.replace("-","0").str.replace("K","e3").astype(np.double)


niftyDrop = np.setdiff1d(fut['Date'].values,nifty['Date'].values)
futDrop = np.setdiff1d(nifty['Date'].values,fut['Date'].values)
nifty = nifty[~nifty.Date.isin(futDrop)]
fut = fut[~fut.Date.isin(niftyDrop)]

In [124]:
# Nifty Base
volNifty = nifty['Vol.'].diff().dropna()
openNifty = nifty['Open'].diff().dropna()
highNifty = nifty['High'].diff().dropna()
lowNifty = nifty['Low'].diff().dropna()
dataNifty = nifty['Price'].diff().dropna()

# Nifty Futs
volFut = fut['Vol.'].diff().dropna()
openFut = fut['Open'].diff().dropna()
highFut = fut['High'].diff().dropna()
lowFut = fut['Low'].diff().dropna()
dataFut = fut['Price'].diff().dropna()
#Future Premium
prems = dataFut.values - dataNifty.values
prems = pd.Series(prems)

In [125]:
lag=14
lookahead = 1
flat = 1
# First N predicts N+1th. Creating the 1st N series
closeNifty = timeseriesLagged(dataNifty,lag + lookahead-1)
# These are correlated with closeNifty, so we will ignore them for now
# openNifty = timeseriesLagged(openNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
# highNifty = timeseriesLagged(highNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
# lowNifty = timeseriesLagged(lowNifty,lag + lookahead-1).drop(str(lag+1),axis=1)

volNifty = timeseriesLagged(volNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
volNifty = skp.scale(volNifty,axis=1)

# First N predicts N+1th. Creating the 1st N series
closeFut = timeseriesLagged(dataFut,lag + lookahead-1).drop(str(lag+1),axis=1)
closeFut = skp.scale(closeFut,axis=1)

# These are correlated with closeNifty, so we will ignore them for now
# openNifty = timeseriesLagged(openNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
# highNifty = timeseriesLagged(highNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
# lowNifty = timeseriesLagged(lowNifty,lag + lookahead-1).drop(str(lag+1),axis=1)
volFut = timeseriesLagged(volFut,lag + lookahead-1).drop(str(lag+1),axis=1)
volFut = skp.scale(volFut,axis=1)

prems = timeseriesLagged(prems,lag + lookahead-1).drop(str(lag+1),axis=1)
prems = skp.scale(prems,axis=1)





In [117]:
closeNifty.loc[closeNifty[str(lag+1)] > flat,str(lag+1)] = 1
closeNifty.loc[closeNifty[str(lag+1)] < flat,str(lag+1)] = -1
closeNifty.loc[closeNifty[str(lag+1)] == flat,str(lag+1)] = 0


In [127]:

buySeriesLabs = closeNifty[str(lag+1)] # labels
buySeriesFeats = closeNifty.drop(str(lag+1),axis=1) #features
buySeriesFeats = buySeriesFeats.values
buySeriesFeats = skp.scale(buySeriesFeats,axis=1)

buySeries = np.zeros((len(buySeriesFeats),buySeriesFeats.shape[-1],5))
buySeries[:,:,0] = buySeriesFeats
buySeries[:,:,1] = volFut
buySeries[:,:,2] = prems
buySeries[:,:,3] = closeFut
buySeries[:,:,4] = volNifty



In [128]:
# x,y = shuffle(buySeries,buySeriesLabs)
x,y = buySeries,buySeriesLabs
tot = len(x)
y = y.values
yOrig = np.copy(y)
y = np_utils.to_categorical(y,3)
trainPercent = 0.9 # majority of data used for training
testPercent = 0.9 # 
valPercent = 1.00  #

# Test Train Val Split

xTrain = x[0:int(trainPercent*tot),:,:]
yTrain = y[0:int(trainPercent*tot)]

xTest = x[int(trainPercent*tot): int(testPercent*tot),:,:]
yTest = y[int(trainPercent*tot): int(testPercent*tot)]

xVal = x[int(testPercent*tot):,:,:]
yVal = y[int(testPercent*tot):]

NameError: name 'np_utils' is not defined