In [61]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import pandas_datareader.data as web
import talib
from sklearn.preprocessing import StandardScaler

def get_data(ticker):
    start = dt.datetime(2009, 1, 1)
    end = dt.datetime.now()
    df = web.DataReader(ticker,'yahoo', start, end)
    df.to_csv(ticker+'.csv')
    
get_data('RELIANCE.NS')

In [62]:
pd.read_csv('RELIANCE.NS.csv').shape

(2634, 7)

In [63]:
def data_preprocessing(ticker):
    dataset = pd.read_csv('{}.csv'.format(ticker))
    dataset = dataset.dropna()
    dataset = dataset[['Open', 'High', 'Low', 'Close']]
    
    dataset['H-L'] = dataset['High'] - dataset['Low']
    dataset['O-C'] = dataset['Close'] - dataset['Open']
    
    dataset['ma_5'] = dataset['Close'].rolling(window=5).mean()
    dataset['ma_10'] = dataset['Close'].rolling(window=10).mean()
    
    dataset['EWMA_12'] = dataset['Close'].ewm(span=12).mean()

    dataset['std_5'] = dataset['Close'].rolling(window=5).std()
    dataset['std_10'] = dataset['Close'].rolling(window=10).std()
    
    dataset['RSI'] = talib.RSI(dataset['Close'].values, timeperiod = 14)
    dataset['Williams %R'] = talib.WILLR(dataset['High'].values, dataset['Low'].values, dataset['Close'].values, 7)
    dataset['SAR'] = talib.SAR(np.array(dataset['High']), np.array(dataset['Low']), 0.2, 0.2)
    dataset['ADX'] = talib.ADX(np.array(dataset['High']), np.array(dataset['Low']), np.array(dataset['Close']), 10)

    dataset['Price_Rise'] = np.where(dataset['Close'].shift(-1) > dataset['Close'], 1, 0)
    
    dataset = dataset.dropna()
    X = dataset.iloc[:, 4:-1]
    y = dataset.iloc[:, -1]
    
    split = int(len(dataset)*0.8)
    X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
    
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    
    return X_train, X_test, y_train, y_test

data_preprocessing('RELIANCE.NS')

(array([[ 1.24990328, -0.25318244, -2.10975508, ...,  0.68610529,
         -2.25878472, -0.75096482],
        [ 0.58656108,  0.55909951, -2.05514846, ...,  1.00355479,
         -2.21475089, -0.60958484],
        [-0.1388872 , -0.17567447, -2.03235373, ...,  0.84622464,
         -2.13745601, -0.477388  ],
        ...,
        [ 0.43399882,  1.20707064,  4.37786509, ...,  1.46602741,
          4.12195834,  2.19843306],
        [ 1.51569103, -2.00175908,  4.46754708, ...,  0.64526713,
          4.21589455,  2.22327845],
        [ 0.36991899,  0.25526678,  4.48040733, ...,  0.52958064,
          4.60241203,  2.13800853]]),
 array([[-1.82774403e-02,  8.78433841e-01,  4.48033922e+00, ...,
          1.04266058e+00,  4.60241203e+00,  2.11142855e+00],
        [ 6.18674611e-01, -1.13057410e+00,  4.48231243e+00, ...,
         -5.76140063e-02,  4.24219651e+00,  2.03866469e+00],
        [ 3.43538047e-01,  1.24736872e+00,  4.49109003e+00, ...,
          1.29259046e+00,  4.30245686e+00,  2.02745390e+

In [64]:
from sklearn import svm
from collections import Counter

def svm_linear(ticker):
    X_train, X_test, y_train, y_test = data_preprocessing(ticker)
    clf = svm.SVC(kernel = 'linear')
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:',Counter(predictions))
    
svm_linear('RELIANCE.NS')    

accuracy: 0.51434034416826
predicted class counts: Counter({0: 469, 1: 54})


In [65]:
def svm_poly(ticker):
    X_train, X_test, y_train, y_test = data_preprocessing(ticker)
    clf = svm.SVC(kernel = 'poly')
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:',Counter(predictions))
    
svm_poly('RELIANCE.NS')   

accuracy: 0.5315487571701721
predicted class counts: Counter({1: 377, 0: 146})


In [66]:
def svm_rbf(ticker):
    X_train, X_test, y_train, y_test = data_preprocessing(ticker)
    clf = svm.SVC(kernel = 'rbf')
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)
    predictions = clf.predict(X_test)
    print('predicted class counts:',Counter(predictions))
    
svm_rbf('RELIANCE.NS')  

accuracy: 0.5200764818355641
predicted class counts: Counter({0: 388, 1: 135})
