In [1]:
import pandas as pd
import numpy as np
import math
from nsepy import get_history
from datetime import date, timedelta
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

In [2]:
def get_pair_data(stock1, stock2, delta=300, end=None):
    """Download closing prices for two symbols and align them by date.

    Parameters
    ----------
    stock1, stock2 : str
        Symbols forwarded to ``get_history``.
    delta : int, default 300
        Size of the look-back window in days, counted back from ``end``.
    end : datetime.date, optional
        Last day of the window. When omitted, today's date is used and is
        resolved on every call. (A ``date.today()`` default in the signature
        is evaluated only once, when the defining cell is run, and would go
        stale in a long-lived kernel.)

    Returns
    -------
    pandas.DataFrame
        Column ``'X'`` holds the close of ``stock1`` and column ``'Y'`` the
        close of ``stock2``. Only dates present in both histories are kept.
    """
    if end is None:
        end = date.today()
    start = end - timedelta(delta)

    close_x = get_history(symbol=stock1, start=start, end=end)['Close'].rename('X')
    close_y = get_history(symbol=stock2, start=start, end=end)['Close'].rename('Y')

    # Join on the index rather than trimming both series to a common length:
    # concat aligns by label, so histories that miss different dates would
    # otherwise produce NaN rows that break the downstream regression.
    return pd.concat([close_x, close_y], axis=1, join='inner')

In [3]:
def calculate_error_ratio(stk1, stk2, delta=500):
    """Ratio of the intercept's standard error to the residual standard error.

    Fits an OLS regression of ``stk2`` closes (Y) on ``stk1`` closes (X) with
    an added constant term.

    Parameters
    ----------
    stk1, stk2 : str
        Symbols passed to ``get_pair_data``; ``stk1`` becomes X, ``stk2`` Y.
    delta : int, default 500
        Look-back window forwarded to ``get_pair_data``.

    Returns
    -------
    float
        ``bse`` of the first fitted coefficient divided by ``RSE`` of the fit.
    """
    data = get_pair_data(stk1, stk2, delta)

    endog = data['Y']
    exog = sm.add_constant(data['X'])
    model = sm.OLS(endog, exog).fit()

    # `bse` is indexed by coefficient name, so select the first entry (the
    # constant added above) by position explicitly. An integer key in plain
    # `[]` on a label-indexed Series is deprecated in pandas.
    std_err_intercept = model.bse.iloc[0]
    std_error = RSE(endog, model.predict(exog))

    return std_err_intercept / std_error

In [4]:
def RSE(y_true, y_predicted):
    """
    Residual standard error: sqrt(RSS / (n - 2)), where RSS is the sum of
    squared differences and n is the number of actual values.

    - y_true: Actual values
    - y_predicted: Predicted values
    """
    actual = np.array(y_true)
    fitted = np.array(y_predicted)

    squared_residuals = np.square(actual - fitted)
    denominator = len(actual) - 2

    return math.sqrt(np.sum(squared_residuals) / denominator)

In [5]:
def stock_by_error_ratio(stk_cmb):
    """Choose, for every candidate pair, the X/Y ordering with the lower error ratio.

    Parameters
    ----------
    stk_cmb : pandas.DataFrame
        Must contain the columns ``'Sector'``, ``'Stock1'`` and ``'Stock2'``.

    Returns
    -------
    pandas.DataFrame
        One row per input row with columns ``'Sector'``, ``'X'`` and ``'Y'``.
        ``X``/``Y`` are ``Stock1``/``Stock2`` when
        ``calculate_error_ratio(Stock1, Stock2)`` is strictly smaller than the
        reverse ordering, otherwise they are swapped.
    """
    total = len(stk_cmb)
    records = []

    for position, (_, row) in enumerate(stk_cmb.iterrows()):
        # Countdown of pairs still to process. It is counted by position so
        # it stays correct even if the frame's index is not 0..n-1.
        print(total - position)

        first, second = row['Stock1'], row['Stock2']
        err_ratio_1 = calculate_error_ratio(first, second)
        err_ratio_2 = calculate_error_ratio(second, first)

        if err_ratio_1 < err_ratio_2:
            records.append((row['Sector'], first, second))
        else:
            records.append((row['Sector'], second, first))

    # Build the frame once instead of enlarging it row by row inside the loop.
    return pd.DataFrame(records, columns=['Sector', 'X', 'Y'])

In [6]:
def prepare_data(corr_path='Correlation_NSE_100.csv',
                 futures_path='Future_Stocks.csv',
                 min_corr=0.79):
    """Load the candidate pairs and keep the ones usable for trading.

    Parameters
    ----------
    corr_path : str, default 'Correlation_NSE_100.csv'
        CSV with at least the columns ``'Stock1'``, ``'Stock2'`` and
        ``'Corr_value'``.
    futures_path : str, default 'Future_Stocks.csv'
        CSV with a ``'Symbol'`` column listing the allowed symbols.
    min_corr : float, default 0.79
        A pair is kept only when its ``'Corr_value'`` is strictly greater
        than this value.

    Returns
    -------
    pandas.DataFrame
        Rows of ``corr_path`` whose two symbols both appear in
        ``futures_path`` and whose correlation exceeds ``min_corr``, with the
        index renumbered from zero.
    """
    stk_cmb = pd.read_csv(corr_path)
    fut_stks = pd.read_csv(futures_path)['Symbol'].tolist()

    keep = (
        stk_cmb['Stock1'].isin(fut_stks)
        & stk_cmb['Stock2'].isin(fut_stks)
        & (stk_cmb['Corr_value'] > min_corr)
    )

    return stk_cmb[keep].reset_index(drop=True)

In [7]:
#stk_cmb = prepare_data()

## Run once to choose, for each pair, the X/Y ordering with the lower error ratio and save the result

#stock_by_err = stock_by_error_ratio(stk_cmb)
#stock_by_err.to_csv('Pair_stock_lower_error_ratio.csv', index=False, header = True)

In [8]:
def return_adf_pvalue(X, Y):
    """Regress Y on X (plus a constant) and ADF-test the residuals.

    Returns the second element of the ``adfuller`` result, i.e. the p-value
    of the augmented Dickey-Fuller test run on ``Y - fitted`` with
    ``autolag='AIC'``.
    """
    design = sm.add_constant(X)
    fitted = sm.OLS(Y, design).fit()

    residuals = Y - fitted.predict(design)

    _, p_value, *_ = adfuller(residuals, autolag='AIC')
    return p_value

In [9]:
def store_adf_value(pair):
    """Attach an ADF p-value to every row of ``pair``.

    Adds (or resets) the column ``'adf_test_pval'`` on the frame that was
    passed in, fills it row by row from ``return_adf_pvalue`` applied to the
    prices fetched by ``get_pair_data``, and returns that same frame.
    """
    pair['adf_test_pval'] = np.nan

    for label, symbol_x, symbol_y in zip(pair.index, pair['X'], pair['Y']):
        # Progress countdown, computed from the row's index label.
        print(len(pair) - label)

        prices = get_pair_data(symbol_x, symbol_y)
        p_value = return_adf_pvalue(prices['X'], prices['Y'])
        pair.loc[label, 'adf_test_pval'] = p_value

    return pair

In [10]:
# Load the saved list of pairs into the notebook-level `pair` frame.
# NOTE(review): the filtering cell further down reads an 'adf_test_pval'
# column, so this CSV presumably already contains it. Confirm, or run the
# two commented lines below first.
pair = pd.read_csv('Pair_stock_lower_error_ratio.csv')
# Uncomment to recompute the ADF p-value for every pair and write the result
# back to the same CSV.
#pair = store_adf_value(pair)
#pair.to_csv('Pair_stock_lower_error_ratio.csv', index=False, header = True)

In [11]:
def check_daily_pair_trade(pair, threshold=2.5, lookback=300, days=3):
    """Print trade signals for each pair over the most recent end dates.

    For every row of ``pair`` and for each of the last ``days`` calendar days
    (oldest first, ending today), the function fetches prices with
    ``get_pair_data``, regresses Y on X with a constant, and divides the
    latest residual by the fit's ``RSE``. A signal is printed when that
    standardised residual is at or beyond ``threshold`` in either direction.

    Parameters
    ----------
    pair : pandas.DataFrame
        Must contain the columns ``'X'`` and ``'Y'`` holding the symbols.
    threshold : float, default 2.5
        Absolute standardised-residual level that triggers a signal.
    lookback : int, default 300
        Window length forwarded to ``get_pair_data``.
    days : int, default 3
        Number of consecutive end dates to evaluate, counting back from today.

    Returns
    -------
    None
        Results are reported through ``print`` only.
    """
    total = len(pair)

    for position, (_, row) in enumerate(pair.iterrows()):
        # Countdown of pairs still to process. It is counted by position so
        # it does not depend on the frame's index labels.
        print(total - position)

        for days_back in range(days - 1, -1, -1):
            data = get_pair_data(row['X'], row['Y'], lookback, date.today() - timedelta(days_back))

            Y = data['Y']
            X = sm.add_constant(data['X'])
            model = sm.OLS(Y, X).fit()

            rse = RSE(Y, model.predict(X))

            # Residual of the most recent observation, scaled by the RSE.
            # All scalar look-ups below use `.iloc`: these Series are indexed
            # by date or by coefficient name, and an integer key in plain
            # `[]` on such a Series is deprecated in pandas.
            std_err = (Y.iloc[-1:] - model.predict(X.iloc[-1:])) / rse
            latest = std_err.iloc[0]

            if latest <= -threshold:
                beta = model.params.iloc[1]
                intercept = model.params.iloc[0]

                print('Long:', row['Y'], '(', str(data['Y'].iloc[-1]), ') Short:', row['X'], '(', str(data['X'].iloc[-1]), ')')
                print(std_err)
                print('Buy 1 lot of', row['Y'], '& Sell', str(beta), '* Number of shares of Y of', row['X'])
                print('Beta:', str(beta), 'Intercept:', str(intercept))

            elif latest >= threshold:
                beta = model.params.iloc[1]
                intercept = model.params.iloc[0]

                print('Long:', row['X'], '(', str(data['X'].iloc[-1]), ') Short:', row['Y'], '(', str(data['Y'].iloc[-1]), ')')
                print(std_err)
                print('Sell 1 lot of', row['Y'], '& Buy', str(beta), '* Number of shares of Y of', row['X'])
                print('Beta:', str(beta), 'Intercept:', str(intercept))

In [12]:
# Keep only the pairs whose stored ADF p-value is at most 0.05, renumber the
# rows from zero, then scan the remaining pairs for trade signals.
pair = pair.loc[pair['adf_test_pval'].le(0.05)].reset_index(drop=True)

check_daily_pair_trade(pair)

15


  return ptp(axis=axis, out=out, **kwargs)


14
13
12
11
10
9
8
7
6
5
Long: PNB ( 27.75 ) Short: INDUSINDBK ( 619.1 )
Date
2020-10-07   -2.800218
dtype: float64
Buy 1 lot of PNB & Sell 0.030633819810783804 * Number of shares of Y of INDUSINDBK
Beta: 0.030633819810783804 Intercept: 17.387933621808603
Long: PNB ( 27.8 ) Short: INDUSINDBK ( 617.7 )
Date
2020-10-08   -2.706079
dtype: float64
Buy 1 lot of PNB & Sell 0.030685231983171364 * Number of shares of Y of INDUSINDBK
Beta: 0.030685231983171364 Intercept: 17.312506889394356
4
3
2
1
