In [1]:
import numpy as np
import pandas as pd

In [2]:
# Fix the seed of NumPy's global random generator so that every later draw in
# this notebook is reproducible from a fresh kernel.
np.random.seed(seed=2019)

In [3]:
### Generate a fake data set of stock returns and persist it as data.csv
# 1000 rows x 100 columns of standard-normal draws from NumPy's global RNG.
fake_returns = pd.DataFrame(np.random.randn(1000, 100))
fake_returns.to_csv("data.csv")

In [16]:
### Read data.csv
# `pd.DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `pd.read_csv(..., index_col=0)` is the documented replacement. The first CSV
# column holds the row index written by `to_csv`, so it is restored as the
# index; the remaining column labels come back as strings ("0", "1", ...).
data = pd.read_csv("data.csv", index_col=0)

  


In [28]:
# Rows are treated as days and columns as stocks, so the frame's shape yields
# both counts in one unpacking.
num_of_days, num_of_stocks = data.shape

In [49]:
### Finding the top 3 pair-trading strategies using the z-score of compounded returns

## Before running it for the entire data frame, first try it on just two columns.
## Column labels are strings, hence "0" and "1".
d1 = np.asarray(data.loc[:, "0"])
d2 = np.asarray(data.loc[:, "1"])

## Long d1, short d2: per-period return of the spread.
d = d1 - d2

## Compounded return index: processed_d[i] = (1 + d[0]) * ... * (1 + d[i]).
processed_d = np.cumprod(1 + d)

## Number of standard deviations by which the index's peak exceeds its mean.
index_peak = max(processed_d)
z_score_d = (index_peak - np.mean(processed_d)) / np.std(processed_d)
z_score_d

9.028293182853501

In [60]:
## Given 100 different stock return series, we can apply the above procedure to every pair of stocks to get a pair-trading z-score.
## Doing this for all ordered pairs would give a 100*100 matrix.
## A more efficient way is to compute only the upper-triangular elements (i < j) and find the maximum of absolute values.

def z_score_from_cumulative_returns(a, b):
    """Z-score of the peak of the compounded long-``a``/short-``b`` return index.

    The per-period spread return is ``d = a - b``. It is compounded into an
    index ``idx[i] = (1 + d[0]) * ... * (1 + d[i])`` and the function returns
    ``(max(idx) - mean(idx)) / std(idx)``, where ``std`` is NumPy's default
    population standard deviation (``ddof=0``).

    Parameters
    ----------
    a : array-like of float
        One-dimensional per-period returns of the long leg.
    b : array-like of float
        One-dimensional per-period returns of the short leg, same length as ``a``.

    Returns
    -------
    numpy.float64
        The z-score. It is ``nan``/``inf`` (with a NumPy runtime warning) when
        the index has zero standard deviation, e.g. for a single observation.

    Raises
    ------
    ValueError
        If the inputs are not one-dimensional, are empty, or have lengths that
        NumPy cannot subtract element-wise.
    """
    # Accept plain sequences as well as arrays; subtraction needs ndarray semantics.
    spread = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    if spread.ndim != 1:
        raise ValueError("a and b must be one-dimensional return series")
    if spread.size == 0:
        raise ValueError("a and b must contain at least one return")

    # cumprod multiplies sequentially, exactly like the explicit running product.
    growth_index = np.cumprod(1.0 + spread)
    return (growth_index.max() - growth_index.mean()) / growth_index.std()

## test the function
#test_z = z_score_from_cumulative_returns(d1, d2)
#test_z

## Upper-triangular matrix of z-scores: entry [i, j] with i < j is the z-score
## of the strategy that is long stock i and short stock j. The diagonal and the
## lower triangle stay at zero.
returns_of_pairs = np.zeros((num_of_stocks, num_of_stocks))

## Pull each column out of the frame once, by position, instead of doing a
## label lookup for both legs on every inner iteration. This also avoids
## relying on the column labels being the strings "0", "1", ... and leaves the
## d1/d2 variables of the two-column example untouched.
stock_returns = [np.asarray(data.iloc[:, k]) for k in range(num_of_stocks)]

for i in range(num_of_stocks):
    for j in range(i + 1, num_of_stocks):
        returns_of_pairs[i, j] = z_score_from_cumulative_returns(stock_returns[i], stock_returns[j])

In [61]:
returns_of_pairs

array([[ 0.        ,  9.02829318, 15.29889929, ..., 18.3919948 ,
        12.9178282 , 16.39251758],
       [ 0.        ,  0.        , 21.38905089, ..., 22.89546299,
        24.37765775, 21.9723015 ],
       [ 0.        ,  0.        ,  0.        , ..., 26.35094816,
        29.19788938,  7.99241458],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
        11.79325306, 17.06775045],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        , 24.09992918],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [62]:
## Build the full matrix from the upper-triangular z-scores: entry [j][i] becomes
## the negative of entry [i][j], and the diagonal stays at zero.
## NOTE(review): this treats "long j / short i" as the exact negative of
## "long i / short j". z_score_from_cumulative_returns(b, a) is (max - mean) / std
## of a compounded index and is not, in general, equal to
## -z_score_from_cumulative_returns(a, b), so the lower triangle is a sign
## convention rather than a separately computed score -- confirm this is intended.
pair_trading_strategy = returns_of_pairs - np.transpose(returns_of_pairs)

In [63]:
pair_trading_strategy

array([[  0.        ,   9.02829318,  15.29889929, ...,  18.3919948 ,
         12.9178282 ,  16.39251758],
       [ -9.02829318,   0.        ,  21.38905089, ...,  22.89546299,
         24.37765775,  21.9723015 ],
       [-15.29889929, -21.38905089,   0.        , ...,  26.35094816,
         29.19788938,   7.99241458],
       ...,
       [-18.3919948 , -22.89546299, -26.35094816, ...,   0.        ,
         11.79325306,  17.06775045],
       [-12.9178282 , -24.37765775, -29.19788938, ..., -11.79325306,
          0.        ,  24.09992918],
       [-16.39251758, -21.9723015 ,  -7.99241458, ..., -17.06775045,
        -24.09992918,   0.        ]])

In [64]:
### Get largest indices

def get_largest_indices(arr, n):
    """Return the indices of the ``n`` largest entries of ``arr``, largest first.

    Parameters
    ----------
    arr : array-like
        Array to search; it is flattened in row-major order internally.
    n : int
        Number of entries to select. ``0`` yields empty index arrays, and a
        value larger than the number of elements is clamped to that number.

    Returns
    -------
    tuple of numpy.ndarray
        One integer index array per dimension of ``arr``; position ``k`` of
        each array together addresses the ``k``-th largest entry.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    arr = np.asarray(arr)
    if n < 0:
        raise ValueError("n must be non-negative")

    flat = arr.ravel()
    # Asking for more entries than exist returns all of them instead of
    # letting argpartition fail with an out-of-bounds kth.
    n = min(n, flat.size)
    if n == 0:
        # -0 == 0, so the slice [-n:] below would select every index, not none.
        return tuple(np.empty(0, dtype=np.intp) for _ in arr.shape)

    # argpartition places the n largest values (unordered) in the last n slots.
    indices = np.argpartition(flat, -n)[-n:]
    # Order just those n candidates from largest to smallest.
    indices = indices[np.argsort(-flat[indices])]
    return np.unravel_index(indices, arr.shape)

### Pick the 3 highest-scoring (row, column) index pairs of the strategy matrix

best_pairs = get_largest_indices(arr=pair_trading_strategy, n=3)

In [68]:
# Row indices of the strategy matrix are the long legs, column indices the short legs.
long_in_pair, short_in_pair = best_pairs[0], best_pairs[1]

# Pair the legs up position by position as [long, short] lists.
long_short_pairs = [
    [long_leg, short_leg]
    for long_leg, short_leg in zip(long_in_pair, short_in_pair)
]

long_short_pairs

[[16, 19], [6, 20], [27, 29]]