In [5]:
# imports
import numpy as np
import pandas as pd
import pickle

In [6]:
# initial data clean to get rid of na values
fnames = ['closeRussell1000', 'closeWilshire5000']

# suffix the raw column headers are expected to carry after the ticker
suffix = '.Close'

for fname in fnames:
    # read in data; the first CSV column becomes the index
    df = pd.read_csv(f'./data/{fname}.csv', index_col=0)
    # drop columns with no data
    df = df.dropna(axis=1, how='all')
    # format column names: remove only a trailing '.Close'.
    # str.strip('.Close') treats its argument as a set of characters and
    # would also eat matching characters from the ticker itself
    # (e.g. 'CSCO.Close' -> 'SCO', 'COO.Close' -> 'OO').
    df.columns = [x[:-len(suffix)] if x.endswith(suffix) else x for x in df.columns]
    # write clean data; index=False leaves the index column out of the file
    df.to_csv(f'./data/{fname}_clean.csv', index=False)
    # drop the reference to the frame before the next file is loaded
    df = None

In [7]:
# build the delta array: pairwise differences between column returns
fnames = ['closeRussell1000']

for fname in fnames:
    df = pd.read_csv(f'./data/{fname}_clean.csv')
    n, p = df.shape

    # close prices -> returns
    df = df.pct_change()

    # one delta column per unordered column pair, i.e. p*(p-1)/2 columns
    n_pairs = p * (p-1) // 2
    delta = np.zeros((n, n_pairs))
    pair_names = []

    # `start` marks the first delta column that has not been filled yet
    start = 0
    for i in range(p-1):
        base = df.iloc[:, i]
        others = df.iloc[:, i+1:]

        # label each pair as '<base column>+<later column>'
        labels = [base.name + '+' + other for other in others.columns]
        pair_names.extend(labels)

        # stored values are (later column - base column), row by row
        stop = start + len(labels)
        delta[:, start:stop] = others.sub(base, axis=0).values
        start = stop

        # drop references to the per-column temporaries
        base = others = labels = None

    df = None
    print(f'Made the delta array for {fname}. Shape {delta.shape}')

    # export delta array
    with open(f'./data/{fname}_delta.pkl', 'wb') as f:
        pickle.dump(pd.DataFrame(delta, columns=pair_names), f)

    delta = pair_names = None

Made the delta array for closeRussell1000. Shape (2769, 459361)
