# AlphaNetPro

## Import packages

In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shap

from tensorflow.keras import models, layers
from itertools import combinations

## Create synthetic data

In [2]:
SEED = 42    # Fixed seed so Restart & Run All regenerates the same data
ndays = 300  # Number of days
nfeats = 5   # Number of features

# Local generator: keeps this cell reproducible without touching the
# global NumPy random state that other cells may rely on.
rng = np.random.default_rng(SEED)

# Column labels are zero-padded day indices ('T000' ... 'T299') so that
# lexical order equals chronological order.
maxlen = len(str(ndays - 1))
day_labels = [f'T{i:0{maxlen}d}' for i in range(ndays)]
features = [f'F{i}' for i in range(nfeats)]

# One row of `values` per day, one uniform [0, 1) draw per feature.
values = rng.random((ndays, nfeats))
raw_data = dict(zip(day_labels, values))

# Rows are features, columns are days.
df = pd.DataFrame(raw_data, index=features)

In [3]:
df

Unnamed: 0,T000,T001,T002,T003,T004,T005,T006,T007,T008,T009,...,T290,T291,T292,T293,T294,T295,T296,T297,T298,T299
F0,0.561847,0.556159,0.373207,0.877459,0.665455,0.177736,0.021531,0.442589,0.227059,0.42207,...,0.049667,0.602843,0.580337,0.936373,0.240154,0.552995,0.032935,0.634336,0.804172,0.309535
F1,0.936933,0.083131,0.786935,0.196082,0.747721,0.5171,0.428441,0.53168,0.889945,0.090909,...,0.815135,0.401051,0.221881,0.780806,0.231722,0.470978,0.256339,0.570805,0.368559,0.335695
F2,0.260988,0.672641,0.939384,0.333104,0.236288,0.727627,0.940672,0.222917,0.565081,0.286562,...,0.425886,0.780106,0.985521,0.908501,0.165153,0.073489,0.507545,0.578147,0.299942,0.187873
F3,0.565483,0.99978,0.689166,0.172858,0.049088,0.034513,0.946713,0.970514,0.127533,0.92826,...,0.818316,0.933305,0.531704,0.499935,0.492364,0.392439,0.595098,0.893342,0.245649,0.709278
F4,0.668637,0.539143,0.393539,0.604186,0.569365,0.075279,0.934851,0.623224,0.239556,0.674519,...,0.130948,0.323797,0.52814,0.800513,0.046259,0.179088,0.483346,0.547428,0.719714,0.377944


## Function to Generate Samples

In [34]:
def generate_samples(data, rtdays=5, width=30, strides=1):
    """Slice a feature-by-day frame into rolling windows with forward returns.

    Windows are anchored at the right-hand side of ``data``: the last window
    ends ``rtdays`` columns before the final column, and earlier windows end
    ``strides`` columns apart. Results are returned in chronological order.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows are features, columns are days. The LAST row is used as the
        series from which the target return is computed.
    rtdays : int, default 5
        Return horizon; ``rtdays=5`` means the target is the 5-day return.
    width : int, default 30
        Number of columns (days) in each window. Must be >= 1.
    strides : int, default 1
        Distance in columns between consecutive window ends. Must be >= 1.

    Returns
    -------
    dates : list
        Column label of the last day of each window.
    samples : list of pandas.DataFrame
        ``data.iloc[:, end-width+1 : end+1]`` for each window end.
    targets : list
        ``(last_row[end + rtdays] - last_row[end]) / last_row[end]``.

    Raises
    ------
    ValueError
        If ``strides`` or ``width`` is smaller than 1 (a zero stride would
        otherwise never terminate).
    """
    if strides < 1:
        raise ValueError(f"strides must be >= 1, got {strides}")
    if width < 1:
        raise ValueError(f"width must be >= 1, got {width}")

    dates = []
    samples = []
    targets = []

    # Latest usable window end: leave `rtdays` columns after it for the target.
    last_end = data.shape[1] - 1 - rtdays

    # Window ends counted back from `last_end`; the smallest admissible end is
    # `width - 1` (a full window must fit). Iterating the reversed range gives
    # chronological order directly.
    for end in reversed(range(last_end, width - 2, -strides)):
        base = data.iloc[-1, end]
        targets.append((data.iloc[-1, end + rtdays] - base) / base)
        dates.append(data.columns[end])
        samples.append(data.iloc[:, end - width + 1:end + 1])

    return dates, samples, targets

In [35]:
# Default 30-day windows and 5-day return horizon, with a window ending every
# 2nd day; the return is computed from the last row of df.
dates, samples, targets = generate_samples(df, strides=2)

In [36]:
len(dates)

133

In [37]:
len(samples)

133

In [38]:
len(targets)

133

In [39]:
dates[:10]

['T030',
 'T032',
 'T034',
 'T036',
 'T038',
 'T040',
 'T042',
 'T044',
 'T046',
 'T048']

In [40]:
dates[-10:]

['T276',
 'T278',
 'T280',
 'T282',
 'T284',
 'T286',
 'T288',
 'T290',
 'T292',
 'T294']

In [41]:
samples[0]

Unnamed: 0,T001,T002,T003,T004,T005,T006,T007,T008,T009,T010,...,T021,T022,T023,T024,T025,T026,T027,T028,T029,T030
F0,0.556159,0.373207,0.877459,0.665455,0.177736,0.021531,0.442589,0.227059,0.42207,0.923863,...,0.788171,0.689188,0.658864,0.717001,0.803317,0.676335,0.731325,0.32172,0.260146,0.75691
F1,0.083131,0.786935,0.196082,0.747721,0.5171,0.428441,0.53168,0.889945,0.090909,0.560451,...,0.11084,0.056375,0.968797,0.875142,0.217317,0.280666,0.026681,0.131964,0.478317,0.134705
F2,0.672641,0.939384,0.333104,0.236288,0.727627,0.940672,0.222917,0.565081,0.286562,0.776221,...,0.664394,0.200664,0.354214,0.919681,0.923825,0.790384,0.422235,0.525505,0.378268,0.936806
F3,0.99978,0.689166,0.172858,0.049088,0.034513,0.946713,0.970514,0.127533,0.92826,0.150337,...,0.632025,0.102792,0.855916,0.542882,0.32742,0.170417,0.339555,0.030155,0.33759,0.687088
F4,0.539143,0.393539,0.604186,0.569365,0.075279,0.934851,0.623224,0.239556,0.674519,0.164995,...,0.169398,0.009918,0.020743,0.299153,0.151802,0.905619,0.492402,0.778167,0.035317,0.154821


In [42]:
samples[-1]

Unnamed: 0,T265,T266,T267,T268,T269,T270,T271,T272,T273,T274,...,T285,T286,T287,T288,T289,T290,T291,T292,T293,T294
F0,0.410957,0.765139,0.975005,0.70727,0.054768,0.073876,0.170793,0.068277,0.821444,0.039953,...,0.407677,0.307475,0.110347,0.11818,0.523271,0.049667,0.602843,0.580337,0.936373,0.240154
F1,0.472683,0.450313,0.715101,0.61283,0.175354,0.035162,0.937566,0.603728,0.320928,0.580583,...,0.324284,0.49347,0.185797,0.357509,0.124449,0.815135,0.401051,0.221881,0.780806,0.231722
F2,0.278356,0.352683,0.315111,0.134359,0.313781,0.238001,0.073813,0.196808,0.953729,0.438698,...,0.531658,0.187561,0.745921,0.912052,0.960048,0.425886,0.780106,0.985521,0.908501,0.165153
F3,0.439742,0.320578,0.71996,0.220081,0.68668,0.67816,0.854477,0.466518,0.064277,0.287079,...,0.246215,0.187429,0.354687,0.437826,0.056581,0.818316,0.933305,0.531704,0.499935,0.492364
F4,0.846354,0.68883,0.001974,0.543298,0.309806,0.602124,0.266064,0.750834,0.388592,0.449374,...,0.593193,0.306052,0.534498,0.618617,0.542047,0.130948,0.323797,0.52814,0.800513,0.046259


In [43]:
targets[0]

-0.5939463902823945

In [44]:
targets[-1]

7.170149687489093

In [107]:
# Chronological split: the first `split_at` windows train, the rest test.
split_at = 100

train_data, test_data = (
    np.array(part, dtype='float32')
    for part in (samples[:split_at], samples[split_at:])
)
train_targets, test_targets = (
    np.array(part, dtype='float32')
    for part in (targets[:split_at], targets[split_at:])
)

## Feature Extraction Functions

In [52]:
def ts_corr(X, Y, d=2, s=1):
    """Rolling Pearson correlation between two equally indexed series.

    Windows of length ``d`` are taken starting from the END of the series and
    stepping back ``s`` positions at a time, so the first element of the
    result belongs to the most recent window.

    Returns a 1-D float32 array with one correlation per window (empty when
    the series is shorter than ``d``).
    """
    window_end = len(X)  # exclusive end of the current window
    correlations = []

    while window_end >= d:
        window_start = window_end - d
        corr_matrix = np.corrcoef(X[window_start:window_end],
                                  Y[window_start:window_end])
        correlations.append(corr_matrix[0, 1])
        window_end -= s

    return np.array(correlations, dtype='float32')

In [53]:
def ts_cov(X, Y, d=2, s=1):
    """Rolling covariance between two equally indexed series.

    Windows of length ``d`` are taken from the end of the series backwards
    in steps of ``s``; the result is ordered most-recent window first. The
    covariance is the off-diagonal entry of ``np.cov`` with its default
    settings.

    Returns a 1-D float32 array with one covariance per window.
    """
    window_end = len(X)  # exclusive end of the current window
    covariances = []

    while window_end >= d:
        window_start = window_end - d
        cov_matrix = np.cov(X[window_start:window_end],
                            Y[window_start:window_end])
        covariances.append(cov_matrix[0, 1])
        window_end -= s

    return np.array(covariances, dtype='float32')

In [109]:
def ts_stdev(X, d=2, s=1):
    """Rolling standard deviation (``np.std`` defaults) of a series.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first.

    Returns a 1-D float32 array with one value per window.
    """
    window_end = len(X)  # exclusive end of the current window
    deviations = []

    while window_end >= d:
        window = X[window_end - d:window_end]
        deviations.append(np.std(window))
        window_end -= s

    return np.array(deviations, dtype='float32')

In [61]:
def ts_zscore(X, d=2, s=1):
    """Rolling ratio of window mean to window standard deviation.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first.

    Note: the value is ``mean(window) / std(window)``, not a standardised
    observation. The division is unguarded, so a window with zero standard
    deviation divides by zero.

    Returns a 1-D float32 array with one value per window.
    """
    window_end = len(X)  # exclusive end of the current window
    ratios = []

    while window_end >= d:
        window = X[window_end - d:window_end]
        window_mean = np.mean(window)
        window_std = np.std(window)
        ratios.append(window_mean / window_std)
        window_end -= s

    return np.array(ratios, dtype='float32')

In [65]:
def ts_return(X, d=2, s=1):
    """Rolling simple return over each window: ``(last - first) / first``.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first. The
    division by the window's first element is unguarded.

    Returns a 1-D float32 array with one return per window.
    """
    window_end = len(X)  # exclusive end of the current window
    returns = []

    while window_end >= d:
        window = X[window_end - d:window_end]
        change = window[-1] - window[0]
        returns.append(change / window[0])
        window_end -= s

    return np.array(returns, dtype='float32')

In [69]:
def ts_decaylinear(X, d=2, s=1):
    """Rolling linearly weighted average of a series.

    Each window of length ``d`` is weighted by ``1, 2, ..., d`` (the most
    recent element gets the largest weight), normalised by the sum of the
    weights. Windows are taken from the end of ``X`` backwards in steps of
    ``s``; the result is ordered most-recent window first.

    Returns a 1-D float32 array with one weighted average per window.
    """
    # The weights depend only on `d`, so build them once instead of on every
    # iteration (the original rebuilt them with a comprehension whose
    # variable shadowed the loop index).
    weights = np.arange(1, d + 1)
    weight_total = np.sum(weights)

    res = []
    i = len(X) - 1  # inclusive index of the current window's last element

    while i - d + 1 >= 0:
        Xsub = X[i - d + 1:i + 1]
        res.append(np.sum(Xsub * weights / weight_total))
        i -= s

    return np.array(res, dtype='float32')

In [54]:
# Two short toy series used in the following cells to call each ts_* function.
a = np.array([1, 2, 3, 4, 5])
b = np.array([6, 7, 8, 9, 10])

In [55]:
ts_corr(a, b, d=3, s=3)

array([1.], dtype=float32)

In [56]:
ts_cov(a, b, d=3, s=3)

array([1.], dtype=float32)

In [60]:
ts_stdev(a, d=3, s=3)

array([0.8164966], dtype=float32)

In [62]:
ts_zscore(a, d=3, s=3)

array([4.8989797], dtype=float32)

In [66]:
ts_return(a, d=3, s=3)

array([0.6666667], dtype=float32)

In [70]:
ts_decaylinear(a, d=3, s=3)

array([4.3333335], dtype=float32)

## Pooling Functions

In [72]:
def ts_min(X, d=2, s=1):
    """Rolling minimum of a series.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first.

    Returns a 1-D float32 array with one minimum per window.
    """
    window_end = len(X)  # exclusive end of the current window
    minima = []

    while window_end >= d:
        minima.append(np.min(X[window_end - d:window_end]))
        window_end -= s

    return np.array(minima, dtype='float32')

In [73]:
def ts_max(X, d=2, s=1):
    """Rolling maximum of a series.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first.

    Returns a 1-D float32 array with one maximum per window.
    """
    window_end = len(X)  # exclusive end of the current window
    maxima = []

    while window_end >= d:
        maxima.append(np.max(X[window_end - d:window_end]))
        window_end -= s

    return np.array(maxima, dtype='float32')

In [74]:
def ts_sum(X, d=2, s=1):
    """Rolling sum of a series.

    Windows of length ``d`` are taken from the end of ``X`` backwards in
    steps of ``s``; the result is ordered most-recent window first.

    Returns a 1-D float32 array with one sum per window.
    """
    window_end = len(X)  # exclusive end of the current window
    totals = []

    while window_end >= d:
        totals.append(np.sum(X[window_end - d:window_end]))
        window_end -= s

    return np.array(totals, dtype='float32')

## Function to Generate Synthetic Features

In [75]:
def generate_syn_feats(feats, fn_names):
    """Build display labels for every synthetic feature.

    For the pairwise functions ``'ts_corr'`` and ``'ts_cov'`` one label is
    produced per unordered pair of features (in ``itertools.combinations``
    order); for every other function name one label is produced per feature.
    Labels follow the order of ``fn_names``.

    Returns a list of strings such as ``'N(ts_corr(F0,F1))'`` or
    ``'N(ts_stdev(F0))'``.
    """
    pairwise_names = ('ts_corr', 'ts_cov')
    labels = []

    for name in fn_names:
        if name in pairwise_names:
            labels.extend(
                f"N({name}({left},{right}))"
                for left, right in combinations(feats, 2)
            )
        else:
            labels.extend(f"N({name}({feat}))" for feat in feats)

    return labels

In [78]:
# Labels for the six extractors applied to the raw features, listed in the
# same function order that generate_syn_samples uses for its output rows.
syn_feats = generate_syn_feats(
    features,
    ["ts_corr", "ts_cov", "ts_stdev", "ts_zscore", "ts_return", "ts_decaylinear"]
)

In [79]:
syn_feats

['N(ts_corr(F0,F1))',
 'N(ts_corr(F0,F2))',
 'N(ts_corr(F0,F3))',
 'N(ts_corr(F0,F4))',
 'N(ts_corr(F1,F2))',
 'N(ts_corr(F1,F3))',
 'N(ts_corr(F1,F4))',
 'N(ts_corr(F2,F3))',
 'N(ts_corr(F2,F4))',
 'N(ts_corr(F3,F4))',
 'N(ts_cov(F0,F1))',
 'N(ts_cov(F0,F2))',
 'N(ts_cov(F0,F3))',
 'N(ts_cov(F0,F4))',
 'N(ts_cov(F1,F2))',
 'N(ts_cov(F1,F3))',
 'N(ts_cov(F1,F4))',
 'N(ts_cov(F2,F3))',
 'N(ts_cov(F2,F4))',
 'N(ts_cov(F3,F4))',
 'N(ts_stdev(F0))',
 'N(ts_stdev(F1))',
 'N(ts_stdev(F2))',
 'N(ts_stdev(F3))',
 'N(ts_stdev(F4))',
 'N(ts_zscore(F0))',
 'N(ts_zscore(F1))',
 'N(ts_zscore(F2))',
 'N(ts_zscore(F3))',
 'N(ts_zscore(F4))',
 'N(ts_return(F0))',
 'N(ts_return(F1))',
 'N(ts_return(F2))',
 'N(ts_return(F3))',
 'N(ts_return(F4))',
 'N(ts_decaylinear(F0))',
 'N(ts_decaylinear(F1))',
 'N(ts_decaylinear(F2))',
 'N(ts_decaylinear(F3))',
 'N(ts_decaylinear(F4))']

In [80]:
len(syn_feats)

40

## Function to Generate Synthetic Samples

In [126]:
def _extract_syn_rows(sample, fefns, pairwise_fns, row_pairs, width, strides):
    """Apply every extractor to one sample and return its list of output rows."""
    rows = []

    for fefn in fefns:
        if fefn in pairwise_fns:
            # Pairwise extractors take two feature rows at a time.
            for r1, r2 in row_pairs:
                rows.append(fefn(sample[r1], sample[r2], d=width, s=strides))
        else:
            # Unary extractors are applied to each feature row on its own.
            for row in sample:
                rows.append(fefn(row, d=width, s=strides))

    return rows


def generate_syn_samples(train_data, test_data, width=2, strides=1):
    """Turn raw samples into normalised synthetic-feature matrices.

    Every sample (a 2-D array of feature rows) is passed through ts_corr,
    ts_cov, ts_stdev, ts_zscore, ts_return and ts_decaylinear, in that order,
    using window length ``width`` and step ``strides``. Each output position
    is then standardised with the mean and standard deviation computed over
    the TRAINING samples only; the same statistics are applied to the test
    samples.

    Parameters
    ----------
    train_data, test_data : sequence of 2-D arrays
        Samples with one row per raw feature. ``train_data`` must be
        non-empty; ``test_data`` may be empty.
    width : int, default 2
        Window length forwarded to the extractors as ``d``.
    strides : int, default 1
        Window step forwarded to the extractors as ``s``.

    Returns
    -------
    dict
        ``{'train': ndarray, 'test': ndarray}`` of dtype float32, each shaped
        (n_samples, n_synthetic_rows, n_windows).

    Notes
    -----
    A position whose training standard deviation is exactly zero is only
    centred, not scaled, so it becomes 0 in the training set instead of
    NaN/inf from a division by zero.
    """
    fefns = (ts_corr, ts_cov, ts_stdev, ts_zscore, ts_return, ts_decaylinear)
    pairwise_fns = (ts_corr, ts_cov)

    # The feature count is taken from the first training sample and reused
    # for the test samples.
    nfeats = train_data[0].shape[0]
    row_pairs = list(combinations(range(nfeats), 2))

    train = np.array(
        [_extract_syn_rows(sample, fefns, pairwise_fns, row_pairs, width, strides)
         for sample in train_data],
        dtype='float32',
    )
    test = np.array(
        [_extract_syn_rows(sample, fefns, pairwise_fns, row_pairs, width, strides)
         for sample in test_data],
        dtype='float32',
    )

    # Per-position statistics over the training samples (axis 0 = sample).
    train_mean = train.mean(axis=0)
    train_std = train.std(axis=0)
    # Avoid 0/0: a constant position is left centred but unscaled.
    train_std[train_std == 0] = 1.0

    train = (train - train_mean) / train_std
    if test.size:
        # An empty test set has shape (0,) and cannot be broadcast.
        test = (test - train_mean) / train_std

    return {'train': train, 'test': test}

In [127]:
# Non-overlapping 3-day windows (width == strides == 3) over every sample;
# normalisation statistics come from the training split only.
syn_samples_dict = generate_syn_samples(train_data, test_data, width=3, strides=3)

In [108]:
train_data[0]

array([[0.556159  , 0.37320736, 0.8774587 , 0.6654548 , 0.17773609,
        0.02153125, 0.44258922, 0.22705936, 0.42207032, 0.92386305,
        0.61660135, 0.4093221 , 0.5757732 , 0.01922725, 0.5751682 ,
        0.843652  , 0.37175006, 0.32805958, 0.48219687, 0.55743736,
        0.7881714 , 0.6891882 , 0.6588642 , 0.71700144, 0.8033174 ,
        0.67633486, 0.7313246 , 0.32172012, 0.26014566, 0.7569098 ],
       [0.08313102, 0.78693485, 0.19608183, 0.74772096, 0.51710016,
        0.42844117, 0.53168035, 0.88994473, 0.09090896, 0.560451  ,
        0.06285619, 0.23122948, 0.20826732, 0.9496844 , 0.7624021 ,
        0.10427587, 0.77511305, 0.89397293, 0.07615993, 0.44785556,
        0.11084031, 0.05637541, 0.9687968 , 0.87514174, 0.2173173 ,
        0.2806659 , 0.02668067, 0.131964  , 0.47831714, 0.13470456],
       [0.6726412 , 0.93938375, 0.33310372, 0.23628776, 0.72762746,
        0.9406716 , 0.22291717, 0.5650814 , 0.28656173, 0.77622104,
        0.12763661, 0.38755268, 0.9095915 , 0.