In [6]:
# The date of the adj file is actually the date of the input data, which represents the first-order network of the next trading day
import argparse
import seaborn as sns
import matplotlib.pyplot as plt

In [7]:
# Command-line style configuration for the notebook. The options are declared
# through argparse so the same settings object (`args`) can be handed to the
# project helpers, but the values are supplied inline rather than from sys.argv.
parser = argparse.ArgumentParser(description='Stock Network')

# (flag, type, default) for every supported option, in declaration order.
_OPTION_SPECS = (
    ('--prestart_time', str, '2000-01-01'),
    ('--start_time', str, '2004-10-31'),
    ('--end_time', str, '2020-01-01'),
    ('--lagend_time', str, '2020-10-31'),
    ('--save_path', str, './output'),
    ('--lr', float, 0.0001),
    ('--weight_decay', float, 5e-4),
    ('--epochs', int, 400),
    ('--device', str, 'cuda:1'),
    ('--window_size', int, 5),
)
for _flag, _kind, _default in _OPTION_SPECS:
    parser.add_argument(_flag, type=_kind, default=_default)

# Overrides applied for this run; every option not listed keeps its default.
_RUN_OVERRIDES = {
    '--save_path': './adj_rolling',
    '--prestart_time': '2014-06-01',
    '--start_time': '2015-01-01',
    '--end_time': '2020-01-01',
    '--lagend_time': '2020-10-30',
}
args = parser.parse_args(
    args=[token for pair in _RUN_OVERRIDES.items() for token in pair]
)

In [8]:
from interv_utlis import *
from eval_utlis import *

In [9]:
# Load the ticker universe from a text file with one ticker per line.
# Commas are removed and surrounding whitespace is stripped; lines that end up
# empty are skipped so a stray blank line cannot inject an '' ticker.
com_path = './company_pool.txt'
with open(com_path, 'r', encoding='utf-8') as f:
    company_pool = [
        ticker
        for ticker in (line.replace(',', '').strip() for line in f)
        if ticker
    ]
company_pool.sort()
# The market index is appended after sorting, so it is the last pool entry.
company_pool.append('^gspc')

In [10]:
# Load the data for the company pool. get_uplift_data is a project helper
# (brought in by the star imports); it returns the list of selected companies
# and a dataframe that later cells index by date and instrument.
company_sel, df = get_uplift_data(company_1=company_pool, args=args)
# Timestamps for which adjacency / causal matrices are produced.
# NOTE(review): `D` is presumably qlib's data API exposed by the star imports,
# and the range is hard-coded rather than taken from `args` — confirm both.
pred_timestamps = list(D.calendar(start_time='2017-01-01', end_time='2020-01-01'))

In [1]:
# For every prediction day, compute the correlation matrix on the preceding
# two-year window, threshold it into an adjacency matrix with gen_adj, drop the
# market index and save the result as <save_path>/corr_lim/<YYYY-MM-DD>.npy.
corr_path = args.save_path + '/' + 'corr_lim'
os.makedirs(corr_path, exist_ok=True)  # no-op when the directory already exists

ts_bgein = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d')

# Thresholds handed to gen_adj (their exact interpretation lives in gen_adj).
lim_pos = 0.4
lim_neg = -0.2

# Position of the market index in company_sel, or None when it is absent
# (e.g. this cell is re-run after the cell that removes '^gspc' from the list).
# Deleting "the last row/column" unconditionally would then drop a real company.
gspc_pos = company_sel.index('^gspc') if '^gspc' in company_sel else None

for pred_day in tqdm(pred_timestamps):
    if pred_day < ts_bgein:
        continue
    # Training window: two years back up to the day before the prediction day.
    train_start = (pred_day - relativedelta(years=2)).strftime('%Y-%m-%d')
    train_end = (pred_day - relativedelta(days=1)).strftime('%Y-%m-%d')
    events = {'train': (train_start, train_end),}

    ## correlation
    partial_corr = get_corr(events=events, price_df=df, company_sel=company_sel, method=get_interval)

    dir_adj = pd.DataFrame(partial_corr, index=company_sel, columns=company_sel)
    dir_adj = dir_adj.fillna(value=0)

    # gen_adj receives a copy so dir_adj itself is left untouched.
    # NOTE(review): assumes gen_adj returns a square array ordered like company_sel.
    dir_adj_np = gen_adj(dir_adj.copy(), lim_pos=lim_pos, lim_neg=lim_neg)

    # Remove the market index row and column so the saved matrix lines up with
    # company_sel once '^gspc' has been removed from that list.
    if gspc_pos is not None:
        dir_adj_np = np.delete(dir_adj_np, gspc_pos, 0)
        dir_adj_np = np.delete(dir_adj_np, gspc_pos, 1)

    np.save(os.path.join(corr_path, pred_day.strftime('%Y-%m-%d') + '.npy'), dir_adj_np)

In [12]:
# Drop the market index from the company list so it matches the saved
# adjacency matrices. Guarded so that re-running this cell is a no-op instead
# of raising ValueError once '^gspc' is already gone.
if '^gspc' in company_sel:
    company_sel.remove('^gspc')

## high volume with high positive return

In [2]:
# For every prediction day, estimate the effect of a "high volume AND high
# positive return" event in each correlated stock on the next-day return of
# each target stock, and save the effect matrix (rows: target, columns:
# treatment stock) as <save_path>/hrhv_pos/<YYYY-MM-DD>.npy.
output_path = args.save_path + '/' + 'hrhv_pos'
os.makedirs(output_path, exist_ok=True)  # no-op when the directory already exists

ts_bgein = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d')

lim_low = 0.04  # a daily return at or above this counts as "high positive"
col_of = {name: pos for pos, name in enumerate(company_sel)}  # company -> matrix position

for pred_day in tqdm(pred_timestamps):
    if pred_day < ts_bgein:
        continue
    # Training window: two years back up to the day before the prediction day.
    train_start = (pred_day - relativedelta(years=2)).strftime('%Y-%m-%d')
    train_end = (pred_day - relativedelta(days=1)).strftime('%Y-%m-%d')
    events = {'train': (train_start, train_end),}

    ## correlation
    dir_adj_np = np.load(args.save_path + '/corr_lim/' + pred_day.strftime('%Y-%m-%d') + '.npy')

    ## causal
    adj_matrix = pd.DataFrame(dir_adj_np, index=company_sel, columns=company_sel, dtype=bool)
    inter = events['train']
    interval = get_interval(inter)
    day_start = interval[0]
    day_end = interval[1]
    df_window = df.loc[day_start:day_end]  # same slice for every target, so taken once
    causal_martix = np.zeros_like(adj_matrix, dtype=np.float64)
    for i, comp in enumerate(company_sel):  # target node
        neighbours = adj_matrix.loc[comp]
        related_comp = neighbours[neighbours == True].index.to_list()
        if len(related_comp) == 0:
            continue
        feat = df_window.loc[(slice(None), related_comp + [comp]), 'feature']
        df_comp = pd.pivot(feat.loc[:, 'return'].reset_index(), index='datetime', columns='instrument')
        df_vol = pd.pivot(feat.loc[:, '$volume'].reset_index(), index='datetime', columns='instrument')
        ret_wide = df_comp.loc[:, 'return']    # date x instrument returns
        vol_wide = df_vol.loc[:, '$volume']    # date x instrument volumes
        # Outcome: the target's return on the following row (next date in the window).
        Y = pd.Series(ret_wide[comp].shift(-1), name='Y')
        for T_comp in related_comp:
            j = col_of[T_comp]
            ## volume: flag dates whose volume is at least mean + 1 std of the window.
            ## Surviving values above 0.01 become 1; anything else is cast to int.
            T_vol = pd.Series(vol_wide[T_comp], name='T').copy(deep=True)
            lim_vol = T_vol.mean() + 1 * T_vol.std()
            T_vol = T_vol.where(T_vol >= lim_vol, other=0)
            T_vol[T_vol > 0.01] = 1
            T_vol = T_vol.astype('int')
            ## return: flag dates whose return is at least lim_low.
            T_high = pd.Series(ret_wide[T_comp], name='T').copy(deep=True)
            T_high = T_high.where(T_high >= lim_low, other=0)
            T_high[T_high > 0.0001] = 1
            T_high = T_high.astype('int')

            # Treatment is active only when both flags are set on the same date.
            T_0 = T_vol & T_high
            if T_0.sum() < 5:
                # Too few treated dates to estimate anything; entry stays 0.
                continue

            df_comp2 = pd.concat([ret_wide, T_0, Y], axis=1)

            # The last row is dropped because its shifted outcome is undefined.
            Y_uplift = df_comp2['Y'].values[:-1]  # outcome of interest
            T_uplift = df_comp2['T'].values[:-1]   # intervention, or treatment
            X_uplift = df_comp2.drop(columns=['Y', 'T']).values[:-1]   # confounders
            xg = XGBTRegressor(random_state=212)
            try:
                # estimate_ate is called before predict; the returned ATE and
                # bounds themselves are not used.
                _te, _lb, _ub = xg.estimate_ate(X_uplift, T_uplift, Y_uplift)
                # NOTE(review): X_uplift holds the columns of related_comp plus
                # comp, while pred_day_X holds related_comp only. If comp is not
                # in related_comp the widths differ and predict is expected to
                # fall into the except branch — confirm this is intended.
                pred_day_X = df.loc[pred_day].loc[related_comp, 'feature'].loc[:, 'return'].values.reshape(1, -1)
                effect = xg.predict(pred_day_X)
                causal_effect = effect[0, 0]
            except Exception:
                # Best effort: a failed fit/predict yields a zero effect.
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                causal_effect = 0
            causal_martix[i, j] = causal_effect
    np.save(os.path.join(output_path, pred_day.strftime('%Y-%m-%d') + '.npy'), causal_martix)


## high volume with high negative return

In [3]:
# For every prediction day, estimate the effect of a "high volume AND high
# negative return" event in each correlated stock on the next-day return of
# each target stock, and save the effect matrix (rows: target, columns:
# treatment stock) as <save_path>/hrhv_neg/<YYYY-MM-DD>.npy.
output_path = args.save_path + '/' + 'hrhv_neg'
os.makedirs(output_path, exist_ok=True)  # no-op when the directory already exists

ts_bgein = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d')

lim_high = -0.04  # a daily return at or below this counts as "high negative"
col_of = {name: pos for pos, name in enumerate(company_sel)}  # company -> matrix position

for pred_day in tqdm(pred_timestamps):
    if pred_day < ts_bgein:
        continue
    # Training window: two years back up to the day before the prediction day.
    train_start = (pred_day - relativedelta(years=2)).strftime('%Y-%m-%d')
    train_end = (pred_day - relativedelta(days=1)).strftime('%Y-%m-%d')
    events = {'train': (train_start, train_end),}

    ## correlation
    dir_adj_np = np.load(args.save_path + '/corr_lim/' + pred_day.strftime('%Y-%m-%d') + '.npy')

    ## causal
    adj_matrix = pd.DataFrame(dir_adj_np, index=company_sel, columns=company_sel, dtype=bool)
    inter = events['train']
    interval = get_interval(inter)
    day_start = interval[0]
    day_end = interval[1]
    df_window = df.loc[day_start:day_end]  # same slice for every target, so taken once
    causal_martix = np.zeros_like(adj_matrix, dtype=np.float64)
    for i, comp in enumerate(company_sel):  # target node
        neighbours = adj_matrix.loc[comp]
        related_comp = neighbours[neighbours == True].index.to_list()
        if len(related_comp) == 0:
            continue
        feat = df_window.loc[(slice(None), related_comp + [comp]), 'feature']
        df_comp = pd.pivot(feat.loc[:, 'return'].reset_index(), index='datetime', columns='instrument')
        df_vol = pd.pivot(feat.loc[:, '$volume'].reset_index(), index='datetime', columns='instrument')
        ret_wide = df_comp.loc[:, 'return']    # date x instrument returns
        vol_wide = df_vol.loc[:, '$volume']    # date x instrument volumes
        # Outcome: the target's return on the following row (next date in the window).
        Y = pd.Series(ret_wide[comp].shift(-1), name='Y')
        for T_comp in related_comp:
            j = col_of[T_comp]
            ## volume: flag dates whose volume is at least mean + 1 std of the window.
            ## Surviving values above 0.01 become 1; anything else is cast to int.
            T_vol = pd.Series(vol_wide[T_comp], name='T').copy(deep=True)
            lim_vol = T_vol.mean() + 1 * T_vol.std()
            T_vol = T_vol.where(T_vol >= lim_vol, other=0)
            T_vol[T_vol > 0.01] = 1
            T_vol = T_vol.astype('int')
            ## return: flag dates whose return is at or below lim_high.
            T_high = pd.Series(ret_wide[T_comp], name='T').copy(deep=True)
            T_high = T_high.where(T_high <= lim_high, other=0)
            T_high[T_high < -0.0001] = 1
            T_high = T_high.astype('int')

            # Treatment is active only when both flags are set on the same date.
            T_0 = T_vol & T_high
            if T_0.sum() < 5:
                # Too few treated dates to estimate anything; entry stays 0.
                continue

            df_comp2 = pd.concat([ret_wide, T_0, Y], axis=1)

            # The last row is dropped because its shifted outcome is undefined.
            Y_uplift = df_comp2['Y'].values[:-1]  # outcome of interest
            T_uplift = df_comp2['T'].values[:-1]   # intervention, or treatment
            X_uplift = df_comp2.drop(columns=['Y', 'T']).values[:-1]   # confounders
            xg = XGBTRegressor(random_state=212)
            try:
                # estimate_ate is called before predict; the returned ATE and
                # bounds themselves are not used.
                _te, _lb, _ub = xg.estimate_ate(X_uplift, T_uplift, Y_uplift)
                # NOTE(review): X_uplift holds the columns of related_comp plus
                # comp, while pred_day_X holds related_comp only. If comp is not
                # in related_comp the widths differ and predict is expected to
                # fall into the except branch — confirm this is intended.
                pred_day_X = df.loc[pred_day].loc[related_comp, 'feature'].loc[:, 'return'].values.reshape(1, -1)
                effect = xg.predict(pred_day_X)
                causal_effect = effect[0, 0]
            except Exception:
                # Best effort: a failed fit/predict yields a zero effect.
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                causal_effect = 0
            causal_martix[i, j] = causal_effect
    np.save(os.path.join(output_path, pred_day.strftime('%Y-%m-%d') + '.npy'), causal_martix)


## continuing rise of return

In [12]:
# For every prediction day, estimate the effect of a "three consecutive
# positive-return days" event in each correlated stock on the next-day return
# of each target stock, and save the effect matrix (rows: target, columns:
# treatment stock) as <save_path>/conup/<YYYY-MM-DD>.npy.
output_path = args.save_path + '/' + 'conup'
os.makedirs(output_path, exist_ok=True)  # no-op when the directory already exists

# NOTE(review): the sibling cells start at 2017-01-01; this later start date
# looks like a resume point from an interrupted run. With it, a fresh run
# writes no conup files before 2017-02-10 — confirm whether that is intended.
ts_bgein = datetime.datetime.strptime('2017-02-10', '%Y-%m-%d')

lim = 0  # returns strictly above this count as an "up" day
col_of = {name: pos for pos, name in enumerate(company_sel)}  # company -> matrix position

for pred_day in tqdm(pred_timestamps):
    if pred_day < ts_bgein:
        continue
    # Training window: two years back up to the day before the prediction day.
    train_start = (pred_day - relativedelta(years=2)).strftime('%Y-%m-%d')
    train_end = (pred_day - relativedelta(days=1)).strftime('%Y-%m-%d')
    events = {'train': (train_start, train_end),}

    ## correlation
    dir_adj_np = np.load(args.save_path + '/corr_lim/' + pred_day.strftime('%Y-%m-%d') + '.npy')

    ## causal
    adj_matrix = pd.DataFrame(dir_adj_np, index=company_sel, columns=company_sel, dtype=bool)
    inter = events['train']
    interval = get_interval(inter)
    day_start = interval[0]
    day_end = interval[1]
    df_window = df.loc[day_start:day_end]  # same slice for every target, so taken once
    causal_martix = np.zeros_like(adj_matrix, dtype=np.float64)
    for i, comp in enumerate(company_sel):  # target node
        neighbours = adj_matrix.loc[comp]
        related_comp = neighbours[neighbours == True].index.to_list()
        if len(related_comp) == 0:
            continue
        df_comp = df_window.loc[(slice(None), related_comp + [comp]), 'feature'].loc[:, 'return']
        df_comp = pd.pivot(df_comp.reset_index(), index='datetime', columns='instrument')
        ret_wide = df_comp.loc[:, 'return']    # date x instrument returns
        # Outcome: the target's return on the following row (next date in the window).
        Y = pd.Series(ret_wide[comp].shift(-1), name='Y')
        for T_comp in related_comp:
            j = col_of[T_comp]
            ## conup: 1 on dates with a strictly positive return, 0 otherwise
            ## (zero and missing returns count as 0).
            T_t = pd.Series(ret_wide[T_comp], name='T').copy(deep=True)
            T_t = T_t.where(T_t >= lim, other=0)
            T_t[T_t > 0] = 1
            T_t = T_t.astype('int')
            T_tsub1 = pd.Series(T_t.shift(1), name='Ttsub1')
            T_tsub2 = pd.Series(T_t.shift(2), name='Ttsub2')
            # The first rows have no history; fillna(0) makes them non-events.
            T_raw = pd.concat([T_t, T_tsub1, T_tsub2], axis=1).fillna(value=0)
            # Event fires when today and the two previous rows are all "up".
            T_conup = (T_raw.sum(axis=1) == 3).astype('int')

            T_0 = pd.Series(T_conup, name='T')
            if T_0.sum() < 5:
                # Too few treated dates to estimate anything; entry stays 0.
                continue

            df_comp2 = pd.concat([ret_wide, T_0, Y], axis=1)

            # The last row is dropped because its shifted outcome is undefined.
            Y_uplift = df_comp2['Y'].values[:-1]  # outcome of interest
            T_uplift = df_comp2['T'].values[:-1]   # intervention, or treatment
            X_uplift = df_comp2.drop(columns=['Y', 'T']).values[:-1]   # confounders
            xg = XGBTRegressor(random_state=212)
            try:
                # estimate_ate is called before predict; the returned ATE and
                # bounds themselves are not used.
                _te, _lb, _ub = xg.estimate_ate(X_uplift, T_uplift, Y_uplift)
                # NOTE(review): X_uplift holds the columns of related_comp plus
                # comp, while pred_day_X holds related_comp only. If comp is not
                # in related_comp the widths differ and predict is expected to
                # fall into the except branch — confirm this is intended.
                pred_day_X = df.loc[pred_day].loc[related_comp, 'feature'].loc[:, 'return'].values.reshape(1, -1)
                effect = xg.predict(pred_day_X)
                causal_effect = effect[0, 0]
            except Exception:
                # Best effort: a failed fit/predict yields a zero effect.
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                causal_effect = 0

            causal_martix[i, j] = causal_effect
    np.save(os.path.join(output_path, pred_day.strftime('%Y-%m-%d') + '.npy'), causal_martix)


## continuing drop of return

In [4]:
# For every prediction day, estimate the effect of a "three consecutive
# negative-return days" event in each correlated stock on the next-day return
# of each target stock, and save the effect matrix (rows: target, columns:
# treatment stock) as <save_path>/condown/<YYYY-MM-DD>.npy.
output_path = args.save_path + '/' + 'condown'
os.makedirs(output_path, exist_ok=True)  # no-op when the directory already exists

ts_bgein = datetime.datetime.strptime('2017-01-01', '%Y-%m-%d')

lim = 0  # returns strictly below this count as a "down" day
col_of = {name: pos for pos, name in enumerate(company_sel)}  # company -> matrix position

for pred_day in tqdm(pred_timestamps):
    if pred_day < ts_bgein:
        continue
    # Training window: two years back up to the day before the prediction day.
    train_start = (pred_day - relativedelta(years=2)).strftime('%Y-%m-%d')
    train_end = (pred_day - relativedelta(days=1)).strftime('%Y-%m-%d')
    events = {'train': (train_start, train_end),}

    ## correlation
    dir_adj_np = np.load(args.save_path + '/corr_lim/' + pred_day.strftime('%Y-%m-%d') + '.npy')

    ## causal
    adj_matrix = pd.DataFrame(dir_adj_np, index=company_sel, columns=company_sel, dtype=bool)
    inter = events['train']
    interval = get_interval(inter)
    day_start = interval[0]
    day_end = interval[1]
    df_window = df.loc[day_start:day_end]  # same slice for every target, so taken once
    causal_martix = np.zeros_like(adj_matrix, dtype=np.float64)
    for i, comp in enumerate(company_sel):  # target node
        neighbours = adj_matrix.loc[comp]
        related_comp = neighbours[neighbours == True].index.to_list()
        if len(related_comp) == 0:
            continue
        df_comp = df_window.loc[(slice(None), related_comp + [comp]), 'feature'].loc[:, 'return']
        df_comp = pd.pivot(df_comp.reset_index(), index='datetime', columns='instrument')
        ret_wide = df_comp.loc[:, 'return']    # date x instrument returns
        # Outcome: the target's return on the following row (next date in the window).
        Y = pd.Series(ret_wide[comp].shift(-1), name='Y')
        for T_comp in related_comp:
            j = col_of[T_comp]
            ## condown: 1 on dates with a strictly negative return, 0 otherwise
            ## (zero and missing returns count as 0).
            T_t = pd.Series(ret_wide[T_comp], name='T').copy(deep=True)
            T_t = T_t.where(T_t < lim, other=0)
            T_t[T_t < 0] = 1
            T_t = T_t.astype('int')
            T_tsub1 = pd.Series(T_t.shift(1), name='Ttsub1')
            T_tsub2 = pd.Series(T_t.shift(2), name='Ttsub2')
            # The first rows have no history; fillna(0) makes them non-events.
            T_raw = pd.concat([T_t, T_tsub1, T_tsub2], axis=1).fillna(value=0)
            # Event fires when today and the two previous rows are all "down".
            T_condown = (T_raw.sum(axis=1) == 3).astype('int')

            T_0 = pd.Series(T_condown, name='T')
            if T_0.sum() < 5:
                # Too few treated dates to estimate anything; entry stays 0.
                continue

            df_comp2 = pd.concat([ret_wide, T_0, Y], axis=1)

            # The last row is dropped because its shifted outcome is undefined.
            Y_uplift = df_comp2['Y'].values[:-1]  # outcome of interest
            T_uplift = df_comp2['T'].values[:-1]   # intervention, or treatment
            X_uplift = df_comp2.drop(columns=['Y', 'T']).values[:-1]   # confounders
            xg = XGBTRegressor(random_state=212)
            try:
                # estimate_ate is called before predict; the returned ATE and
                # bounds themselves are not used.
                _te, _lb, _ub = xg.estimate_ate(X_uplift, T_uplift, Y_uplift)
                # NOTE(review): X_uplift holds the columns of related_comp plus
                # comp, while pred_day_X holds related_comp only. If comp is not
                # in related_comp the widths differ and predict is expected to
                # fall into the except branch — confirm this is intended.
                pred_day_X = df.loc[pred_day].loc[related_comp, 'feature'].loc[:, 'return'].values.reshape(1, -1)
                effect = xg.predict(pred_day_X)
                causal_effect = effect[0, 0]
            except Exception:
                # Best effort: a failed fit/predict yields a zero effect.
                # Exception (not a bare except) so Ctrl-C still stops the loop.
                causal_effect = 0
            causal_martix[i, j] = causal_effect
    np.save(os.path.join(output_path, pred_day.strftime('%Y-%m-%d') + '.npy'), causal_martix)
