## 1, Import Library

In [1]:
from machine_lib import * 

## 2, 登录
<div style="margin-left: 20px;">
1, 在machine_lib文件的login方法中填写用户名和密码并保存，然后回到本文件Restart Kernel并重新import machine_lib，修改才会在本文件生效
</div>

<div style="margin-left: 20px;">
2, 打印INVALID_CREDENTIALS即登录失败，打印自己的user_id信息才是登录成功。
</div>

In [2]:
# Log in via machine_lib's login(). The returned object `s` is handed to
# get_datafields() in later cells; a successful login prints the account's
# user id (see the recorded cell output).
s = login()

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'


## 3, 获取数据字段
<div style="margin-left: 20px;">
在官网Data页面中显示的为自己目前有权限的数据集，在数据集Description面板下可以看到dataset_id
</div>

In [None]:
# Fetch the data fields of one dataset for the chosen region / universe / delay.
# The dataset id can be read from the dataset's Description panel on the Data page.
df = get_datafields(
    s,
    dataset_id="analyst4",
    region="USA",
    universe="TOP3000",
    delay=1,
)
df

## 4，数据字段预处理

<div style="margin-left: 20px;">
1, matrix, vector 数据类型
</div>

<div style="margin-left: 20px;">
2, ts_backfill 回填缺失值，提高数据Coverage 
</div>

<div style="margin-left: 20px;">
3, winsorize 去极值
</div>

In [None]:
# Pre-process the raw data fields into expressions ready for the factories.
# NOTE(review): per the section notes this covers matrix/vector handling,
# ts_backfill and winsorize — confirm the exact steps in machine_lib.
pc_fields = process_datafields(df)
# Displayed as the cell result: how many processed fields were produced.
len(pc_fields)

## 5, Alpha factory 
<div style="margin-left: 20px;">
在factory方法中将数据字段与操作符组装成alpha表达式
</div>

In [None]:
# Combine every processed field with the operators in `ts_ops` to get the
# first-order alpha expressions (`ts_ops` is not defined in this notebook;
# presumably it comes from the machine_lib star import).
first_order = first_order_factory(pc_fields, ts_ops)

# Show a 10-item preview followed by the total count, one per line.
print(first_order[:10], len(first_order), sep="\n")

## 6, 回测前载入

<div style="margin-left: 20px;">
1, alpha表达式与初始decay配对
</div>

<div style="margin-left: 20px;">
2, random shuffle 
</div>

<div style="margin-left: 20px;">
3, Load task pool数据结构
</div>

In [None]:
# Give every first-order expression the same starting decay.
init_decay = 6
fo_alpha_list = [(alpha, init_decay) for alpha in first_order]

# Shuffle so that simulating only a prefix of the list acts as a random
# sample, giving a quick read on the dataset's potential.
random.shuffle(fo_alpha_list)

print("数量: %s" % len(fo_alpha_list))
print(fo_alpha_list[:5])

In [None]:
# Split the shuffled (expression, decay) pairs into task pools for simulation.
# NOTE(review): the two trailing 2s are sizing arguments of load_task_pool
# (tasks per pool / alphas per task, in some order) — confirm in machine_lib.
fo_pools = load_task_pool(fo_alpha_list, 2, 2)

# First pool as a sanity check, then the number of pools.
print(fo_pools[0], len(fo_pools), sep="\n")

## 7, 回测

In [None]:
# Simulate the first-order pools.
# NOTE(review): "SUBINDUSTRY", "USA", "TOP3000" look like neutralization,
# region and universe; the trailing 0 is presumably a starting pool index —
# confirm against multi_simulate's signature in machine_lib.
multi_simulate(fo_pools, "SUBINDUSTRY", "USA", "TOP3000", 0)

## 8, 筛选Alpha


<div style="margin-left: 20px;">
1, get_alpha：截取有潜力提升表现至可以提交的alpha进入下一阶
</div>

<div style="margin-left: 20px;">
2, 剪枝Prune：精减相似alpha，提高回测资源利用率
</div>

In [4]:
# Pull the first-order alphas that look promising enough to improve in the
# next order.
fo_tracker = get_alphas(
    "08-01",  # start of the date window (MM-DD) — presumably; confirm in machine_lib
    "08-02",  # end of the date window
    1.2,      # sharpe threshold
    0.7,      # fitness threshold
    "USA",    # region
    100,      # NOTE(review): numeric size/limit argument — confirm its meaning
    "track",  # mode; the submission step later uses "submit" instead
)
print(len(fo_tracker))

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
0
['92kVvQK', "group_rank(group_rank(ts_av_diff(sales_growth, 200), sector),densify(bucket(rank(cap), range='0.1, 1, 0.1')))", 1.32, 0.0467, 0.73, 0.001651, '2025-08-01T08:32:38-04:00', 6]
['J21Ypwn', '-ts_zscore(winsorize(ts_backfill(vec_sum(anl4_mark), 120), std=4), 120)', -1.77, 0.1177, -1.07, -0.000776, '2025-08-01T01:55:14-04:00', 6]
count: 2
2


#### Prune 剪枝

In [5]:
# Prune near-duplicate alphas so simulation budget is not spent on similar ones.
# NOTE(review): 'anl4' looks like the dataset's field-name prefix (cf. anl4_mark
# in the output above) and 5 like a per-field keep limit — confirm in machine_lib.
fo_layer = prune(fo_tracker, 'anl4', 5)

# Number of alphas left after pruning
print(len(fo_layer))

2


## 9, 二阶提升
### ts_ops(field, days) -> group_ops(ts_ops(field, days), group)

In [6]:
# Group operators wrapped around each surviving first-order expression.
group_ops = ["group_neutralize", "group_rank", "group_zscore"]

# Expand every pruned expression into its second-order variants; each variant
# keeps the decay of the expression it was derived from.
so_alpha_list = [
    (alpha, decay)
    for expr, decay in fo_layer
    for alpha in get_group_second_order_factory([expr], group_ops, "USA")
]

random.shuffle(so_alpha_list)

# Total count, then a three-item preview.
print(len(so_alpha_list), so_alpha_list[:3], sep="\n")

138
[("group_neutralize(group_rank(group_rank(ts_av_diff(sales_growth, 200), sector),densify(bucket(rank(cap), range='0.1, 1, 0.1'))),densify(subindustry))", 6), ("group_rank(group_rank(group_rank(ts_av_diff(sales_growth, 200), sector),densify(bucket(rank(cap), range='0.1, 1, 0.1'))),densify(bucket(rank(cap), range='0.1, 1, 0.1')))", 6), ("group_rank(group_rank(group_rank(ts_av_diff(sales_growth, 200), sector),densify(bucket(rank(cap), range='0.1, 1, 0.1'))),densify(subindustry))", 6)]


### Simulate second order

In [7]:
# Pool the second-order alphas and simulate them.
# NOTE(review): with 138 alphas and (5, 5) the recorded log shows pools 0-5 with
# task indices up to 4, i.e. the two numbers size tasks/pools — confirm which is which.
so_pools = load_task_pool(so_alpha_list, 5, 5)
multi_simulate(so_pools, 'SUBINDUSTRY', 'USA', 'TOP3000', 0)

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
pool 0 task 4 post done
pool 0 task 4 simulate done
pool 1 task 4 post done
pool 1 task 4 simulate done
pool 2 task 4 post done
pool 2 task 4 simulate done
pool 3 task 4 post done
pool 3 task 4 simulate done
pool 4 task 4 post done
pool 4 task 4 simulate done
pool 5 task 2 post done
pool 5 task 2 simulate done
Simulate done


## 10，三阶提升
group_ops(ts_ops(field, days), group) -> trade_when(entry_event, group_ops(ts_ops(field, days), group), exit_event)

In [3]:
# Pull the promising second-order alphas, prune near-duplicates, then wrap each
# survivor in trade_when variants to form the third-order candidates.
so_tracker = get_alphas("08-01", "08-02", 1.3, 0.8, "USA", 200, "track")
so_layer = prune(so_tracker, 'anl4', 5)

# Every third-order expression keeps the decay of its second-order parent.
th_alpha_list = [
    (alpha, decay)
    for expr, decay in so_layer
    for alpha in trade_when_factory("trade_when", expr, "USA")
]

random.shuffle(th_alpha_list)
print("三阶表达式数量:%s" % len(th_alpha_list))

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
0
['J22q6Yj', "group_zscore(-ts_zscore(winsorize(ts_backfill(vec_sum(anl4_mark), 120), std=4), 120),densify(bucket(rank(ts_std_dev(returns,20)),range = '0.1, 1, 0.1')))", 1.87, 0.1207, 1.11, 0.000725, '2025-08-01T11:13:25-04:00', 6]
['L22jbb2', "group_neutralize(-ts_zscore(winsorize(ts_backfill(vec_sum(anl4_mark), 120), std=4), 120),densify(bucket(rank(ts_std_dev(returns,20)),range = '0.1, 1, 0.1')))", 1.86, 0.1216, 1.12, 0.000743, '2025-08-01T11:15:17-04:00', 6]
['Z115k93', "group_zscore(-ts_zscore(winsorize(ts_backfill(vec_sum(anl4_mark), 120), std=4), 120),densify(bucket(group_rank(cap, sector),range='0.1, 1, 0.1')))", 1.81, 0.1184, 1.07, 0.000738, '2025-08-01T11:15:13-04:00', 6]
['722AR7x', "group_neutralize(-ts_zscore(winsorize(ts_backfill(vec_sum(anl4_mark)

### Simulate Third Order

In [None]:
# Simulate third order: pool the third-order alphas and run them.
# NOTE(review): the recorded output of this cell contains the API error
# "Multi-simulations require multiple simulations in request array", which
# suggests a task ended up holding a single alpha — verify how load_task_pool /
# multi_simulate handle a task of size one.
th_pools = load_task_pool(th_alpha_list, 2, 2)
multi_simulate(th_pools, 'SUBINDUSTRY', 'USA', 'TOP3000', 0)

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
location key error: b'{"errors":["Multi-simulations require multiple simulations in request array. Single simulations are required to be submitted without the wrapping array."]}'


## 11, 获取可提交的Alpha

<div style="margin-left: 20px;">
1, 拉取sharpe,fitness达到提交要求的alpha
</div>

<div style="margin-left: 20px;">
2, Check Submission：检查其他Test是否达到要求
</div>

<div style="margin-left: 20px;">
3, view_alphas 对可以提交的alpha进行排序
</div>


In [6]:
# Collect alphas whose sharpe and fitness already meet the submission bar.
th_tracker = get_alphas(
    "07-31",   # start of the date window (MM-DD) — presumably; confirm in machine_lib
    "08-01",   # end of the date window
    1.58,      # sharpe threshold
    1,         # fitness threshold
    "USA",     # region
    200,       # NOTE(review): numeric size/limit argument — confirm its meaning
    "submit",  # "submit" mode, as opposed to "track" in the earlier stages
)

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
0
100
count: 0


In [7]:
# Collect the id (first element of each row) of every fetched alpha into
# stone_bag, then run the API submission check on them.
stone_bag = [alpha[0] for alpha in th_tracker]
print(len(stone_bag))

# gold_bag is passed in empty and read afterwards by view_alphas; presumably
# check_submission appends the ids that pass — confirm in machine_lib.
gold_bag = []
check_submission(stone_bag, gold_bag, 0)

0
b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'
[]


[]

In [8]:
# Print the submittable alphas sorted by sharpe; then locate each alpha on the
# website and submit it manually.
view_alphas(gold_bag)

b'{"user":{"id":"CH83680"},"token":{"expiry":14400.0},"permissions":["BEFORE_AND_AFTER_PERFORMANCE_V2","BRAIN_LABS","BRAIN_LABS_JUPYTER_LAB","CONSULTANT","MULTI_SIMULATION","PROD_ALPHAS","REFERRAL","VISUALIZATION","WORKDAY"]}'


## 12, 微调可以提交的alpha

<div style="margin-left: 20px;">
1, 得到更好的表现
</div>
<div style="margin-left: 40px;">
调整中性化，操作符参数，Decay
</div>

<div style="margin-left: 20px;">
2, Alpha质量评估
</div>

<div style="margin-left: 40px;">
performance comparison，turnover，margin
</div>

<div style="margin-left: 20px;">
3, 鲁棒性评估，防止过拟合
</div>

<div style="margin-left: 40px;">
更改中性化，Rank，Binary Test...
</div>

### Appendix

In [9]:
# 模板构建Factory实例

def template_factory(sent_fields, option_fields):
    """Build one alpha expression per (sentiment field, option field) pair.

    Each expression has the form
    ``log(1+sigmoid(ts_zscore(<sent>,30))*sigmoid(ts_zscore(<opt>,30)))``.

    Args:
        sent_fields: iterable of field ids used for the first ts_zscore term.
        option_fields: iterable of field ids used for the second ts_zscore term.

    Returns:
        A list of expression strings, ordered by sent_fields (outer) and then
        option_fields (inner). Empty if either input is empty.
    """
    # The previous template never closed the outer log( call, so every
    # generated expression had unbalanced parentheses.
    template = "log(1+sigmoid(ts_zscore(%s,30))*sigmoid(ts_zscore(%s,30)))"
    return [
        template % (sent_field, opt_field)
        for sent_field in sent_fields
        for opt_field in option_fields
    ]

# Option dataset: keep only the ids of MATRIX-type fields.
opt_df = get_datafields(
    s,
    dataset_id="option8",
    region="USA",
    universe="TOP3000",
    delay=1,
)
opt_fields = opt_df.loc[opt_df["type"] == "MATRIX", "id"].tolist()
print(opt_fields)

# Sentiment dataset: same selection.
sent_df = get_datafields(
    s,
    dataset_id="sentiment1",
    region="USA",
    universe="TOP3000",
    delay=1,
)
sent_fields = sent_df.loc[sent_df["type"] == "MATRIX", "id"].tolist()
print(sent_fields)

# Cross every sentiment field with every option field through the template.
alpha_list = template_factory(sent_fields, opt_fields)
print(alpha_list)

['historical_volatility_10', 'historical_volatility_120', 'historical_volatility_150', 'historical_volatility_180', 'historical_volatility_20', 'historical_volatility_30', 'historical_volatility_60', 'historical_volatility_90', 'implied_volatility_call_10', 'implied_volatility_call_1080', 'implied_volatility_call_120', 'implied_volatility_call_150', 'implied_volatility_call_180', 'implied_volatility_call_20', 'implied_volatility_call_270', 'implied_volatility_call_30', 'implied_volatility_call_360', 'implied_volatility_call_60', 'implied_volatility_call_720', 'implied_volatility_call_90', 'implied_volatility_mean_10', 'implied_volatility_mean_1080', 'implied_volatility_mean_120', 'implied_volatility_mean_150', 'implied_volatility_mean_180', 'implied_volatility_mean_20', 'implied_volatility_mean_270', 'implied_volatility_mean_30', 'implied_volatility_mean_360', 'implied_volatility_mean_60', 'implied_volatility_mean_720', 'implied_volatility_mean_90', 'implied_volatility_mean_skew_10', '