### Import libraries

In [1]:
import ace_lib as ace
import helpful_functions as hf
import pandas as pd
import requests
import plotly.express as px

### Start session
Enter your credentials once - they will be saved to a local folder and loaded automatically each time

In [2]:
# Start a session through ace_lib; the returned object `s` is passed to the
# data-download and simulation calls later in this notebook.
s = ace.start_session()

Complete biometrics authentication and press any key to continue: 
https://api.worldquantbrain.com/authentication/persona?inquiry=inq_RDgxw9L7CL7oeAdAZhgMX2BKG3c9



### Create list of alpha expressions
#### Step 1. Download datasets

In [3]:
############################### Equity ,Region, Delay , Top ################################
# Download the dataset catalogue. The positional arguments after the session are
# 'EQUITY', 'USA', '1' -- presumably instrument type, region and delay (TODO confirm
# against hf.get_datasets; note that the delay is passed as a string here).
datasets_df = hf.get_datasets(s,'EQUITY','USA','1') # NOTE(review): no universe is passed; the original note says the default load is USA TOP3000 delay 1 -- confirm
# Optional lookup: select the row whose name equals 'Sentiment Data for Equity'.
# sentiment_data_name = datasets_df[datasets_df['name'] == 'Sentiment Data for Equity']
# sentiment_data_name

In [4]:
# Split the dataset catalogue into three separate frames, one per category
# keyword ('other', 'news', 'socialmedia'), matching case-insensitively.
def _datasets_in_category(catalogue, keyword):
    """Return the rows of `catalogue` whose 'category' value contains `keyword`.

    The match is case-insensitive. Rows with a missing category are treated as
    non-matching (`na=False`); without it the mask itself would contain missing
    values and boolean indexing would raise instead of filtering.
    The returned frame has a fresh 0..n-1 index.
    """
    mask = catalogue['category'].str.contains(keyword, case=False, na=False)
    return catalogue[mask].reset_index(drop=True)


other_df = _datasets_in_category(datasets_df, 'other')
news_df = _datasets_in_category(datasets_df, 'news')
socialmedia_df = _datasets_in_category(datasets_df, 'socialmedia')

In [17]:
# Keep only the news datasets with more than 10 alphas and more than 10 users.
has_enough_alphas = news_df['alphaCount'] > 10
has_enough_users = news_df['userCount'] > 10
selected_datasets_df = news_df.loc[has_enough_alphas & has_enough_users]

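For demonstration we keep only the news datasets that have more than 10 alphas and more than 10 users (`alphaCount > 10` and `userCount > 10`). Picking datasets by a quality criterion such as the highest value score is not mandatory, but it is definitely a recommended practice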

#### Step 2. Select the needed datafields


In [8]:
# Collect the ids of the selected datasets into a plain Python list,
# one entry per row of selected_datasets_df.
dataset_id = selected_datasets_df['id'].values.tolist()

In [9]:
# For every selected dataset, download its datafields and store the list of
# field ids. The result is a list of lists (one inner list per dataset).
all_data_field = []
for ds_id in dataset_id:
    datafields_df = hf.get_datafields(s, dataset_id=ds_id)
    field_ids = datafields_df['id'].values.tolist()
    all_data_field.append(field_ids)

In [10]:
# Merge the per-dataset lists of field ids into one flat list.
# Entries that are already plain ids (i.e. not lists) are kept unchanged, so
# running this cell a second time leaves the list as it is instead of splitting
# every id string into single characters.
all_data_field = [
    field_id
    for entry in all_data_field
    for field_id in (entry if isinstance(entry, list) else [entry])
]

In [18]:
# Display the collected datafield ids.
all_data_field

['news_all_vwap',
 'news_atr14',
 'news_atr_ratio',
 'news_cap',
 'news_close_vol',
 'news_curr_vol',
 'news_dividend_yield',
 'news_eod_close',
 'news_eod_high',
 'news_eod_low',
 'news_eod_vwap',
 'news_eps_actual',
 'news_high_exc_stddev',
 'news_indx_perf',
 'news_low_exc_stddev',
 'news_ls',
 'news_main_vwap',
 'news_max_dn_amt',
 'news_max_dn_ret',
 'news_max_up_amt',
 'news_max_up_ret',
 'news_mins_10_chg',
 'news_mins_10_pct_dn',
 'news_mins_10_pct_up',
 'news_mins_1_chg',
 'news_mins_1_pct_dn',
 'news_mins_1_pct_up',
 'news_mins_20_chg',
 'news_mins_20_pct_dn',
 'news_mins_20_pct_up',
 'news_mins_2_chg',
 'news_mins_2_pct_dn',
 'news_mins_2_pct_up',
 'news_mins_3_chg',
 'news_mins_3_pct_dn',
 'news_mins_3_pct_up',
 'news_mins_4_chg',
 'news_mins_4_pct_dn',
 'news_mins_4_pct_up',
 'news_mins_5_chg',
 'news_mins_5_pct_dn',
 'news_mins_5_pct_up',
 'news_mins_7_5_chg',
 'news_mins_7_5_pct_dn',
 'news_mins_7_5_pct_up',
 'news_mov_vol',
 'news_open',
 'news_open_gap',
 'news_open_vo

#### Step 3. Create expression list, using selected datafields
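Here is an example: the cell below creates one expression per datafield, taking the negated vector average of the field value (`-vec_avg(field)`). Another option is to assign weights as per the time series skewness of the vector average of the field value, e.g. `ts_skewness(vec_avg(field), 120)`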

In [11]:
# Change the operator here to try a different alpha template.
# Current template: the negated vector average of a datafield.
def _to_expression(x):
    """Wrap the datafield id `x` in the alpha expression template."""
    return f'-vec_avg({x})'


# One alpha expression per datafield id, in the same order as all_data_field.
expression_list = [_to_expression(field) for field in all_data_field]

In [12]:
# Build one alpha per combination of simulation settings:
# 4 universes x 9 decay values (1..9) x 11 truncation values (0.0..1.0)
# x 5 neutralizations = 1980 alphas.
#
# Only the FIRST expression of expression_list is used. The previous version
# of this cell generated an alpha for every expression in every combination
# and then kept just element [0] of each batch, so all the other alphas were
# built and thrown away. Generating only the alpha that is kept produces the
# same alpha_list with far fewer generate_alpha calls
# (assumes ace.generate_alpha has no side effects that matter -- TODO confirm).
# NOTE(review): if every expression was meant to be simulated, loop over
# expression_list here instead of taking its first element -- confirm intent.
first_expression = expression_list[0]  # raises IndexError on an empty list, as before

alpha_list = []
for universe in ['TOP200','TOP3000','TOP500','TOP1000']:
    for decay_Value in range(1,10,1):
        for trun_value in [i/10 for i in range(0, 11)]:
            for neutralization_Value in ['MARKET','INDUSTRY','NONE','SUBINDUSTRY','SECTOR']:
                alpha_list.append(
                    ace.generate_alpha(
                        first_expression,
                        region="USA",
                        universe=universe,
                        decay=decay_Value,
                        neutralization=neutralization_Value,
                        truncation=trun_value,
                    )
                )

In [13]:
import random

# Draw 400 distinct positions used to pick a random subset of alpha_list.
# - A dedicated generator with a fixed seed makes the selection reproducible
#   across kernel restarts.
# - sample() draws without replacement, so no position is picked twice
#   (randint could repeat a position and the same alpha would be simulated
#   more than once).
# - The population starts at 0 so the first alpha can be selected as well.
# NOTE(review): the upper bound is a fixed 1000 rather than being derived from
# the length of alpha_list; positions above 1000 are never sampled -- consider
# deriving the bound from the list length.
random_list = random.Random(42).sample(range(0, 1001), 400)

In [14]:
# Keep only the alphas at the sampled positions, in sampled order.
# This rebinds alpha_list to the reduced selection, so the full list has to be
# regenerated before this cell is run again.
alpha_list = list(map(alpha_list.__getitem__, random_list))

#### Step 4. Apply generate_alpha function to the expression list
In the generate_alpha function you can specify region, universe, decay, delay and other simulation settings

In [None]:
# Simulate the alphas in alpha_list. The list is passed whole -- no slicing is applied here.
# To simulate only a subset, pass a slice instead, e.g. alpha_list[:10], and change 10 to run more.
# NOTE(review): a commented-out note elsewhere in this notebook says all alphas in one list
# should share the same settings, while alpha_list varies universe, decay, truncation and
# neutralization -- confirm that simulate_alpha_list_multi supports this.
result = ace.simulate_alpha_list_multi(s, alpha_list)

In [16]:
# Number of alphas currently in alpha_list.
len(alpha_list)

400

An error occurred
An error occurred


In [13]:
# # When you send multiple alphas for simulation, make sure that all alphas in a single list share the same settings.
# # Alphas with different settings should be sent in a separate list; for instance, the list below has all alphas with the same settings.

# alpha_list = [ace.generate_alpha(x, region= "USA", universe = "TOP3000",delay = 1, decay= 0, neutralization  = "INDUSTRY", truncation = 0.08, pasteurization = "ON",test_period = "P0Y0M0D",unit_handling = "VERIFY", nan_handling= "OFF") for x in expression_list]

# alpha_list[0]

This is an example of what an alpha actually looks like when you send it to the platform.

### Simulate alpha list, get simulation result

simulate_alpha_list_multi will do a multi-simulation if the list contains more than 10 alphas, which is the case here

The returned object will contain the simulation results for all alphas as a list

In [14]:
# Simulate the alphas in alpha_list. The list is passed whole -- no slicing is applied here.
# To simulate only a subset, pass a slice instead, e.g. alpha_list[:10], and change 10 to run more.
result = ace.simulate_alpha_list_multi(s, alpha_list)

100%|██████████| 330/330 [1:41:27<00:00, 18.45s/it]  


__Accessing the result of the first alpha, let's take a look at all the keys of this dictionary__

In [17]:
# prettify_result (from the helpful_functions library) is used here to view the
# in-sample (IS) stats of all the simulated alphas in one table.

result_st1 = hf.prettify_result(result, detailed_tests_view=False)
result_st1

Unnamed: 0,pnl,book_size,long_count,short_count,turnover,returns,drawdown,margin,fitness,sharpe,...,expression,concentrated_weight,high_turnover,is_ladder_sharpe,low_fitness,low_sharpe,low_sub_universe_sharpe,low_turnover,matches_competition,matches_themes
0,27427876,20000000,500,0,0.1536,0.2746,0.5232,0.003576,1.14,0.85,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,PASS,FAIL,FAIL,PASS,WARNING,PASS
1,27227642,20000000,500,0,0.1547,0.2726,0.5232,0.003524,1.12,0.84,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,PASS,FAIL,FAIL,PASS,WARNING,PASS
2,27284092,20000000,500,0,0.1545,0.2732,0.5232,0.003536,1.12,0.84,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,PASS,FAIL,FAIL,PASS,WARNING,PASS
3,27208232,20000000,500,0,0.1548,0.2724,0.5232,0.003520,1.11,0.84,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,PASS,FAIL,FAIL,PASS,WARNING,PASS
4,27208232,20000000,500,0,0.1548,0.2724,0.5232,0.003520,1.11,0.84,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,PASS,FAIL,FAIL,PASS,WARNING,PASS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
985,-11494767,20000000,158,341,0.4545,-0.1151,1.1759,-0.000506,-0.46,-0.92,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,FAIL,FAIL,FAIL,PASS,WARNING,PASS
986,-11480770,20000000,158,341,0.4544,-0.1149,1.1745,-0.000506,-0.46,-0.92,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,FAIL,FAIL,FAIL,PASS,WARNING,PASS
987,-11492412,20000000,158,341,0.4545,-0.1151,1.1756,-0.000506,-0.46,-0.92,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,FAIL,FAIL,FAIL,PASS,WARNING,PASS
988,-11183561,20000000,158,341,0.4520,-0.1120,1.1447,-0.000495,-0.47,-0.94,...,vec_avg(scl12_alltype_buzzvec),FAIL,PASS,FAIL,FAIL,FAIL,FAIL,PASS,WARNING,PASS


In [21]:
# Count how many alphas fall into each outcome of the is_ladder_sharpe column.
result_st1['is_ladder_sharpe'].value_counts()

FAIL    990
Name: is_ladder_sharpe, dtype: int64

In [18]:
# Combine the test results of all simulated alphas into one table
# and preview its first rows.

is_tests_df = hf.concat_is_tests(result)
is_tests_df.head()

Unnamed: 0,alpha_id,date,endDate,limit,matched,name,result,startDate,themes,unmatched,value,year
0,GoEK9KZ,,,1.58,,LOW_SHARPE,FAIL,,,,-0.74,
1,GoEK9KZ,,,1.0,,LOW_FITNESS,FAIL,,,,-0.2,
2,GoEK9KZ,,,0.01,,LOW_TURNOVER,PASS,,,,1.2073,
3,GoEK9KZ,,,0.7,,HIGH_TURNOVER,FAIL,,,,1.2073,
4,GoEK9KZ,2019-07-10,,0.1,,CONCENTRATED_WEIGHT,FAIL,,,,0.452952,


In [19]:
# Alphas that have at least one test row whose result is "FAIL"
failed_rows = is_tests_df['result'] == "FAIL"
failed_alphas = is_tests_df.loc[failed_rows, 'alpha_id'].unique()

# Every remaining alpha id (no "FAIL" row at all) counts as passed
all_alpha_ids = set(is_tests_df['alpha_id'])
passed_alphas = list(all_alpha_ids.difference(failed_alphas))

print(f'Failed alphas:{failed_alphas}\nPassed alphas:{passed_alphas}')

Failed alphas:['GoEK9KZ' 'V8Kmez5' 'V8Kme55' 'oYQJZGl' 'jgNPOxW' 'jgNPOOW' 'WgKoJrG'
 'QQmN6xG' 'O9KMg3p' 'V8KmRZM' 'mXe0l0x' 'JdRW2zl' 'WgKoRjG' 'P1jYbQp'
 'Rr0loMd' 'l0J5Ybx' 'ereVPRp' 'QQmNAdM' 'O9KMjM1' 'QQmNAzM' 'EKo8MLJ'
 'p7QMeXg' 'QQmNpJW' 'nWwe5R8' 'KLx5AXg' 'xnX27el' 'qXeY2gA' 'Mx9evYL'
 '5vNAKJM' 'rWQNa0o' 'Zokd8d8' 'O9KMLzg' 'GoEK2xQ' 'dQe3M3Y' 'vmapEGA'
 'kKeWwOg' 'Rr0lO91' 'YAXEVEw' 'ereVYGJ' 'vmapQZb' 'XKXv0v8' 'A3qrAzW'
 'qXeYRaV' '8LgEww7' '3AgkN8e' '2KPX3w8' 'l0J5oKO' 'YAXERx6' '7dgmkn8'
 'aOXzrE5' 'weXG8jx' 'JdRzV2x' 'V8KzvLY' 'xnXGKpg' '1g6KYKQ' 'dQez5o2'
 'Rr0zRPj' 'qXeG9kv' 'weXGKOx' 'V8Kz1qA' 'l0JGpNe' '8LgJ5Zm' 'A3qzj5Q'
 'A3qz9Rd' 'YAXz6P6' '3AgPwqP' 'YAXz6lM' '1g6KeOz' 'LROzWmn' 'QQmzMpM'
 'dQezpJw' 'zWXGZxK' '5vN5bbJ' 'g37GAOQ' 'dQezXAg' '9RgMv3r' 'rWQG3Mj'
 'EKoz7nR' 'KLxz0A1' 'P1jzWVJ' 'xnXG6ZJ' '9RgM1Ro' 'GoEzAwP' '08ng376'
 'KLxzdoj' 'LROzaQa' 'Rr0zQGn' 'jgNGbE9' 'P1jzdaq' '1g6KQQk' 'XKXz5RX'
 'YAXzG1q' 'oYQGMEk' 'g37GEgM' 'Rr0znz1' 'vmaG36d' '9RgM0Lo' 'p