In [2]:
import random
# import graphviz
from collections import OrderedDict

In [3]:
from typing import Optional

import requests
from urllib.parse import urljoin
import time
import json
import os
import getpass
from pathlib import Path

import pandas as pd

from multiprocessing.pool import ThreadPool
from functools import partial

import tqdm

import pandas as pd
import os
import time
import json


def make_clickable_alpha_id(alpha_id):
    """
    Render an alpha id as an HTML link to its page on the platform.

    Meant to be used as a dataframe cell formatter, so the id can be
    clicked to open the simulation result in the browser.
    """

    base_url = "https://platform.worldquantbrain.com/alpha/"
    return '<a href="{0}{1}">{1}</a>'.format(base_url, alpha_id)


def prettify_result(
    result, detailed_tests_view=False, clickable_alpha_id: bool = False
):
    """
    Merge per-alpha stats, expressions and test outcomes into one table.

    ``result`` is a list of dicts with the keys ``alpha_id``,
    ``simulate_data``, ``is_stats`` and ``is_tests``; entries whose
    ``is_stats`` / ``is_tests`` is None are left out of the respective part.
    Rows are ordered by ``fitness``, highest first. Any column containing
    the value "PENDING" is dropped, and camelCase column names are turned
    into lower snake_case.

    With ``detailed_tests_view`` each test cell holds a dict with the
    test's limit, result and value instead of only the result. With
    ``clickable_alpha_id`` a pandas Styler is returned in which the alpha
    ids are rendered as links; otherwise a plain DataFrame is returned.
    """
    # In-sample statistics, best fitness first.
    stats_frames = [
        entry["is_stats"] for entry in result if entry["is_stats"] is not None
    ]
    is_stats_df = pd.concat(stats_frames).reset_index(drop=True)
    is_stats_df = is_stats_df.sort_values("fitness", ascending=False)

    # alpha_id -> expression text (a later entry with the same id wins).
    expression_by_id = {}
    for entry in result:
        if entry["is_stats"] is not None:
            expression_by_id[entry["alpha_id"]] = entry["simulate_data"]["regular"]
    expression_df = pd.DataFrame(
        list(expression_by_id.items()), columns=["alpha_id", "expression"]
    )

    # One column per test name, one row per alpha.
    test_frames = [
        entry["is_tests"] for entry in result if entry["is_tests"] is not None
    ]
    is_tests_df = pd.concat(test_frames).reset_index(drop=True)
    if detailed_tests_view:
        detail_columns = ["limit", "result", "value"]
        is_tests_df["details"] = is_tests_df[detail_columns].to_dict(
            orient="records"
        )
        pivot_values = "details"
    else:
        pivot_values = "result"
    is_tests_df = is_tests_df.pivot(
        index="alpha_id", columns="name", values=pivot_values
    ).reset_index()

    alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
    alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")

    pending_columns = alpha_stats.columns[(alpha_stats == "PENDING").any()]
    alpha_stats = alpha_stats.drop(columns=pending_columns)

    # camelCase -> snake_case
    snake_case = alpha_stats.columns.str.replace(
        "(?<=[a-z])(?=[A-Z])", "_", regex=True
    )
    alpha_stats.columns = snake_case.str.lower()

    if not clickable_alpha_id:
        return alpha_stats
    return alpha_stats.style.format({"alpha_id": make_clickable_alpha_id})


def concat_pnl(result):
    """
    Stack the pnl frames of all simulated alphas into one dataframe.

    Entries whose ``pnl`` is None are skipped; the index of the stacked
    frame is moved into a regular column.
    """
    pnl_frames = [entry["pnl"] for entry in result if entry["pnl"] is not None]
    return pd.concat(pnl_frames).reset_index()


def concat_is_tests(result):
    """
    Stack the ``is_tests`` frames of all simulated alphas into one dataframe.

    Entries whose ``is_tests`` is None are skipped; the resulting frame gets
    a fresh 0..n-1 index.
    """
    test_frames = [
        entry["is_tests"] for entry in result if entry["is_tests"] is not None
    ]
    return pd.concat(test_frames).reset_index(drop=True)


def save_simulation_result(result):
    """
    Write a simulation result as JSON into the folder simulation_results.

    The file is named ``<alpha id>_<region>``, both taken from ``result``;
    the folder is created when it does not exist yet.
    """

    output_dir = "simulation_results/"
    file_name = "{}_{}".format(result["id"], result["settings"]["region"])
    target = os.path.join(output_dir, file_name)

    os.makedirs(output_dir, exist_ok=True)

    with open(target, "w") as handle:
        json.dump(result, handle)

def set_alpha_properties(
    s,
    alpha_id,
    name: Optional[str] = None,
    color: Optional[str] = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: Optional[list] = None,
):
    """
    Change an alpha's descriptive properties on the platform.

    Sends a PATCH request for ``alpha_id`` through the session ``s`` with
    the given name, color, tags and the selection / combo descriptions.
    ``category`` and the regular description are always sent as null.

    ``tags`` defaults to ``["gen"]``; a fresh list is built on every call so
    the default can never be shared between (or mutated across) calls.

    The response of the request is not inspected and nothing is returned.
    """

    if tags is None:
        tags = ["gen"]

    params = {
        "color": color,
        "name": name,
        "tags": tags,
        "category": None,
        "regular": {"description": None},
        "combo": {"description": combo_desc},
        "selection": {"description": selection_desc},
    }
    s.patch("https://api.worldquantbrain.com/alphas/" + alpha_id, json=params)



def save_pnl(pnl_df, alpha_id, region):
    """
    Write a pnl dataframe as CSV into the folder alphas_pnl.

    The file is named ``<alpha_id>_<region>``; the folder is created when
    it does not exist yet.
    """

    output_dir = "alphas_pnl/"
    os.makedirs(output_dir, exist_ok=True)
    pnl_df.to_csv(os.path.join(output_dir, f"{alpha_id}_{region}"))


def save_yearly_stats(yearly_stats, alpha_id, region):
    """
    Write a yearly-stats dataframe as CSV into the folder yearly_stats.

    The file is named ``<alpha_id>_<region>`` and is written without the
    index column; the folder is created when it does not exist yet.
    """

    output_dir = "yearly_stats/"
    os.makedirs(output_dir, exist_ok=True)
    yearly_stats.to_csv(
        os.path.join(output_dir, f"{alpha_id}_{region}"), index=False
    )


def get_alpha_pnl(s, alpha_id):
    """
    Fetch the pnl record set of an alpha as a dataframe.

    The request is repeated for as long as the response carries a
    Retry-After header, sleeping for the indicated number of seconds in
    between. Returns an empty dataframe when the response has no
    ``records``; otherwise a frame indexed by ``Date`` (parsed as
    ``%Y-%m-%d``) with the columns ``Pnl`` and ``alpha_id``.
    """

    endpoint = "https://api.worldquantbrain.com/alphas/" + alpha_id + "/recordsets/pnl"
    response = s.get(endpoint)
    while "retry-after" in response.headers:
        time.sleep(float(response.headers["Retry-After"]))
        response = s.get(endpoint)

    records = response.json().get("records", 0)
    if records == 0:
        return pd.DataFrame()

    frame = pd.DataFrame(records, columns=["Date", "Pnl"])
    frame = frame.assign(
        alpha_id=alpha_id,
        Date=lambda df: pd.to_datetime(df.Date, format="%Y-%m-%d"),
    )
    return frame.set_index("Date")


def get_alpha_yearly_stats(s, alpha_id):
    """
    Fetch the yearly-stats record set of an alpha as a dataframe.

    The request is repeated for as long as the response carries a
    Retry-After header, sleeping for the indicated number of seconds in
    between. Returns an empty dataframe when the response has no
    ``records``; otherwise one column per schema property plus ``alpha_id``.
    """

    endpoint = (
        "https://api.worldquantbrain.com/alphas/"
        + alpha_id
        + "/recordsets/yearly-stats"
    )
    response = s.get(endpoint)
    while "retry-after" in response.headers:
        time.sleep(float(response.headers["Retry-After"]))
        response = s.get(endpoint)

    payload = response.json()
    if payload.get("records", 0) == 0:
        return pd.DataFrame()

    column_names = [prop["name"] for prop in payload["schema"]["properties"]]
    frame = pd.DataFrame(payload["records"], columns=column_names)
    return frame.assign(alpha_id=alpha_id)

def get_datasets(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000'
):
    """
    List the data sets available for the given simulation settings.

    Issues one GET to the data-sets endpoint through the session ``s`` and
    returns the ``results`` of the JSON answer as a dataframe.
    """
    query = "&".join(
        [
            f"instrumentType={instrument_type}",
            f"region={region}",
            f"delay={str(delay)}",
            f"universe={universe}",
        ]
    )
    response = s.get("https://api.worldquantbrain.com/data-sets?" + query)
    return pd.DataFrame(response.json()['results'])


def get_datafields(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000',
    dataset_id: str = '',
    search: str = ''
):
    """
    Download data fields page by page (50 per request) into a dataframe.

    Without ``search`` the fields of ``dataset_id`` are listed; the number
    of fields is read from the ``count`` of a first request. With a
    non-empty ``search`` the text search is used instead (``dataset_id`` is
    not sent) and a fixed total of 100 is assumed, i.e. two pages.
    """
    common = "https://api.worldquantbrain.com/data-fields?" +\
        f"&instrumentType={instrument_type}" +\
        f"&region={region}&delay={str(delay)}&universe={universe}"

    if len(search) != 0:
        url_template = common + "&limit=50" + f"&search={search}" + "&offset={x}"
        count = 100
    else:
        url_template = common + f"&dataset.id={dataset_id}&limit=50" + "&offset={x}"
        count = s.get(url_template.format(x=0)).json()['count']

    # Collect the rows of every page into one flat list.
    rows = []
    for offset in range(0, count, 50):
        page = s.get(url_template.format(x=offset))
        rows.extend(page.json()['results'])

    return pd.DataFrame(rows)



# Default post-simulation options. The keys mirror the keyword arguments of
# get_specified_alpha_stats, into which the simulate_alpha_list* helpers
# unpack this dict (``**simulation_config``). Everything is switched off by
# default, so only the basic in-sample stats and checks are collected.
DEFAULT_CONFIG = {
    "get_pnl": False,  # download the pnl record set
    "get_stats": False,  # download the yearly-stats record set
    "save_pnl_file": False,  # write the pnl to disk (only has effect together with get_pnl)
    "save_stats_file": False,  # write the yearly stats to disk (only has effect together with get_stats)
    "save_result_file": False,  # dump the raw simulation result JSON to disk
    "check_submission": False,  # replace the in-sample checks by the submission check result
    "check_self_corr": False,  # add a self-correlation test row
    "check_prod_corr": False,  # add a prod-correlation test row
}


def get_credentials():
    """
    Return the platform credentials as an ``(email, password)`` tuple.

    Credentials saved in ``~/secrets/platform-brain.json`` are used when
    that file exists and is larger than 2 bytes. Otherwise they are taken
    from the environment variables BRAIN_CREDENTIAL_EMAIL and
    BRAIN_CREDENTIAL_PASSWORD (both must be set) or, failing that, asked
    for interactively, and then saved to that file as JSON.
    """

    env_email = os.environ.get('BRAIN_CREDENTIAL_EMAIL')
    env_password = os.environ.get('BRAIN_CREDENTIAL_PASSWORD')

    secrets_dir = os.path.join(os.path.expanduser("~"), "secrets")
    secrets_file = os.path.join(secrets_dir, "platform-brain.json")

    has_saved_credentials = (
        Path(secrets_file).exists() and os.path.getsize(secrets_file) > 2
    )
    if has_saved_credentials:
        with open(secrets_file) as handle:
            saved = json.load(handle)
        return (saved["email"], saved["password"])

    os.makedirs(secrets_dir, exist_ok=True)
    if env_email and env_password:
        email, password = env_email, env_password
    else:
        email = input("Email:\n")
        password = getpass.getpass(prompt="Password:")

    fresh = {"email": email, "password": password}
    with open(secrets_file, "w") as handle:
        json.dump(fresh, handle)
    return (fresh["email"], fresh["password"])


def start_session():

    """
    Sign in to the platform and return the authenticated session.

    Creates a ``requests.Session`` whose basic-auth credentials come from
    get_credentials() and POSTs to the authentication endpoint.

    On a 401 answer there are two cases:
    * ``WWW-Authenticate: persona`` - the biometrics URL (from the Location
      header) is printed and the function waits for the user; the URL is
      then POSTed repeatedly until it answers 201.
    * anything else - the credentials are treated as wrong: the saved
      credentials file is overwritten with ``{}`` and the function calls
      itself again, which makes get_credentials() obtain new ones.

    Returns:
        The session object (also when the first answer was not a 401).
    """

    s = requests.Session()
    s.auth = get_credentials()
    r = s.post("https://api.worldquantbrain.com/authentication")

    if r.status_code == requests.status_codes.codes.unauthorized:
        if r.headers["WWW-Authenticate"] == "persona":
            print(
                "Complete biometrics authentication and press any key to continue: \n"
                + urljoin(r.url, r.headers["Location"]) + "\n"
            )
            input()
            # NOTE(review): the result of this first POST is discarded; the
            # loop below immediately POSTs to the same URL again.
            s.post(urljoin(r.url, r.headers["Location"]))

            # Keep asking until the biometrics endpoint confirms with 201.
            while True:
                if s.post(urljoin(r.url, r.headers["Location"])).status_code != 201:
                    input("Biometrics authentication is not complete. Please try again and press any key when completed \n")
                else:
                    break
        else:
            print("\nIncorrect email or password\n")
            # Wipe the saved credentials ("{}" is 2 bytes, which
            # get_credentials treats as "nothing saved").
            # NOTE(review): when the wrong credentials come from the
            # BRAIN_CREDENTIAL_* environment variables they will be picked
            # up again on every retry - this recursion then looks unbounded;
            # confirm and consider a retry limit.
            with open(
                os.path.join(os.path.expanduser("~"), "secrets/platform-brain.json"),
                "w",
            ) as file:
                json.dump({}, file)
            return start_session()
    return s

def check_session_timeout(s):
    """
    Return the remaining lifetime of the session token.

    Reads ``token.expiry`` from the authentication endpoint through the
    session ``s``. Any failure (request error, non-JSON answer, missing
    keys) is reported as ``0``, which callers treat as "session expired".

    Only ``Exception`` subclasses are swallowed: KeyboardInterrupt and
    SystemExit propagate, so a long batch run can still be interrupted.
    """

    authentication_url = "https://api.worldquantbrain.com/authentication"
    try:
        return s.get(authentication_url).json()["token"]["expiry"]
    except Exception:
        return 0


def generate_alpha(
    regular: str,
    region: str = "USA",
    universe: str = "TOP3000",
    neutralization: str = "INDUSTRY",
    delay: int = 1,
    decay: int = 0,
    truncation: float = 0.08,
    nan_handling: str = "OFF",
    unit_handling: str = "VERIFY",
    pasteurization: str = "ON",
    visualization: bool = False,
):
    """
    Build the request payload for simulating one regular alpha.

    ``regular`` is the alpha expression; every other argument fills one
    entry of the ``settings`` section and has a default. The instrument
    type is fixed to "EQUITY" and the language to "FASTEXPR".
    """

    settings = {
        "nanHandling": nan_handling,
        "instrumentType": "EQUITY",
        "delay": delay,
        "universe": universe,
        "truncation": truncation,
        "unitHandling": unit_handling,
        "pasteurization": pasteurization,
        "region": region,
        "language": "FASTEXPR",
        "decay": decay,
        "neutralization": neutralization,
        "visualization": visualization,
    }
    return {"type": "REGULAR", "settings": settings, "regular": regular}


def start_simulation(
    s, simulate_data
):
    """
    Submit ``simulate_data`` to the simulations endpoint.

    Returns the raw response of the POST request made through ``s``.
    """
    simulations_url = "https://api.worldquantbrain.com/simulations"
    return s.post(simulations_url, json=simulate_data)


def simulation_progress(s,
    simulate_response,
):
    """
    Wait for a single simulation to finish and fetch its result.

    ``simulate_response`` is the response of the request that started the
    simulation. Its Location header is polled, sleeping for the announced
    Retry-After seconds, until an answer comes back without that header.

    Returns ``{"completed": True, "result": <alpha json>}`` on success and
    ``{"completed": False, "result": {}}`` when the start request was not
    answered with a 2xx status, the simulation reports (or lacks) a status
    of "ERROR", or the final answer names no alpha. Failures are printed.
    """

    if simulate_response.status_code // 100 != 2:
        print(simulate_response.text)
        return {"completed": False, "result": {}}

    progress_url = simulate_response.headers["Location"]
    while True:
        progress = s.get(progress_url)
        if progress.headers.get("Retry-After", 0) == 0:
            break
        time.sleep(float(progress.headers["Retry-After"]))

    payload = progress.json()

    if payload.get("status", "ERROR") == "ERROR":
        print("An error occurred")
        if "message" in payload:
            print(payload["message"])
        return {"completed": False, "result": {}}

    alpha = payload.get("alpha", 0)
    if alpha == 0:
        return {"completed": False, "result": {}}

    return {"completed": True, "result": get_simulation_result_json(s, alpha)}



def multisimulation_progress(s,
    simulate_response,
):
    """
    Wait for a multi-simulation to finish and fetch the child results.

    ``simulate_response`` is the response of the request that started the
    multi-simulation. Its Location header is polled, sleeping for the
    announced Retry-After seconds, until an answer comes back without that
    header. Every child simulation listed in the final answer is then
    looked up and the JSON of its alpha is collected.

    Returns ``{"completed": True, "result": [<alpha json>, ...]}`` on
    success and ``{"completed": False, "result": {}}`` when the start
    request was not answered with a 2xx status, the simulation reports (or
    lacks) a status of "ERROR", or the final answer has no children - a
    missing ``children`` key is treated like an empty list instead of
    raising. Failures are printed.
    """

    if simulate_response.status_code // 100 != 2:
        print(simulate_response.text)
        return {"completed": False, "result": {}}

    simulation_progress_url = simulate_response.headers["Location"]
    while True:
        progress = s.get(simulation_progress_url)
        if progress.headers.get("Retry-After", 0) == 0:
            break
        time.sleep(float(progress.headers["Retry-After"]))

    payload = progress.json()

    if payload.get("status", "ERROR") == "ERROR":
        print("An error occurred")
        if "message" in payload:
            print(payload["message"])
        return {"completed": False, "result": {}}

    # Default to an empty list so that a missing key cannot break len().
    children = payload.get("children", [])
    if not children:
        return {"completed": False, "result": {}}

    children_list = []
    for child in children:
        child_progress = s.get("https://api.worldquantbrain.com/simulations/" + child)
        alpha = child_progress.json()["alpha"]
        children_list.append(get_simulation_result_json(s, alpha))
    return {"completed": True, "result": children_list}


def get_prod_corr(s, alpha_id):
    """
    Fetch the prod-correlation record set of an alpha as a dataframe.

    The request is repeated for as long as the response carries a
    Retry-After header, sleeping for the indicated number of seconds in
    between. Returns an empty dataframe when the response has no
    ``records``; otherwise one column per schema property plus ``alpha_id``.
    """

    endpoint = "https://api.worldquantbrain.com/alphas/" + alpha_id + "/correlations/prod"
    response = s.get(endpoint)
    while "retry-after" in response.headers:
        time.sleep(float(response.headers["Retry-After"]))
        response = s.get(endpoint)

    payload = response.json()
    if payload.get("records", 0) == 0:
        return pd.DataFrame()

    column_names = [prop["name"] for prop in payload["schema"]["properties"]]
    frame = pd.DataFrame(payload["records"], columns=column_names)
    return frame.assign(alpha_id=alpha_id)


def check_prod_corr_test(s, alpha_id, threshold: float = 0.7):
    """
    Evaluate the prod-correlation test of an alpha.

    The highest ``max`` among the correlation buckets that contain at least
    one alpha (``alphas > 0``) is compared with ``threshold``; the test
    passes when it is less than or equal to the threshold.

    When no correlation records are available (get_prod_corr returned an
    empty frame) the test is reported as passed with a value of 0, in line
    with check_self_corr_test, instead of failing on the missing columns.

    Returns:
        A one-row dataframe with the columns ``test``, ``result``,
        ``limit``, ``value`` and ``alpha_id``.
    """

    prod_corr_df = get_prod_corr(s, alpha_id)
    if prod_corr_df.empty:
        value = 0
        outcome = "PASS"
    else:
        # NOTE(review): when no bucket has alphas > 0 this max is NaN and
        # the comparison below yields "FAIL" - confirm that is intended.
        value = prod_corr_df[prod_corr_df.alphas > 0]["max"].max()
        outcome = "PASS" if value <= threshold else "FAIL"

    result = [
        {
            "test": "PROD_CORRELATION",
            "result": outcome,
            "limit": threshold,
            "value": value,
            "alpha_id": alpha_id,
        }
    ]
    return pd.DataFrame(result)


def get_self_corr(s, alpha_id):
    """
    Fetch the self-correlation record set of an alpha as a dataframe.

    The request is repeated for as long as the response carries a
    Retry-After header, sleeping for the indicated number of seconds in
    between. Returns an empty dataframe when the response has no
    ``records`` or an empty list of them; otherwise one column per schema
    property plus ``alpha_id``.
    """

    endpoint = "https://api.worldquantbrain.com/alphas/" + alpha_id + "/correlations/self"
    response = s.get(endpoint)
    while "retry-after" in response.headers:
        time.sleep(float(response.headers["Retry-After"]))
        response = s.get(endpoint)

    payload = response.json()
    if payload.get("records", 0) == 0:
        return pd.DataFrame()
    if len(payload["records"]) == 0:
        return pd.DataFrame()

    column_names = [prop["name"] for prop in payload["schema"]["properties"]]
    frame = pd.DataFrame(payload["records"], columns=column_names)
    return frame.assign(alpha_id=alpha_id)


def check_self_corr_test(s, alpha_id, threshold: float = 0.7):
    """
    Evaluate the self-correlation test of an alpha.

    The highest ``correlation`` returned by get_self_corr is compared with
    ``threshold``; the test passes when it is strictly below the threshold.
    Without any correlation records the test passes with a value of 0.

    Returns a one-row dataframe with the columns ``test``, ``result``,
    ``limit``, ``value`` and ``alpha_id``.
    """

    correlations = get_self_corr(s, alpha_id)
    if correlations.empty:
        value = 0
        outcome = "PASS"
    else:
        value = correlations["correlation"].max()
        outcome = "PASS" if value < threshold else "FAIL"

    row = {
        "test": "SELF_CORRELATION",
        "result": outcome,
        "limit": threshold,
        "value": value,
        "alpha_id": alpha_id,
    }
    return pd.DataFrame([row])



def get_check_submission(s, alpha_id):
    """
    Fetch the submission checks of an alpha as a dataframe.

    The ``/check`` endpoint of the alpha is requested through the session
    ``s``; the request is repeated for as long as the response carries a
    Retry-After header, sleeping for the indicated number of seconds in
    between.

    Returns:
        An empty dataframe when the answer has no ``is`` section; otherwise
        one row per entry of ``is.checks`` plus an ``alpha_id`` column. When
        the checks carry a ``year`` column, the ``value`` of the
        IS_LADDER_SHARPE row is replaced by a dict holding its value and
        year, and the columns ``endDate``, ``startDate`` and ``year`` are
        dropped.
    """

    while True:
        result = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id + "/check")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    if result.json().get("is", 0) == 0:
        return pd.DataFrame()

    checks_df = pd.DataFrame(
            result.json()["is"]["checks"]
    ).assign(alpha_id=alpha_id)

    if 'year' in checks_df:
        # Pack value and year of the (first) IS_LADDER_SHARPE row into one dict ...
        # NOTE(review): .iloc[0] raises IndexError if there is no
        # IS_LADDER_SHARPE row although a 'year' column exists - confirm
        # the API always sends them together.
        ladder_dict = [checks_df.loc[checks_df.index[checks_df['name']=='IS_LADDER_SHARPE']][['value', 'year']].iloc[0].to_dict()]
        # ... and store it in that row's 'value' cell.
        # NOTE(review): .at is documented for scalar labels but receives an
        # Index here - verify this assignment on the installed pandas version.
        checks_df.at[checks_df.index[checks_df['name']=='IS_LADDER_SHARPE'], 'value'] = ladder_dict
        # The ladder-specific columns are not needed once packed.
        checks_df.drop(['endDate', 'startDate', 'year'], axis=1, inplace=True)

    return checks_df





def submit_alpha(s, alpha_id):
    """
    Submit an alpha and report whether the platform accepted it.

    POSTs to the alpha's ``/submit`` endpoint; while the latest answer
    carries a Retry-After header, sleeps for that many seconds and polls
    the same endpoint with GET. Returns True when the final status code
    is 200.

    This function is not used anywhere.
    """
    submit_url = "https://api.worldquantbrain.com/alphas/" + alpha_id + "/submit"
    response = s.post(submit_url)
    while "retry-after" in response.headers:
        time.sleep(float(response.headers["Retry-After"]))
        response = s.get(submit_url)
    return response.status_code == 200


def get_simulation_result_json(s, alpha_id):
    """Return the decoded JSON description of the alpha ``alpha_id``."""
    alpha_url = "https://api.worldquantbrain.com/alphas/" + alpha_id
    response = s.get(alpha_url)
    return response.json()


def simulate_single_alpha(
    s,
    simulate_data,
):
    """
    Simulate one alpha and tag it with the default properties.

    A new session is started first when the current one has less than 1000
    (as reported by check_session_timeout) left. Returns a dict with the
    ``alpha_id`` of the simulated alpha - None when the simulation did not
    complete - and the ``simulate_data`` it was started with.
    """

    if check_session_timeout(s) < 1000:
        s = start_session()

    outcome = simulation_progress(s, start_simulation(s, simulate_data))

    if outcome["completed"]:
        new_alpha_id = outcome["result"]["id"]
        set_alpha_properties(s, new_alpha_id)
        return {'alpha_id': new_alpha_id, 'simulate_data': simulate_data}
    return {'alpha_id': None, 'simulate_data': simulate_data}


def simulate_multi_alpha(
    s,
    simulate_data_list,
):
    """
    Simulate several alphas in one multi-simulation request.

    A new session is started first when the current one has less than 1000
    (as reported by check_session_timeout) left. A list with a single entry
    is handed over to simulate_single_alpha.

    Returns one dict per alpha with its ``alpha_id`` and ``simulate_data``.
    When the multi-simulation did not complete, every ``alpha_id`` is None
    and the original inputs are echoed; otherwise ``simulate_data`` is
    rebuilt from the type, settings and code of the simulated alpha, and
    each alpha is tagged with the default properties.
    """

    if check_session_timeout(s) < 1000:
        s = start_session()
    if len(simulate_data_list) == 1:
        return [simulate_single_alpha(s, simulate_data_list[0])]

    outcome = multisimulation_progress(s, start_simulation(s, simulate_data_list))

    if not outcome["completed"]:
        return [{'alpha_id': None, 'simulate_data': item} for item in simulate_data_list]

    simulated = []
    for alpha in outcome["result"]:
        rebuilt_data = {
            "type": alpha["type"],
            "settings": alpha["settings"],
            "regular": alpha["regular"]["code"],
        }
        simulated.append({"alpha_id": alpha["id"], "simulate_data": rebuilt_data})

    for alpha in outcome["result"]:
        set_alpha_properties(s, alpha["id"])
    return simulated


def get_specified_alpha_stats(
    s,
    alpha_id,
    simulate_data,
    get_pnl: bool = False,
    get_stats: bool = False,
    save_pnl_file: bool = False,
    save_stats_file: bool = False,
    save_result_file: bool = False,
    check_submission: bool = False,
    check_self_corr: bool = False,
    check_prod_corr: bool = False,
):
    """
    Master function collecting the statistics selected in the config.

    Args:
        s: authenticated session.
        alpha_id: id of the simulated alpha, or None when the simulation
            failed (every statistic is then returned as None).
        simulate_data: simulation payload, passed through to the result.
        get_pnl / get_stats: download the pnl / yearly-stats record sets.
        save_pnl_file / save_stats_file: write them to disk (only has an
            effect together with the matching ``get_*`` flag).
        save_result_file: dump the raw simulation result JSON to disk.
        check_submission: use the submission check result as ``is_tests``;
            the two correlation checks are then skipped.
        check_self_corr / check_prod_corr: append a self- / prod-correlation
            test row to ``is_tests``.

    Returns:
        A dict with the keys ``alpha_id``, ``simulate_data``, ``is_stats``,
        ``pnl``, ``stats`` and ``is_tests``.
    """
    pnl = None
    stats = None

    if alpha_id is None:
        return {'alpha_id': None, 'simulate_data': simulate_data, 'is_stats': None, 'pnl': pnl, 'stats': stats, 'is_tests': None}

    result = get_simulation_result_json(s, alpha_id)
    region = result["settings"]["region"]
    # In-sample statistics without the nested list of checks.
    is_stats = pd.DataFrame(
        [{key: value for key, value in result['is'].items() if key != 'checks'}]
    ).assign(alpha_id=alpha_id)

    if get_pnl:
        pnl = get_alpha_pnl(s, alpha_id)
    if get_stats:
        stats = get_alpha_yearly_stats(s, alpha_id)

    if save_result_file:
        save_simulation_result(result)
    if save_pnl_file and get_pnl:
        save_pnl(pnl, alpha_id, region)
    if save_stats_file and get_stats:
        save_yearly_stats(stats, alpha_id, region)

    is_tests = pd.DataFrame(
        result["is"]["checks"]
    ).assign(alpha_id=alpha_id)

    if check_submission:
        is_tests = get_check_submission(s, alpha_id)

        return {'alpha_id': alpha_id, 'simulate_data': simulate_data, 'is_stats': is_stats, 'pnl': pnl, 'stats': stats, 'is_tests': is_tests}

    # check_submission is known to be False from here on.
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    # NOTE(review): the correlation rows are keyed by a "test" column while
    # other helpers in this file address the platform checks by "name";
    # de-duplicating on "test" may therefore collapse the platform check
    # rows (NaN in "test") into one - confirm the intended key.
    if check_self_corr:
        self_corr_test = check_self_corr_test(s, alpha_id)
        is_tests = (
            pd.concat([is_tests, self_corr_test], ignore_index=True, sort=False)
            .drop_duplicates(subset=["test"], keep="last")
            .reset_index(drop=True)
        )
    if check_prod_corr:
        prod_corr_test = check_prod_corr_test(s, alpha_id)
        is_tests = (
            pd.concat([is_tests, prod_corr_test], ignore_index=True, sort=False)
            .drop_duplicates(subset=["test"], keep="last")
            .reset_index(drop=True)
        )

    return {'alpha_id': alpha_id, 'simulate_data': simulate_data, 'is_stats': is_stats, 'pnl': pnl, 'stats': stats, 'is_tests': is_tests}


def simulate_alpha_list(
    s,
    alpha_list,
    limit_of_concurrent_simulations=3,
    simulation_config=DEFAULT_CONFIG,
):
    """
    Simulate every alpha of ``alpha_list`` and collect its statistics.

    The simulations run one alpha per request on a thread pool of
    ``limit_of_concurrent_simulations`` workers, with a progress bar; they
    are gathered in completion order. Afterwards the statistics selected by
    ``simulation_config`` are fetched for each simulated alpha on a pool of
    3 workers.

    Returns the list of dicts produced by get_specified_alpha_stats.
    """
    run_one = partial(simulate_single_alpha, s)
    simulated = []

    with ThreadPool(limit_of_concurrent_simulations) as pool, \
            tqdm.tqdm(total=len(alpha_list)) as pbar:
        for outcome in pool.imap_unordered(run_one, alpha_list):
            simulated.append(outcome)
            pbar.update()

    def collect_stats(item):
        return get_specified_alpha_stats(
            s, item['alpha_id'], item['simulate_data'], **simulation_config
        )

    with ThreadPool(3) as pool:
        stats_list_result = list(pool.map(collect_stats, simulated))

    return stats_list_result


def simulate_alpha_list_multi(
    s,
    alpha_list,
    limit_of_concurrent_simulations=3,
    limit_of_multi_simulations=3,
    simulation_config=DEFAULT_CONFIG,
):
    """
    Simulate ``alpha_list`` in multi-simulation batches and collect stats.

    The list is cut into batches of ``limit_of_multi_simulations`` alphas
    (2..10, otherwise 3 is used with a warning); the batches run on a
    thread pool of ``limit_of_concurrent_simulations`` workers, with a
    progress bar. Afterwards the statistics selected by
    ``simulation_config`` are fetched for each alpha on a pool of 3 workers.

    Lists shorter than 10 alphas are handed over to simulate_alpha_list
    (one alpha per request); ``limit_of_concurrent_simulations`` is
    forwarded so the caller's concurrency limit also applies there.

    Returns the list of dicts produced by get_specified_alpha_stats.
    """
    if (limit_of_multi_simulations < 2) or (limit_of_multi_simulations > 10):
        print('Warning, limit of multi-simulation should be 2..10')
        limit_of_multi_simulations = 3
    if len(alpha_list) < 10:
        print('Warning, list of alphas too short, single concurrent simulations will be used instead of multisimulations')
        return simulate_alpha_list(
            s,
            alpha_list,
            limit_of_concurrent_simulations=limit_of_concurrent_simulations,
            simulation_config=simulation_config,
        )

    tasks = [
        alpha_list[i:i + limit_of_multi_simulations]
        for i in range(0, len(alpha_list), limit_of_multi_simulations)
    ]
    result_list_flat = []

    with ThreadPool(limit_of_concurrent_simulations) as pool:
        with tqdm.tqdm(total=len(tasks)) as pbar:
            for batch_result in pool.imap_unordered(
                partial(simulate_multi_alpha, s), tasks
            ):
                # every batch yields one entry per alpha
                result_list_flat.extend(batch_result)
                pbar.update()

    def collect_stats(item):
        return get_specified_alpha_stats(
            s, item['alpha_id'], item['simulate_data'], **simulation_config
        )

    with ThreadPool(3) as pool:
        stats_list_result = list(pool.map(collect_stats, result_list_flat))

    return stats_list_result

In [42]:
def main():
    """Sign in to the platform and return the authenticated session."""
    return start_session()
# Session shared by the notebook cells that talk to the platform.
s = main()

Complete biometrics authentication and press any key to continue: 
https://api.worldquantbrain.com/authentication/persona?inquiry=inq_jwhZZTE7KQ8hPqSYbLns96Ls

 h


# Depth one trees

In [5]:
class Node:
    """Node of a binary expression tree: a value and up to two children."""

    def __init__(self, value):
        # operator name, data field name or numeric literal (as given)
        self.value = value
        # children start out empty
        self.left = self.right = None

def depth_one_trees(terminal_values, binary_ops,ts_ops,ts_ops_values, unary_ops, flag):
    """
    Build one random tree of depth one.

    flag == 0: binary operator applied to two random terminals.
    flag == 1: time-series operator applied to a random terminal and a
               random value from ``ts_ops_values``.
    Any other flag yields None. ``unary_ops`` is accepted but not used.
    """
    if flag not in (0, 1):
        return None

    if flag == 0:
        operator_pool, right_pool = binary_ops, terminal_values
    else:
        operator_pool, right_pool = ts_ops, ts_ops_values

    root = Node(random.choice(operator_pool))
    root.left = Node(random.choice(terminal_values))
    root.right = Node(random.choice(right_pool))
    return root

# Vocabulary for the random expression trees built in this notebook.
# Leaves: data field names used as operands.
terminal_values = ["close", "open", "high", "low", "vwap", "adv20", "volume", "cap", "returns", "dividend"]
# Time-series operators; in the trees they get an operand (left) and a value from ts_ops_values (right).
ts_ops = ["ts_zscore", "ts_rank", "ts_arg_max", "ts_arg_min", "ts_backfill", "ts_delta", "ts_ir", "ts_mean","ts_median", "ts_product", "ts_std_dev"]
# Operators taking two operands.
binary_ops = ["add", "subtract", "divide", "multiply", "max", "min"]
# Candidate second arguments for the time-series operators (kept as strings).
ts_ops_values = ["20", "40", "60", "120", "240"]
# Operators taking a single operand.
unary_ops = ["rank", "zscore", "winsorize", "normalize", "rank_by_side", "sigmoid", "pasteurize", "log"]

# Pool of 100 random depth-one trees; for each tree the shape is picked at
# random (0 = binary operator, 1 = time-series operator).
one_depth_tree = []
for i in range(100):
    flag = random.choice([0,1])
    one_tree = depth_one_trees(terminal_values, binary_ops,ts_ops,ts_ops_values, unary_ops, flag)
    one_depth_tree.append(one_tree)

In [6]:
def generate_dot_tree(node, dot, parent_id=""):
    """
    Add ``node`` and everything below it to the graph ``dot``.

    Every tree node becomes a graph node identified by ``str(id(node))``
    and labelled with its value; an edge from ``parent_id`` is added when
    one is given. The tree is walked in pre-order (node, left, right).
    """
    if not node:
        return

    node_key = str(id(node))
    dot.node(node_key, label=str(node.value))
    if parent_id:
        dot.edge(parent_id, node_key)

    for child in (node.left, node.right):
        generate_dot_tree(child, dot, node_key)

def display_tree_with_graphviz(node):
    """
    Render the tree rooted at ``node`` with graphviz.

    The graph is rendered as PNG under the name "genetic_programming_tree";
    intermediate files are cleaned up by graphviz.

    Raises:
        ImportError: when the ``graphviz`` package is not installed.
    """
    # Imported locally: the module-level ``import graphviz`` is commented
    # out, so the name would otherwise be undefined when this is called.
    import graphviz

    dot = graphviz.Digraph(comment="Genetic Programming Tree", format="png")
    generate_dot_tree(node, dot)
    dot.render("genetic_programming_tree", format="png", cleanup=True)

In [7]:
def depth_two_tree(tree1,tree2,ts_ops_values,ts_ops):
    """
    Combine depth-one trees into one random tree of depth two.

    The shape is selected by the module-level variable ``jhanda`` (it is
    not a parameter):
    jhanda == 0: random operator from the module-level ``binary_ops`` with
                 ``tree1`` as left and ``tree2`` as right child.
    jhanda == 1: random operator from ``ts_ops`` applied to one of the two
                 trees (picked at random) and a random ``ts_ops_values``
                 entry.
    Any other value yields None.
    """
    shape = jhanda
    if shape == 0:
        root = Node(random.choice(binary_ops))
        root.left, root.right = tree1, tree2
        return root
    if shape == 1:
        root = Node(random.choice(ts_ops))
        root.left = random.choice([tree1,tree2])
        root.right = Node(random.choice(ts_ops_values))
        return root
    return None

In [8]:
# Pool of 100 random depth-two trees, each combining two randomly drawn
# depth-one trees. `jhanda` is the shape selector that depth_two_tree reads
# as a global (0 = binary root, 1 = time-series root).
tree_two = []
for i in range(100):
    jhanda = random.choice([0,1])
    tree1 = random.choice(one_depth_tree)
    tree2 = random.choice(one_depth_tree)
    tree22 = depth_two_tree(tree1,tree2,ts_ops_values,ts_ops)
    tree_two.append(tree22)

In [11]:
def depth_three_tree(tree2):
    """
    Build one random tree of depth three on top of depth-two trees.

    ``tree2`` is the sequence of depth-two trees to draw children from. The
    shape is selected by the module-level variable ``f`` (it is not a
    parameter); the operator lists are module-level names as well:
    f == 0: unary operator over one random tree (no right child).
    f == 1: binary operator over two random trees.
    f == 2: time-series operator over one random tree and a random
            ``ts_ops_values`` entry.
    Any other value yields None.
    """
    shape = f
    if shape == 0:
        root = Node(random.choice(unary_ops))
        root.left = random.choice(tree2)
        root.right = None
        return root
    elif shape == 1:
        root = Node(random.choice(binary_ops))
        root.left = random.choice(tree2)
        root.right = random.choice(tree2)
        return root
    elif shape == 2:
        root = Node(random.choice(ts_ops))
        root.left = random.choice(tree2)
        root.right = Node(random.choice(ts_ops_values))
        return root
    return None
# Pool of 100 random depth-three trees built from the depth-two pool.
# `f` is the shape selector that depth_three_tree reads as a global
# (0 = unary root, 1 = binary root, 2 = time-series root).
tree3 = []
for i in range(100):
    f = random.choice([0,1,2])
    tree33 = depth_three_tree(tree_two)
    tree3.append(tree33)

DFS try

In [13]:
#depth 1 tree to alpha

def d1tree_to_alpha(tree):
    """Write a depth-one tree as the expression ``op(left,right)``."""
    return "{}({},{})".format(tree.value, tree.left.value, tree.right.value)

def d2tree_to_alpha(tree):
    """
    Write a depth-two tree as an expression string.

    A root from the module-level ``binary_ops`` gives
    ``op(l(a,b),r(c,d))``; a root from ``ts_ops`` gives ``op(l(a,b),v)``.
    Any other root yields None.
    """
    root_op = tree.value
    if root_op in binary_ops:
        left, right = tree.left, tree.right
        left_part = f"{left.value}({left.left.value},{left.right.value})"
        right_part = f"{right.value}({right.left.value},{right.right.value})"
        return f"{root_op}({left_part},{right_part})"
    if root_op in ts_ops:
        left = tree.left
        left_part = f"{left.value}({left.left.value},{left.right.value})"
        return f"{root_op}({left_part},{tree.right.value})"
    return None

def d3tree_to_alpha(tree):
    """
    Write an expression tree as a nested call string.

    A node without children is written as its value, a node with children
    as ``value(child,...)`` with the children written the same way, e.g.
    ``rank(ts_mean(subtract(close,open),20))``.

    This replaces the previous set of hand-written templates (one per
    combination of operator kinds), two of which produced unbalanced
    parentheses: binary root with two time-series children, and binary
    root with a time-series left and a binary right child. Because the
    string is derived from the tree structure itself, the function no
    longer depends on the module-level operator lists and also accepts
    trees of other depths.
    """
    children = [child for child in (tree.left, tree.right) if child is not None]
    if not children:
        return f"{tree.value}"
    arguments = ",".join(d3tree_to_alpha(child) for child in children)
    return f"{tree.value}({arguments})"

In [14]:
# Expression strings for the first 100 depth-three trees.
t3 = []
for i in range(100):
    r = d3tree_to_alpha(tree3[i])
    t3.append(r)

In [16]:
def fitness_fun(Data,n):
    """
    Score the alphas in ``Data`` and return the ``n`` best expressions.

    The score is
    ``sharpe * fitness * returns / (drawdown * turnover**2 + 0.001)``.
    ``Data`` is modified in place: the score is stored in the column
    ``fitness_column`` and the rows are sorted by it, highest first.
    """
    numerator = Data['sharpe'] * Data['fitness'] * Data['returns']
    denominator = (Data['drawdown'] * Data['turnover']**2) + 0.001
    Data['fitness_column'] = numerator / denominator
    Data.sort_values(by='fitness_column', ascending=False, inplace=True)
    best_expressions = Data.head(n)['expression'].tolist()
    return best_expressions

In [19]:
import re

def parse_expression(listr):
    """Tokenise each expression string in ``listr``.

    Every string is scanned for word runs and parentheses; the parentheses
    are then discarded, so each result is the flat, left-to-right list of
    operator / operand tokens (commas and other punctuation are skipped).

    Returns a list with one token list per input string.
    """
    token_re = re.compile(r'(\w+|\d+|\(|\))')
    brackets = ('(', ')')
    return [
        [tok for tok in token_re.findall(expression) if tok and tok not in brackets]
        for expression in listr
    ]


def d1_alpha_to_tree(alphas):
    """Turn depth-one alpha strings into three-node trees.

    Each expression is tokenised with ``parse_expression``; token 0 becomes
    the root, token 1 its left child and token 2 its right child.

    Returns the list of root ``Node`` objects, one per expression.
    """
    forest = []
    for tokens in parse_expression(alphas):
        root = Node(tokens[0])
        root.left = Node(tokens[1])
        root.right = Node(tokens[2])
        forest.append(root)
    return forest
def d2_alpha_to_tree(alpha):
    """Turn depth-two alpha strings into trees.

    Tokens come from ``parse_expression`` and are placed by position:

    * root in ``ts_ops``: tokens 1-3 form the left subtree (operator plus two
      operands) and token 4 is the right leaf.
    * root in ``binary_ops``: tokens 1-3 form the left subtree and tokens 4-6
      the right subtree.

    The two checks are independent; expressions whose root is in neither
    family produce no tree.
    """
    forest = []
    for tokens in parse_expression(alpha):
        head = tokens[0]

        if head in ts_ops:
            root = Node(head)
            inner = Node(tokens[1])
            inner.left = Node(tokens[2])
            inner.right = Node(tokens[3])
            root.left = inner
            root.right = Node(tokens[4])
            forest.append(root)

        if head in binary_ops:
            root = Node(head)
            lhs = Node(tokens[1])
            lhs.left = Node(tokens[2])
            lhs.right = Node(tokens[3])
            root.left = lhs
            rhs = Node(tokens[4])
            rhs.left = Node(tokens[5])
            rhs.right = Node(tokens[6])
            root.right = rhs
            forest.append(root)
    return forest

def d3_alpha_to_tree(alpha):
    """Turn depth-three alpha strings into trees.

    Each expression is tokenised with ``parse_expression`` and matched
    against eight known shapes. A shape is recognised by the operator family
    (``unary_ops`` / ``ts_ops`` / ``binary_ops``) of token 0, token 1 and,
    for binary roots, the token that starts the right subtree (index 6 or 8).
    Every matching shape contributes one tree; the shapes are tried in a
    fixed order and are independent of one another.

    Layout strings give the position of token ``i`` in the tree: ``""`` is
    the root, ``"L"``/``"R"`` step to the left/right child.
    """
    head_layout = ("", "L", "LL", "LLL", "LLR", "LR")
    wide_left = head_layout + ("LRL", "LRR")
    rules = (
        # unary(ts(binary(a, b), w))
        (((0, unary_ops), (1, ts_ops)), head_layout),
        # unary(binary(binary(a, b), binary(c, d)))
        (((0, unary_ops), (1, binary_ops)), wide_left),
        # ts(ts(binary(a, b), w1), w2)
        (((0, ts_ops), (1, ts_ops)), head_layout + ("R",)),
        # ts(binary(binary(a, b), binary(c, d)), w)
        (((0, ts_ops), (1, binary_ops)), wide_left + ("R",)),
        # binary(ts(...), binary(binary, binary))
        (((0, binary_ops), (1, ts_ops), (6, binary_ops)),
         head_layout + ("R", "RL", "RLL", "RLR", "RR", "RRL", "RRR")),
        # binary(ts(...), ts(binary, w))
        (((0, binary_ops), (1, ts_ops), (6, ts_ops)),
         head_layout + ("R", "RL", "RLL", "RLR", "RR")),
        # binary(binary(...), binary(binary, binary))
        (((0, binary_ops), (1, binary_ops), (8, binary_ops)),
         wide_left + ("R", "RL", "RLL", "RLR", "RR", "RRL", "RRR")),
        # binary(binary(...), ts(binary, w))
        (((0, binary_ops), (1, binary_ops), (8, ts_ops)),
         wide_left + ("R", "RL", "RLL", "RLR", "RR")),
    )
    branch = {"L": "left", "R": "right"}

    def assemble(tokens, layout):
        # Parents always precede their children in a layout, so walking the
        # path from the root finds an already-attached parent.
        root = Node(tokens[0])
        for position, path in enumerate(layout[1:], start=1):
            parent = root
            for step in path[:-1]:
                parent = getattr(parent, branch[step])
            setattr(parent, branch[path[-1]], Node(tokens[position]))
        return root

    trees = []
    for tokens in parse_expression(alpha):
        for checks, layout in rules:
            # all() short-circuits, so later tokens are only inspected when
            # the earlier family checks succeed.
            if all(tokens[index] in family for index, family in checks):
                trees.append(assemble(tokens, layout))
    return trees

In [20]:
import random

class Node:
    """Binary-tree node whose ``value`` holds one expression token."""

    def __init__(self, value):
        self.value = value
        # Children start empty; callers attach subtrees by plain assignment.
        self.left = self.right = None

def copy_tree(original_node):
    """Return a deep copy of the tree rooted at ``original_node``.

    ``None`` is returned unchanged; otherwise a fresh ``Node`` is built for
    every node, reusing the original ``value`` objects.
    """
    if original_node is None:
        return None

    duplicate = Node(original_node.value)
    duplicate.left, duplicate.right = (
        copy_tree(original_node.left),
        copy_tree(original_node.right),
    )
    return duplicate

def mutate_random_node(original_node, terminal_values, unary_ops, binary_ops, ts_ops, ts_ops_values):
    """Return a copy of the tree with (at most) one randomly chosen node mutated.

    The input tree is never modified. With probability 0.5 one node is picked
    uniformly among all nodes whose string ``value`` belongs to one of the
    supplied families, and its value is replaced by a random member of the
    *same* family, so the arity and shape of the tree are preserved.

    Previously the descent into children sat in the same ``elif`` chain as
    the family checks, so it was only reached for values belonging to no
    family; in practice only the root could ever be mutated.

    Args:
        original_node: root of the tree to mutate (may be ``None``).
        terminal_values, unary_ops, binary_ops, ts_ops, ts_ops_values:
            sequences of allowed values for each node family.

    Returns:
        The root of the (possibly mutated) copy, or ``None`` for an empty tree.
    """
    mutated_tree = copy_tree(original_node)
    mutation_probability = 0.5

    # Checked in this order, matching the original priority of the families.
    families = (binary_ops, ts_ops, ts_ops_values, unary_ops, terminal_values)

    def family_of(node):
        # Only string values are classified, as in the original checks.
        if not isinstance(node.value, str):
            return None
        for family in families:
            if node.value in family:
                return family
        return None

    # Gather every node that can be mutated, together with its family.
    candidates = []
    pending = [mutated_tree]
    while pending:
        node = pending.pop()
        if node is None:
            continue
        family = family_of(node)
        if family is not None:
            candidates.append((node, family))
        pending.append(node.right)
        pending.append(node.left)

    if candidates and random.random() < mutation_probability:
        target, family = random.choice(candidates)
        target.value = random.choice(family)

    return mutated_tree

In [21]:
def crossover(parent1, parent2,n):
    """Produce two children by swapping one subtree between copies of the parents.

    The parents are copied first and never modified. A swap only happens for
    ``n`` equal to 2 or 3 (both depths are handled the same way) and only
    when *both* roots belong to the same operator family:

    * both roots in ``binary_ops``: a random side of child 1 is exchanged
      with either the same side or the opposite side of child 2;
    * both roots in ``ts_ops``: the left subtrees are exchanged.

    In every other case the unchanged copies are returned, so the result can
    always be unpacked as ``a, b = crossover(...)``.

    Fixes over the previous version: ``x and y in ops`` only tested ``y``;
    the ``' N'`` comparison (leading space) never matched, so that branch
    returned ``None``; there was no final return.
    """
    child1 = copy_tree(parent1)
    child2 = copy_tree(parent2)

    if n not in (2, 3):
        return child1, child2

    if child1.value in binary_ops and child2.value in binary_ops:
        side = random.choice(['R','L'])
        same = random.choice(['Y','N'])
        first_attr = 'left' if side == 'L' else 'right'
        if same == 'Y':
            second_attr = first_attr
        else:
            second_attr = 'right' if side == 'L' else 'left'
        first_sub = getattr(child1, first_attr)
        second_sub = getattr(child2, second_attr)
        setattr(child1, first_attr, second_sub)
        setattr(child2, second_attr, first_sub)
    elif child1.value in ts_ops and child2.value in ts_ops:
        # The right child of a ts node is not exchanged; only the left one is.
        child1.left, child2.left = child2.left, child1.left

    return child1, child2

def copy_tree(original_node):
    """Return a deep copy of the tree rooted at ``original_node``.

    ``None`` is returned unchanged; otherwise a fresh ``Node`` is built for
    every node, reusing the original ``value`` objects.
    """
    if original_node is None:
        return None

    duplicate = Node(original_node.value)
    duplicate.left, duplicate.right = (
        copy_tree(original_node.left),
        copy_tree(original_node.right),
    )
    return duplicate

def get_random_node(node):
    """Return one node chosen uniformly at random from the tree under ``node``.

    The candidates are gathered with ``collect_nodes``; an empty tree leaves
    nothing to choose from and ``random.choice`` raises ``IndexError``.
    """
    candidates = []
    collect_nodes(node, candidates)
    picked = random.choice(candidates)
    return picked

def collect_nodes(node, nodes):
    """Append every node of the tree under ``node`` to ``nodes`` in pre-order.

    The list is extended in place (node, then its left subtree, then its
    right subtree); falsy entries such as ``None`` children are skipped.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if not current:
            continue
        nodes.append(current)
        # Push right first so the left subtree is popped, and visited, first.
        pending.append(current.right)
        pending.append(current.left)


In [22]:
def best_d1_alphas(n,m):
    """Search for the ``n`` best depth-one alpha expressions.

    Stages:
      1. Generate ``2*n`` random depth-one expressions and keep the top ``n``.
      2. ``m`` times: add ``n`` fresh random expressions and keep the top ``n``.
      3. Add one mutated variant of every survivor and keep the top ``n``.

    "Keep the top n" means: drop duplicates, simulate the expressions through
    ``simulate_alpha_list`` with the module-level ``s``, tabulate with
    ``prettify_result`` and rank with ``fitness_fun``.

    Relies on module-level names (operator lists, ``flag``, ``s``, helper
    functions) being defined before the call.

    Returns the list of the ``n`` best expression strings.
    """
    def random_alpha():
        tree = depth_one_trees(terminal_values, binary_ops,ts_ops,ts_ops_values, unary_ops, flag)
        return d1tree_to_alpha(tree)

    def select_best(candidates):
        unique = list(OrderedDict.fromkeys(candidates))
        alpha_list = [generate_alpha(expr) for expr in unique]
        cntx = simulate_alpha_list(s, alpha_list)
        Data = prettify_result(cntx, detailed_tests_view=False, clickable_alpha_id = False)
        return fitness_fun(Data,n)

    print("Initial parent selection of Depth one staarted")
    population = select_best([random_alpha() for _ in range(n*2)])

    for _ in range(m):
        newcomers = [random_alpha() for _ in range(n)]
        population.extend(newcomers)
        population = select_best(population)
    print("Initial parent selection done starting with mutation of depth one")

    best_trees = d1_alpha_to_tree(population)
    # Build all mutants first so the list is not extended while being read.
    mutants = [
        mutate_random_node(tree, terminal_values, unary_ops, binary_ops, ts_ops, ts_ops_values)
        for tree in best_trees
    ]
    best_trees.extend(mutants)
    mut = [d1tree_to_alpha(tree) for tree in best_trees]

    population = select_best(mut)
    print("Depth one done")
    return population

In [26]:
def best_d2_alphas(onetree,n,m):
    """Search for the ``n`` best depth-two alpha expressions.

    Stages:
      1. Build ``2*n`` depth-two expressions from random pairs of the
         depth-one trees parsed from ``onetree`` and keep the top ``n``.
      2. ``m`` times: add ``n`` fresh expressions and keep the top ``n``.
      3. Mutation: add one mutant per survivor and keep the top ``n``.
      4. Crossover: cross each tree with its successor, add the children and
         keep the top ``n``.
      5. Merge the winners of stages 3 and 4 and keep the top ``n``.

    "Keep the top n" means: drop duplicates, simulate through
    ``simulate_alpha_list`` with the module-level ``s``, tabulate with
    ``prettify_result`` and rank with ``fitness_fun``.

    Changes over the previous version:
      * the mutation-stage winners were simulated but never used; they are
        now merged into the final candidate set;
      * crossover pairs are taken from a snapshot of the trees instead of
        indexing ``i + 1`` into the list being appended to (which failed for
        a single tree and paired the last parent with a fresh child);
      * a crossover that yields no pair (``None``) is skipped instead of
        crashing on unpacking;
      * the unused random draw ``jhanda`` was removed.

    Returns the list of the ``n`` best expression strings.
    """
    def select_best(candidates):
        unique = list(OrderedDict.fromkeys(candidates))
        alpha_list = [generate_alpha(expr) for expr in unique]
        cntx = simulate_alpha_list(s, alpha_list)
        Data = prettify_result(cntx, detailed_tests_view=False, clickable_alpha_id = False)
        return fitness_fun(Data,n)

    def random_alpha():
        tree1 = random.choice(best_one_trees)
        tree2 = random.choice(best_one_trees)
        return d2tree_to_alpha(depth_two_tree(tree1,tree2,ts_ops_values,ts_ops))

    print("Initial parent selection of depth two started")
    best_one_trees = d1_alpha_to_tree(onetree)
    population = select_best([random_alpha() for _ in range(n*2)])

    for _ in range(m):
        population.extend([random_alpha() for _ in range(n)])
        population = select_best(population)
    print("Initial parent selection of depth two done starting with mutation and crossover of depth two")

    # Mutation stage: survivors plus one mutant of each.
    best_trees = d2_alpha_to_tree(population)
    best_trees.extend([
        mutate_random_node(tree, terminal_values, unary_ops, binary_ops, ts_ops, ts_ops_values)
        for tree in best_trees
    ])
    d2m_population = select_best([d2tree_to_alpha(tree) for tree in best_trees])

    # Crossover stage: pair each tree with its successor in a fixed snapshot.
    parents = list(best_trees)
    for first, second in zip(parents, parents[1:]):
        pair = crossover(first, second, 2)
        if pair is None:
            # No offspring produced for this pair; nothing to add.
            continue
        best_trees.extend(pair)
    d2c_population = select_best([d2tree_to_alpha(tree) for tree in best_trees])

    # Final selection over the winners of both stages.
    prefinal_d2_pop = d2m_population + d2c_population
    best_depth_two_population = select_best(prefinal_d2_pop)
    print("Depth two done")
    return best_depth_two_population

In [44]:
def best_d3_alpha(best_depth_two_population,n,m):
    """Search for the ``n`` best depth-three alpha expressions.

    Stages:
      1. Build ``2*n`` depth-three expressions with ``depth_three_tree`` from
         the parsed depth-two population and keep the top ``n``.
      2. ``m`` times: add ``n`` fresh expressions and keep the top ``n``.
      3. Add one mutant per survivor and keep the top ``n``.
      4. Simulate and rank those winners once more.

    Ranking simulates the expressions through ``simulate_alpha_list`` with
    the module-level ``s``, tabulates with ``prettify_result`` and sorts with
    ``fitness_fun``. No crossover is performed here, despite the progress
    message.

    Returns the list of the ``n`` best expression strings.
    """
    def rank(expressions):
        alpha_list = [generate_alpha(expr) for expr in expressions]
        cntx = simulate_alpha_list(s, alpha_list)
        Data = prettify_result(cntx, detailed_tests_view=False, clickable_alpha_id = False)
        return fitness_fun(Data,n)

    def unique(expressions):
        return list(OrderedDict.fromkeys(expressions))

    def random_alpha():
        # The drawn value is not used; the call is kept so the random
        # stream is consumed exactly as before.
        random.choice([0,1,2])
        return d3tree_to_alpha(depth_three_tree(d2))

    print("Initial parent selection of depth three started")
    d2 = d2_alpha_to_tree(best_depth_two_population)
    popu = rank(unique([random_alpha() for _ in range(n*2)]))

    for _ in range(m):
        popu.extend([random_alpha() for _ in range(n)])
        popu = rank(unique(popu))
    print("Initial parent selection of depth three done starting with mutation and crossover of depth three")

    best_t3_trees = d3_alpha_to_tree(popu)
    # Build all mutants first so the list is not extended while being read.
    best_t3_trees.extend([
        mutate_random_node(tree, terminal_values, unary_ops, binary_ops, ts_ops, ts_ops_values)
        for tree in best_t3_trees
    ])
    mut3 = unique([d3tree_to_alpha(tree) for tree in best_t3_trees])
    d3m_population = rank(mut3)

    prefinal_d3_pop = []
    prefinal_d3_pop.extend(d3m_population)
    best_depth_three_population = rank(prefinal_d3_pop)
    print("Depth three done")
    return best_depth_three_population

In [38]:
# Depth-one search: keep n=25 expressions per round, with m=10 refill rounds.
best_depth_one_alphas = best_d1_alphas(25,10)

Initial parent selection of Depth one staarted
Initial parent selection done starting with mutation of depth one
Depth one done


  0%|          | 0/50 [00:00<?, ?it/s]  2%|▏         | 1/50 [00:17<14:21, 17.59s/it]  6%|▌         | 3/50 [00:18<03:47,  4.84s/it]  8%|▊         | 4/50 [00:37<07:24,  9.67s/it] 10%|█         | 5/50 [00:37<04:53,  6.51s/it] 12%|█▏        | 6/50 [00:39<03:51,  5.27s/it] 14%|█▍        | 7/50 [00:58<06:51,  9.58s/it] 16%|█▌        | 8/50 [01:01<05:10,  7.40s/it] 18%|█▊        | 9/50 [01:03<03:57,  5.80s/it] 20%|██        | 10/50 [01:17<05:34,  8.37s/it] 24%|██▍       | 12/50 [01:25<03:54,  6.17s/it] 26%|██▌       | 13/50 [01:39<05:04,  8.22s/it] 28%|██▊       | 14/50 [01:40<03:44,  6.23s/it] 30%|███       | 15/50 [01:47<03:44,  6.41s/it] 32%|███▏      | 16/50 [01:58<04:25,  7.81s/it] 34%|███▍      | 17/50 [01:59<03:10,  5.77s/it] 36%|███▌      | 18/50 [02:08<03:38,  6.84s/it] 38%|███▊      | 19/50 [02:18<03:59,  7.73s/it] 40%|████      | 20/50 [02:20<02:57,  5.91s/it] 42%|████▏     | 21/50 [02:28<03:08,  6.52s/it] 44%|████▍     | 22/50 [02:40<03:49,  8.21s/it] 46%|███

In [39]:
# Depth-two search seeded with the depth-one winners (n=25, m=10).
best_depth_two_alphas =  best_d2_alphas(best_depth_one_alphas,25,10)

Initial parent selection of depth two started
Initial parent selection of depth two done starting with mutation and crossover of depth two
Depth two done


  0%|          | 0/50 [00:00<?, ?it/s]  2%|▏         | 1/50 [00:19<15:34, 19.07s/it]  4%|▍         | 2/50 [00:19<06:39,  8.32s/it]  6%|▌         | 3/50 [00:21<04:08,  5.28s/it]  8%|▊         | 4/50 [00:39<07:46, 10.15s/it] 10%|█         | 5/50 [00:43<05:56,  7.93s/it] 12%|█▏        | 6/50 [00:54<06:33,  8.95s/it] 14%|█▍        | 7/50 [00:58<05:28,  7.64s/it] 16%|█▌        | 8/50 [01:08<05:50,  8.34s/it] 18%|█▊        | 9/50 [01:20<06:29,  9.49s/it] 20%|██        | 10/50 [01:27<05:47,  8.69s/it] 22%|██▏       | 11/50 [01:33<05:08,  7.90s/it] 24%|██▍       | 12/50 [01:40<04:41,  7.40s/it] 26%|██▌       | 13/50 [01:49<04:55,  7.98s/it] 28%|██▊       | 14/50 [01:58<05:01,  8.37s/it] 30%|███       | 15/50 [01:59<03:31,  6.04s/it] 32%|███▏      | 16/50 [02:11<04:25,  7.82s/it] 34%|███▍      | 17/50 [02:20<04:35,  8.35s/it] 36%|███▌      | 18/50 [02:25<03:51,  7.23s/it] 38%|███▊      | 19/50 [02:30<03:21,  6.51s/it] 40%|████      | 20/50 [02:40<03:49,  7.66s/it] 42%|████

In [45]:
# Depth-three search seeded with the depth-two winners (n=25, m=10).
best_depth_three_alphas = best_d3_alpha(best_depth_two_alphas,25,10)

Initial parent selection of depth three started
Initial parent selection of depth three done starting with mutation and crossover of depth three
Depth three done


  0%|          | 0/48 [00:00<?, ?it/s]  2%|▏         | 1/48 [00:25<20:04, 25.62s/it]  4%|▍         | 2/48 [00:27<09:02, 11.79s/it]  6%|▋         | 3/48 [00:29<05:33,  7.42s/it]  8%|▊         | 4/48 [00:46<08:07, 11.08s/it] 10%|█         | 5/48 [00:47<05:21,  7.47s/it] 12%|█▎        | 6/48 [00:54<05:04,  7.25s/it] 15%|█▍        | 7/48 [01:04<05:27,  8.00s/it] 17%|█▋        | 8/48 [01:08<04:35,  6.88s/it] 19%|█▉        | 9/48 [01:25<06:34, 10.11s/it] 21%|██        | 10/48 [01:30<05:18,  8.39s/it] 23%|██▎       | 11/48 [01:36<04:49,  7.83s/it] 25%|██▌       | 12/48 [01:44<04:41,  7.83s/it] 27%|██▋       | 13/48 [01:54<04:51,  8.33s/it] 29%|██▉       | 14/48 [02:08<05:49, 10.29s/it] 31%|███▏      | 15/48 [02:13<04:45,  8.65s/it] 33%|███▎      | 16/48 [02:17<03:53,  7.30s/it] 35%|███▌      | 17/48 [02:35<05:17, 10.23s/it] 38%|███▊      | 18/48 [02:47<05:28, 10.95s/it] 40%|███▉      | 19/48 [02:56<04:58, 10.29s/it] 42%|████▏     | 20/48 [03:02<04:09,  8.90s/it] 44%|████

In [46]:
# Display the selected depth-one expressions.
best_depth_one_alphas

['divide(vwap,close)',
 'subtract(open,vwap)',
 'subtract(low,close)',
 'divide(open,vwap)',
 'subtract(open,high)',
 'multiply(open,adv20)',
 'multiply(adv20,vwap)',
 'multiply(close,adv20)',
 'subtract(low,vwap)',
 'divide(adv20,open)',
 'divide(vwap,dividend)',
 'divide(high,dividend)',
 'divide(low,dividend)',
 'divide(close,dividend)',
 'multiply(cap,cap)',
 'divide(open,dividend)',
 'subtract(low,high)',
 'multiply(cap,adv20)',
 'divide(low,high)',
 'multiply(vwap,cap)',
 'multiply(cap,vwap)',
 'multiply(open,volume)',
 'multiply(open,cap)',
 'multiply(low,cap)',
 'multiply(cap,low)']

In [47]:
# Display the selected depth-two expressions.
best_depth_two_alphas

['ts_product(multiply(cap,adv20),240)',
 'ts_product(multiply(open,adv20),240)',
 'ts_product(multiply(adv20,vwap),40)',
 'ts_median(divide(vwap,dividend),120)',
 'ts_median(divide(open,dividend),120)',
 'ts_std_dev(divide(adv20,open),20)',
 'ts_delta(divide(vwap,close),240)',
 'ts_zscore(divide(vwap,close),60)',
 'ts_mean(subtract(open,vwap),20)',
 'ts_backfill(divide(vwap,close),240)',
 'ts_arg_min(multiply(cap,low),240)',
 'ts_arg_min(multiply(vwap,cap),240)',
 'ts_mean(divide(vwap,close),20)',
 'ts_product(multiply(adv20,vwap),120)',
 'ts_ir(subtract(open,vwap),20)',
 'ts_product(divide(vwap,close),20)',
 'ts_zscore(divide(adv20,open),60)',
 'ts_median(multiply(adv20,vwap),120)',
 'ts_product(divide(open,vwap),20)',
 'ts_mean(multiply(open,adv20),120)',
 'ts_mean(multiply(adv20,vwap),120)',
 'ts_median(multiply(open,adv20),240)',
 'ts_median(multiply(adv20,vwap),240)',
 'ts_delta(subtract(low,close),60)',
 'ts_rank(divide(vwap,close),20)']

In [48]:
# Display the selected depth-three expressions.
best_depth_three_alphas

['ts_rank(ts_product(multiply(cap,adv20),240),60)',
 'ts_rank(ts_product(multiply(cap,adv20),240),120)',
 'ts_backfill(ts_product(multiply(open,adv20),240),120)',
 'ts_rank(ts_product(multiply(cap,adv20),240),40)',
 'ts_rank(ts_product(multiply(open,adv20),240),40)',
 'ts_median(ts_rank(divide(vwap,close),20),60)',
 'ts_backfill(ts_product(multiply(adv20,vwap),40),120)',
 'ts_backfill(ts_std_dev(divide(adv20,open),20),240)',
 'ts_zscore(ts_product(divide(vwap,close),20),240)',
 'ts_ir(ts_product(multiply(adv20,vwap),120),40)',
 'ts_mean(ts_rank(divide(vwap,close),20),240)',
 'ts_median(ts_median(divide(vwap,dividend),120),20)',
 'ts_mean(ts_median(divide(open,dividend),120),20)',
 'ts_zscore(ts_product(divide(vwap,close),20),20)',
 'ts_mean(ts_product(multiply(adv20,vwap),40),40)',
 'ts_delta(ts_mean(divide(vwap,close),20),40)',
 'ts_median(ts_arg_min(multiply(cap,low),240),60)',
 'ts_median(ts_rank(divide(vwap,close),20),240)',
 'ts_mean(ts_arg_min(multiply(cap,low),240),240)',
 'ts_z