In [None]:
%load_ext autoreload

In [None]:
%autoreload 2

In [None]:
from TSFEA import *

In [None]:
# Single TSFEA helper instance; the cells below call it for date homogenization,
# reconciliation, feature extraction and model tuning/prediction.
tsfea = TSFEA()

In [None]:
# Input locations -- replace each placeholder with the real path before running.
# Each constant is passed to the matching tsfea loader in the cells below.
PATH_TO_EARNINGS = "enter_path_to_earnings"
PATH_TO_ESTIMATES = "enter_path_to_estimates"
PATH_TO_RETURNS = "enter_path_to_returns"

# Get Earnings Number to Homogenize Dates across Tickers

In [None]:
# Earnings table that the estimates and returns are reconciled against below.
# Later cells index it by its "ERNUM" column and use its PrePRC / PostPRC
# columns to build the prediction target.
# NOTE(review): presumably ERNUM is a per-ticker earnings sequence number that
# lines announcements up across tickers -- confirm in TSFEA.
homogenized_earnings = tsfea.homogenize_earnings_dates(PATH_TO_EARNINGS)
homogenized_earnings.head()

# Reconcile Estimates Dates

In [None]:
reconciled_estimates = tsfea.reconcile_estimates_with_earnings(
    PATH_TO_ESTIMATES,
    homogenized_earnings
)

In [None]:
reconciled_estimates.head()

In [None]:
reconciled_estimates.loc["AAPL"]

# Reconcile Returns

In [None]:
reconciled_returns = tsfea.reconcile_returns_with_earnings(
    PATH_TO_RETURNS,
    homogenized_earnings
)

In [None]:
reconciled_returns.head()

# Feature Extraction

In [None]:
medest_features = tsfea.extract_features_from_column(
    reconciled_estimates,
    'MEDEST'
)

In [None]:
medest_features.head()

In [None]:
medest_features.loc['AAPL']

In [None]:
return_features = tsfea.extract_features_from_column(
    reconciled_returns,
    'returns'
)

In [None]:
return_features.head()

# Prediction Test

### Add Models

In [None]:
# Estimators available to tsfea, keyed by the model name that is later passed
# to tsfea.tune_train_predict.
tsfea.models = dict(XGBClf=XGBClassifier(eval_metric='error'))

# hyperopt search space per model name; keys are XGBClassifier parameter names.
# scope.int casts the float drawn by hp.quniform to an integer.
tsfea.hyparam_space = dict(
    XGBClf=dict(
        max_depth=scope.int(hp.quniform("max_depth", 3, 18, 1)),
        gamma=hp.uniform('gamma', 0, 9),
        reg_alpha=hp.uniform('reg_alpha', 0, 1),
        reg_lambda=hp.uniform('reg_lambda', 0, 1),
        colsample_bytree=hp.uniform('colsample_bytree', 0.5, 1),
        min_child_weight=scope.int(hp.quniform('min_child_weight', 0, 10, 1)),
        n_estimators=scope.int(hp.quniform('n_estimators', 100, 1000, 1)),
    ),
)

For more details on defining hyperparameter spaces, see the hyperopt documentation at http://hyperopt.github.io/hyperopt/getting-started/search_spaces/, or search the web for "hyperparameter tuning [ML model] hyperopt".

In [None]:
# Re-index the earnings table: drop the second index level and append the
# "ERNUM" column as an index level, then sort (the label-based slicing in the
# walk-forward loop below needs a sorted index).
earnings_by_number = (
    homogenized_earnings
    .droplevel(1)
    .set_index("ERNUM", append=True)
    .sort_index()
)

# Relative price change from PrePRC to PostPRC.
# NOTE(review): presumably the prices just before / after the announcement -- confirm.
price_return = earnings_by_number["PostPRC"] / earnings_by_number["PrePRC"] - 1

# TARGET is an explicit 0/1 integer label (1 when the return is non-negative).
# pd.to_numeric leaves a boolean Series as bool, so the cast is done with astype.
# A missing return compares False and is therefore labelled 0.
target_earnings = pd.DataFrame(
    {
        "TARGET": (price_return >= 0).astype(int),
        "returns": price_return,
    }
)

In [None]:
# Modelling frame: target + return features + estimate features, keeping only
# the index entries present in all three (inner joins on the shared index).
#
# Missing values are forward-filled within each (level 0, level 1) group and
# whatever is still missing becomes 0. GroupBy.ffill() replaces the deprecated
# fillna(method='ffill') spelling.
# NOTE(review): if every (level 0, level 1) pair is unique, each group holds a
# single row and the forward fill cannot fill anything -- grouping on level 0
# alone may have been intended. Confirm before changing.
df = (
    target_earnings
    .join(return_features, how='inner')
    .join(medest_features, how='inner')
    .groupby(level=[0, 1])
    .ffill()
    .fillna(0)
)
df.head()

In [None]:
i, j = 0, 5
ret = []
while j < df.index.get_level_values(1).max():
    (
        X_train, 
        X_test, 
        y_train, 
        y_test
    ) = (
        df.loc[(slice(None), slice(i, j)), :].drop(columns=["TARGET", "returns"]),
        df.loc[(slice(None), j+1), :].drop(columns=["TARGET", "returns"]),
        df.loc[(slice(None), slice(i, j)), "TARGET"],
        df.loc[(slice(None), j+1), "TARGET"],
    )

    y_pred = tsfea.tune_train_predict(
        X_train,
        X_test,
        y_train,
        y_test,
        'XGBClf',
        5,
        20
    )

    temp = pd.Series(y_pred, index=y_test.index).replace(0, -1).mul(df.loc[(slice(None), j+1), "returns"]).groupby(level=1).sum()
    print(temp)
    ret.append(temp)

    i += 1
    j += 1

In [None]:
all_ret_rolling = pd.concat(ret)
all_ret_rolling.describe()

In [None]:
all_ret_rolling.plot()

In [None]:
all_ret_expanding = pd.concat(ret[:-1])
all_ret_expanding.describe()

In [None]:
all_ret_expanding.plot()

# Toy Prediction Test

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
data = load_breast_cancer()
X, y = data.data, data.target

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [None]:
y_pred = tsfea.tune_train_predict(
    X_train,
    X_test,
    y_train,
    y_test,
    'XGBClf',
    15,
    50
)