In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from soed import SOEDClassifier
from sklearn.metrics import precision_score, recall_score, roc_auc_score, accuracy_score

In [8]:
# Load the pre-computed feature table from disk.
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
feature_df = pd.read_pickle('stock_feature_df.pkl')

# Binary targets: 1 where the corresponding label column equals 'Sell' / 'Buy', otherwise 0.
feature_df['binary_sell'] = feature_df['alpaca_m1_label_480__share_holder_label'].eq('Sell').astype(int)
feature_df['binary_buy'] = feature_df['alpaca_m1_label_480__cash_holder_label'].eq('Buy').astype(int)

# Slope features used as model inputs, one per window size.
predictors = [f'alpaca_m1_slope_{window}__slope' for window in (15, 60, 120, 240)]

feature_df['timeStamp'] = pd.to_datetime(feature_df['timeStamp'])

# Chronological split: rows strictly before the cut-off train the models, the rest are held out.
split_point = pd.to_datetime('2024-01-01T00:00:00.000Z')
BM = feature_df['timeStamp'] < split_point

train_df = feature_df.loc[BM]
test_df = feature_df.loc[~BM]

X = train_df[predictors]
y = train_df['binary_sell']
c = train_df['alpaca_m1_label_480__change_percent']

# Two-column matrix built from the change percentage:
#   column 0 holds -c where c is negative, column 1 holds c where c is positive; all other entries are 0.
# It is later passed as the third argument to SOEDClassifier.fit.
c2 = np.column_stack((
    np.where(c < 0, -c, 0.0),
    np.where(c > 0, c, 0.0),
)).astype(np.float64)

In [16]:
# Mean of the 240-window slope for each binary_sell class.
feature_df.groupby('binary_sell')['alpaca_m1_slope_240__slope'].mean()

binary_sell
0    0.000445
1    0.000239
Name: alpaca_m1_slope_240__slope, dtype: float64

In [17]:
# Mean of the 120-window slope for each binary_sell class.
feature_df.groupby('binary_sell')['alpaca_m1_slope_120__slope'].mean()

binary_sell
0    0.000736
1    0.000415
Name: alpaca_m1_slope_120__slope, dtype: float64

In [18]:
# Mean of the 60-window slope for each binary_sell class.
feature_df.groupby('binary_sell')['alpaca_m1_slope_60__slope'].mean()

binary_sell
0    0.001220
1    0.000573
Name: alpaca_m1_slope_60__slope, dtype: float64

In [19]:
# Mean of the 15-window slope for each binary_sell class.
feature_df.groupby('binary_sell')['alpaca_m1_slope_15__slope'].mean()

binary_sell
0    0.003172
1    0.001494
Name: alpaca_m1_slope_15__slope, dtype: float64

In [9]:
def _binary_metrics(y_true, y_label, y_score):
    """Summarise one set of binary outputs as a metrics dict.

    Parameters
    ----------
    y_true : array-like
        Ground-truth 0/1 labels.
    y_label : array-like
        Hard 0/1 outputs, used for recall, precision and accuracy.
    y_score : array-like
        Continuous score for the positive class, used for ROC AUC.

    Returns
    -------
    dict
        Keys 'recall', 'precision', 'accuracy', 'auc', in that order.
    """
    return {
        'recall': recall_score(y_true, y_label),
        'precision': precision_score(y_true, y_label),
        'accuracy': accuracy_score(y_true, y_label),
        'auc': roc_auc_score(y_true, y_score),
    }


# Fit SOED on the training features, the sell label and the two-column matrix c2.
soed = SOEDClassifier(som_x=15,som_y=15,som_input_len=X.shape[1])
soed.fit(X.values,y,c2)

X_test = test_df[predictors]
y_test = test_df.binary_sell

# The model was fit on a plain ndarray (X.values), so query it with an ndarray as well
# instead of the DataFrame; this also matches how the comparison cell calls predict_proba.
X_test_values = X_test.values

# NOTE(review): decide / decide_util are SOED-specific; their exact semantics are not
# visible in this notebook -- confirm against the soed package documentation.
y_decide = soed.decide(X_test_values)
y_util = soed.decide_util(X_test_values)

y_predict = soed.predict(X_test_values)
y_prob = soed.predict_proba(X_test_values)

# Metrics for predict / predict_proba.
performance = _binary_metrics(y_test, y_predict, y_prob[:,1])
print(performance)

# Metrics for decide / decide_util (column 1 of y_util is used as the ranking score).
performance = _binary_metrics(y_test, y_decide, y_util[:,1])
print(performance)

# Keep the individual metric names bound (to the decide-based values, as before)
# for any later cell that reads them.
recall, precision, accuracy, auc = performance.values()

version 1.0.8
Model training complete.
{'recall': 0.2171553090332805, 'precision': 0.3100639248741302, 'accuracy': 0.6151622809658578, 'auc': np.float64(0.5034839397652422)}
{'recall': 0.4231378763866878, 'precision': 0.3054133661242815, 'accuracy': 0.5321370506412958, 'auc': np.float64(0.503038058789955)}


In [10]:
# Empty results table: one row per repeat, one column per model (filled in by the comparison loop).
repeat_labels = [f'repeat{i}' for i in range(1, 2)]
model_columns = ['DT', 'MLP', 'SOED']
report_df = pd.DataFrame(index=repeat_labels, columns=model_columns)
report_df

Unnamed: 0,DT,MLP,SOED
repeat1,,,


In [11]:
# Compare test-set ROC AUC of SOED, a decision tree and an MLP, one row of report_df per repeat.
for loop_i in range(1,2):

    # Build the three models for this repeat.
    # DT and MLP are seeded with the repeat number so each repeat is reproducible
    # while different repeats still use different random states.
    # NOTE(review): SOEDClassifier is left unseeded because its seeding parameter
    # (if any) is not visible in this notebook.
    soed = SOEDClassifier(som_x=20,som_y=20,som_sigma=7)
    dt = DecisionTreeClassifier(max_depth=10, random_state=loop_i)
    mlp = MLPClassifier(max_iter=10000, random_state=loop_i)

    # Same fit -> score -> record steps for every model, in the original order.
    for column, model in (('SOED', soed), ('DT', dt), ('MLP', mlp)):
        model.fit(X.values,y)
        y_proba = model.predict_proba(X_test.values)
        auc = roc_auc_score(y_test,y_proba[:,1])
        report_df.loc[f'repeat{loop_i}',column] = auc

# Average over the repeat rows only: drop any 'Average' row left by a previous run of
# this cell so that re-running does not fold the stale average into the new one.
# The table starts out with object dtype, so cast to float before taking the mean.
repeat_rows = report_df.drop(index='Average', errors='ignore')
report_df.loc['Average'] = repeat_rows.astype(float).mean()

version 1.0.8
Model training complete.


In [12]:
# Display the per-repeat AUC table together with its 'Average' row.
report_df

Unnamed: 0,DT,MLP,SOED
repeat1,0.505921,0.506796,0.503702
Average,0.505921,0.506796,0.503702
