In [3]:
import os
import time
import datetime
import math
from tqdm import tqdm
sep = "-*-*-"

os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # disable GPU

import pandas as pd
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import pandas_datareader.data as web
import numpy as np

# visuals.
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# processing / validation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# keras/tf
# %tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping
print("tensorflow version: ", tf.__version__)

# models
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
from sklearn.ensemble import VotingRegressor

# metrics
from sklearn.metrics import mean_squared_error, r2_score # reg metrics
from sklearn.metrics import classification_report, roc_auc_score, roc_auc_score,\
accuracy_score, precision_score, average_precision_score, balanced_accuracy_score,\
precision_recall_fscore_support# clf metrics

# constant seed for reproducibility
SEED = 111 
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# cpu workers
WORKERS = 6 

  from pandas.util.testing import assert_frame_equal


tensorflow version:  2.0.0


In [23]:
def fetch_data_since(tickers, days, years):

    df_raw = pd.DataFrame() 
    attempt = 0
    drop = []
    while len(tickers) != 0 and attempt <= 5:
        tickers = [j for j in tickers if j not in drop] 
        for i in range(len(tickers)):
            try:
                temp = web.get_data_yahoo(tickers[i],
                                          datetime.date.today() - datetime.timedelta(days * years), # since delta
                                          datetime.date.today()) # until today 

                temp.dropna(inplace = True)
                df_raw[tickers[i]] = temp["Adj Close"]
                drop.append(tickers[i])       
            except:
                print(tickers[i]," :failed to fetch data...retrying")
                continue
        attempt+=1
       
    # missing values
    print("Missing Values:")
    print(df_raw.isnull().sum())
    df = df_raw.dropna()

    return df

###################################################################################################################

def fetch_data_from_to(tickers, _from, to):

    df_raw = pd.DataFrame() 
    attempt = 0
    drop = []
    while len(tickers) != 0 and attempt <= 5:
        tickers = [j for j in tickers if j not in drop] 
        for i in range(len(tickers)):
            try:
                temp = web.get_data_yahoo(tickers[i], _from, to ) # specified range

                temp.dropna(inplace = True)
                df_raw[tickers[i]] = temp["Adj Close"]
                drop.append(tickers[i])       
            except:
                print(tickers[i]," :failed to fetch data...retrying")
                continue
        attempt+=1
       
    # missing values
    print("Missing Values:")
    print(df_raw.isnull().sum())
    df = df_raw.dropna()

    return df

###################################################################################################################

def process_clf_data(target, seq_len, period, features_type, df):
    print(f"\n{sep*10}\nClassification(!) Processing\n{sep*10}")
    df_pct = pd.DataFrame() # blank dataframe


    if features_type=="since": # changes since previous days
        for col in df.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_snc_[t-{i}]"] = df[col].pct_change(i)
        df_pct.dropna(inplace=True)
    
    elif features_type=="shifted": # shifted changes of previous days
        for col in df.columns:
#             if col != target: # without target's pct_change
            df_pct[col] = df[col].pct_change(1)
        df_pct.dropna(inplace=True)
        
        # shifted previous
        for col in df_pct.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_sht_[t-{i}]"] = df_pct[col].shift(i)
        df_pct.dropna(inplace=True)
    else:
        raise ValueError("features_type can be either 'since' or 'shifted'.")


    df_pct[f"{target}_price_[t]"] = df[target] # price [t]

    # labeling
    df_pct[f"{target}_Future"] = df[target].shift(-period) # future price [t + perid]
    
    warnings = 0
    def classify(x):
        if x[f"{target}_Future"] >= x[f"{target}_price_[t]"]:
            return 1
        elif x[f"{target}_Future"] < x[f"{target}_price_[t]"]:
            return 0
        else:
            nonlocal warnings
            warnings += 1
            return None
    
    df_pct[f"{target}_Future"] = df_pct.apply(classify, axis=1) # classify


    if warnings > 1:
        raise ValueError("More than 1 NaN in classifying.")
        
    
    df_pct.dropna(inplace=True)
    if df_pct.isnull().any().any():
        raise ValueError("null values exist")
        
    return df_pct

###################################################################################################################

def process_reg_data(target, seq_len, period, features_type, df):
    print(f"\n{sep*10}\nRegression Processing\n{sep*10}")
    df_pct = pd.DataFrame() # blank dataframe


    if features_type=="since": # changes since previous days
        for col in df.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_snc_[t-{i}]"] = df[col].pct_change(i)
        df_pct.dropna(inplace=True)
    
    elif features_type=="shifted": # shifted changes of previous days
        for col in df.columns:
#             if col != target: # without target's pct_change
            df_pct[col] = df[col].pct_change(1)
        df_pct.dropna(inplace=True)
        
        # shifted previous
        for col in df_pct.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_sht_[t-{i}]"] = df_pct[col].shift(i)
        df_pct.dropna(inplace=True)
    else:
        raise ValueError("features_type can be either 'since' or 'shifted'.")


    df_pct[f"{target}_price_[t]"] = df[target] # target's price

    # labeling
    df_pct[f"{target}_Future"] = df[target].shift(-period)

    df_pct.dropna(inplace=True)
    if df_pct.isnull().any().any():
        raise ValueError("null values exist")
        
    return df_pct

###################################################################################################################

def split_data(forward_test, scaling, split_size, proc_data):
    
    # train/test & faeture/label split
    if forward_test==True:
    #     forward test (recommended)
        nth_prcntile = int(len(proc_data)*split_size)
        test_df = proc_data.iloc[nth_prcntile:,:]
        
        train_df = proc_data.drop(test_df.index)
        train_df = train_df.sample(frac=1, random_state=SEED) # shuffle train dataset

        # features
        X_train = train_df.drop(f"{TARGET}_Future", axis=1).values
        X_test = test_df.drop(f"{TARGET}_Future", axis=1).values

        # labels
        y_train = train_df[f"{TARGET}_Future"].values
        y_test = test_df[f"{TARGET}_Future"].values
        
    elif forward_test==False:
        proc_data = proc_data.sample(frac=(1), random_state=SEED) # shuffle all data
        test_df = proc_data.sample(frac=(1-split_size), random_state=SEED) # sample test dataset
        train_df = proc_data.drop(test_df.index)

        # features
        X_train = train_df.drop(f"{TARGET}_Future", axis=1).values
        X_test = test_df.drop(f"{TARGET}_Future", axis=1).values

        # labels
        y_train = train_df[f"{TARGET}_Future"].values
        y_test = test_df[f"{TARGET}_Future"].values
        
    else:
        raise ValueError("forward_test must be boolean.")


    # scaling
    if scaling=="minmax":
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        x_test_df = pd.DataFrame(X_test, columns=proc_data.drop(f"{TARGET}_Future", axis=1).columns, index=test_df.index)
        y_test_df = pd.DataFrame(y_test, columns=[f"{TARGET}_Future"], index=test_df.index)
        test_df = pd.concat([x_test_df, y_test_df], axis=1)
        
    elif scaling=="standard":
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        x_test_df = pd.DataFrame(X_test, columns=proc_data.drop(f"{TARGET}_Future", axis=1).columns, index=test_df.index)
        y_test_df = pd.DataFrame(y_test, columns=[f"{TARGET}_Future"], index=test_df.index)
        test_df = pd.concat([x_test_df, y_test_df], axis=1)
        
    elif scaling=="none":
        pass
    
    else:
        raise ValueError("scaling can be either 'minmax', 'standard' or 'none'.")
    
    
    return X_train, X_test, y_train, y_test, test_df

###################################################################################################################

# tf neural
def fit_ann(PATIENCE=32, epochs=1000, nodes=100, hls=9):
    dnnReg = Sequential()
    
    for l in range(hls):
        dnnReg.add(Dense(nodes, activation="relu"))
#         dnnReg.add(Dropout(0.2))
    dnnReg.add(Dense(1))

    dnnReg.compile(optimizer="adam", loss="mse")

    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=PATIENCE)

    dnnReg.fit(x=X_train, y=y_train, epochs=epochs, validation_data=(X_test,y_test), use_multiprocessing=True, workers=WORKERS, callbacks=[early_stop])
    return dnnReg

###################################################################################################################

def predict_sample(features_serie, model):
    features = np.array(features_serie).reshape(1,-1)
    pred = model.predict(features)[0]
    return pred

###################################################################################################################

def open_pos(qnt, open_price, pos_type):
    pos = {"qnt":qnt, "open_price":open_price, "cost":qnt * open_price, "type":pos_type}
    return pos

def get_profit(pos, close_price):
    profit = (pos["qnt"] * close_price) - (pos["qnt"] * pos["open_price"]) 
    if pos["type"] == "SHORT":
        profit = -profit
    return profit
    
def return_profit(x):
    global BALANCE
    global pos
    
    DATE = x.name.date()
    PRICE = x["todays_price"]
    
    # close position
    if pos:
        profit = get_profit(pos, close_price=PRICE)
        BALANCE += pos["cost"] + profit # remaining
                
#         if pos["type"] == "SHORT":
#             print(f"Closed {pos['qnt']} Short at {round(PRICE,6)} on {DATE} | -> {profit}")
            
#         if pos["type"] == "LONG":
#             print(f"Closed {pos['qnt']} Long at {round(PRICE,6)} on {DATE} | -> {profit}")
        
        pos = None # remove position
        
    # LONG    
    if x["pred_signal"] == 1 and not pos and BALANCE > PRICE:
        qnt = math.floor(BALANCE / PRICE)
        pos = open_pos(qnt=qnt, open_price=PRICE, pos_type="LONG")
        BALANCE -= pos["cost"]
#         print(f"LONG {pos['qnt']} at {round(PRICE,6)} on {DATE}")

 
    # SHORT
    if ALLOW_SHORTS:
        if x["pred_signal"] == 0 and not pos and BALANCE > PRICE:
            qnt = math.floor(BALANCE / PRICE)
            pos = open_pos(qnt=qnt, open_price=PRICE, pos_type="SHORT")
            BALANCE -= pos["cost"]
#             print(f"SHORT {pos['qnt']} at {round(PRICE,6)} on {DATE}")

    try:
        return profit
    except:
        return 0
    
def get_balance(x):
    global _balance
    global first_iter
    
    if first_iter:
        first_iter = False
        return _balance
    else:
        _balance += x["P&L"]
        return _balance

In [20]:
TICKERS = ["^DJI", "^GSPC", "MSFT", "AAPL", "AMZN", "GOOGL", "JPM", "JNJ", "V", "MA", "INTC"]
# TICKERS = ["MSFT", "AAPL", "AMZN"]

# DAYS = 365
# YEARS = 3
# DF = fetch_data_since(tickers=TICKERS, days=DAYS, years=YEARS)

FROM = '2015'
TO = '2020' # excld.
DF = fetch_data_from_to(tickers=TICKERS, _from=FROM, to=TO)

DF.tail()

Missing Values:
^DJI     0
^GSPC    0
MSFT     0
AAPL     0
AMZN     0
GOOGL    0
JPM      0
JNJ      0
V        0
MA       0
INTC     0
dtype: int64


Unnamed: 0_level_0,^DJI,^GSPC,MSFT,AAPL,AMZN,GOOGL,JPM,JNJ,V,MA,INTC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-12-24,28515.449219,3223.379883,156.951309,283.596924,1789.209961,1344.430054,135.296478,145.005341,187.298752,296.624969,59.118862
2019-12-26,28621.390625,3239.909912,158.237793,289.223602,1868.77002,1362.469971,136.732239,144.905991,188.886444,298.340027,59.526852
2019-12-27,28645.259766,3240.02002,158.527008,289.113831,1869.800049,1354.640015,136.830582,144.826492,189.116104,299.87558,59.78558
2019-12-30,28462.140625,3221.290039,157.160736,290.829773,1846.890015,1339.709961,136.329041,144.379349,187.558365,296.864258,59.327831
2019-12-31,28538.439453,3230.780029,157.270432,292.954712,1847.839966,1339.390015,137.086258,144.945724,187.62825,297.731781,59.556702


In [21]:
# data prep.
TARGET="V"
SEQ_LEN=10 # previous data
PERIOD=1 # future data
FEATURES_TYPE="shifted"
FORWARD_TEST = True
SCALING = "none" # none / minmax / standard
SPLIT_SIZE = 0.5 # training size

processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF)
X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

print(f"processed shape: {processed_data.shape}", end=f"\n{sep}\n")
print(f"test_df shape: {test_df.shape}", end=f"\n{sep}\n")
print(f"features: train shape: {X_train.shape} | test Shape: {X_test.shape}", end=f"\n{sep}\n")
print(f"labels: train shape: {y_train.shape} | test Shape: {y_test.shape}", end=f"\n{sep}\n")
print(f"X_train Max/Min: {X_train.max()} / {X_train.min()}")
test_df.tail()


-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
Regression Processing
-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
processed shape: (1246, 123)
-*-*-
test_df shape: (623, 123)
-*-*-
features: train shape: (623, 122) | test Shape: (623, 122)
-*-*-
labels: train shape: (623,) | test Shape: (623,)
-*-*-
X_train Max/Min: 94.840576171875 / -0.09253331576226898


Unnamed: 0_level_0,^DJI,^GSPC,MSFT,AAPL,AMZN,GOOGL,JPM,JNJ,V,MA,INTC,^DJI_sht_[t-1],^DJI_sht_[t-2],^DJI_sht_[t-3],^DJI_sht_[t-4],^DJI_sht_[t-5],^DJI_sht_[t-6],^DJI_sht_[t-7],^DJI_sht_[t-8],^DJI_sht_[t-9],^DJI_sht_[t-10],^GSPC_sht_[t-1],^GSPC_sht_[t-2],^GSPC_sht_[t-3],^GSPC_sht_[t-4],^GSPC_sht_[t-5],^GSPC_sht_[t-6],^GSPC_sht_[t-7],^GSPC_sht_[t-8],^GSPC_sht_[t-9],^GSPC_sht_[t-10],MSFT_sht_[t-1],MSFT_sht_[t-2],MSFT_sht_[t-3],MSFT_sht_[t-4],MSFT_sht_[t-5],MSFT_sht_[t-6],MSFT_sht_[t-7],MSFT_sht_[t-8],MSFT_sht_[t-9],MSFT_sht_[t-10],AAPL_sht_[t-1],AAPL_sht_[t-2],AAPL_sht_[t-3],AAPL_sht_[t-4],AAPL_sht_[t-5],AAPL_sht_[t-6],AAPL_sht_[t-7],AAPL_sht_[t-8],AAPL_sht_[t-9],AAPL_sht_[t-10],AMZN_sht_[t-1],AMZN_sht_[t-2],AMZN_sht_[t-3],AMZN_sht_[t-4],AMZN_sht_[t-5],AMZN_sht_[t-6],AMZN_sht_[t-7],AMZN_sht_[t-8],AMZN_sht_[t-9],AMZN_sht_[t-10],GOOGL_sht_[t-1],GOOGL_sht_[t-2],GOOGL_sht_[t-3],GOOGL_sht_[t-4],GOOGL_sht_[t-5],GOOGL_sht_[t-6],GOOGL_sht_[t-7],GOOGL_sht_[t-8],GOOGL_sht_[t-9],GOOGL_sht_[t-10],JPM_sht_[t-1],JPM_sht_[t-2],JPM_sht_[t-3],JPM_sht_[t-4],JPM_sht_[t-5],JPM_sht_[t-6],JPM_sht_[t-7],JPM_sht_[t-8],JPM_sht_[t-9],JPM_sht_[t-10],JNJ_sht_[t-1],JNJ_sht_[t-2],JNJ_sht_[t-3],JNJ_sht_[t-4],JNJ_sht_[t-5],JNJ_sht_[t-6],JNJ_sht_[t-7],JNJ_sht_[t-8],JNJ_sht_[t-9],JNJ_sht_[t-10],V_sht_[t-1],V_sht_[t-2],V_sht_[t-3],V_sht_[t-4],V_sht_[t-5],V_sht_[t-6],V_sht_[t-7],V_sht_[t-8],V_sht_[t-9],V_sht_[t-10],MA_sht_[t-1],MA_sht_[t-2],MA_sht_[t-3],MA_sht_[t-4],MA_sht_[t-5],MA_sht_[t-6],MA_sht_[t-7],MA_sht_[t-8],MA_sht_[t-9],MA_sht_[t-10],INTC_sht_[t-1],INTC_sht_[t-2],INTC_sht_[t-3],INTC_sht_[t-4],INTC_sht_[t-5],INTC_sht_[t-6],INTC_sht_[t-7],INTC_sht_[t-8],INTC_sht_[t-9],INTC_sht_[t-10],V_price_[t],V_Future
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1
2019-12-23,0.003389,0.000866,0.0,0.016318,0.003638,-0.000437,-0.000291,0.002602,-0.004894,-0.004354,0.00475,0.002753,0.004876,-0.000986,0.001107,0.003572,0.000118,0.007909,0.001061,-0.000999,-0.003764,0.004945,0.004459,-0.000432,0.000335,0.007148,7.3e-05,0.008575,0.002908,-0.001097,-0.003163,0.010918,0.00868,-0.002069,-0.005401,0.006471,0.008418,0.010152,0.003772,-0.00152,-0.00257,-0.002071,0.001001,-0.002389,0.001965,0.017118,0.013593,0.002548,0.008529,0.005844,-0.014,-0.003225,0.004624,-0.003703,0.012124,0.004696,0.000347,0.006639,0.005468,-0.005887,-0.001193,-0.003848,0.003351,-0.002199,-0.00427,0.010268,-0.001201,0.003154,0.001013,-7.4e-05,0.002688,-0.000801,-0.004998,-0.001013,0.006116,0.003874,-0.008767,0.028618,-0.002379,0.00067,-0.004665,0.004885,0.015085,-0.002577,0.012483,0.0029,0.000283,0.002554,0.007072,-0.00363,0.000855,0.007827,0.00887,-0.003342,-0.003866,0.005942,0.013688,0.003461,-0.001372,-0.003608,0.004117,0.003495,0.009466,-0.004257,-0.003233,0.002938,0.013278,0.007447,0.004989,-0.001557,-0.004614,0.017081,0.013818,-0.002269,-0.006932,-0.001557,0.00417,0.008411,0.008482,0.001061,-0.004929,186.809448,187.298752
2019-12-24,-0.001264,-0.000195,-0.000191,0.000951,-0.002114,-0.00459,0.00277,-0.003483,0.002619,0.000706,0.003039,0.003389,0.002753,0.004876,-0.000986,0.001107,0.003572,0.000118,0.007909,0.001061,-0.000999,0.000866,0.004945,0.004459,-0.000432,0.000335,0.007148,7.3e-05,0.008575,0.002908,-0.001097,0.0,0.010918,0.00868,-0.002069,-0.005401,0.006471,0.008418,0.010152,0.003772,-0.00152,0.016318,-0.002071,0.001001,-0.002389,0.001965,0.017118,0.013593,0.002548,0.008529,0.005844,0.003638,-0.003225,0.004624,-0.003703,0.012124,0.004696,0.000347,0.006639,0.005468,-0.005887,-0.000437,-0.003848,0.003351,-0.002199,-0.00427,0.010268,-0.001201,0.003154,0.001013,-7.4e-05,-0.000291,-0.000801,-0.004998,-0.001013,0.006116,0.003874,-0.008767,0.028618,-0.002379,0.00067,0.002602,0.004885,0.015085,-0.002577,0.012483,0.0029,0.000283,0.002554,0.007072,-0.00363,-0.004894,0.007827,0.00887,-0.003342,-0.003866,0.005942,0.013688,0.003461,-0.001372,-0.003608,-0.004354,0.003495,0.009466,-0.004257,-0.003233,0.002938,0.013278,0.007447,0.004989,-0.001557,0.00475,0.017081,0.013818,-0.002269,-0.006932,-0.001557,0.00417,0.008411,0.008482,0.001061,187.298752,188.886444
2019-12-26,0.003715,0.005128,0.008197,0.01984,0.044467,0.013418,0.010612,-0.000685,0.008477,0.005782,0.006901,-0.001264,0.003389,0.002753,0.004876,-0.000986,0.001107,0.003572,0.000118,0.007909,0.001061,-0.000195,0.000866,0.004945,0.004459,-0.000432,0.000335,0.007148,7.3e-05,0.008575,0.002908,-0.000191,0.0,0.010918,0.00868,-0.002069,-0.005401,0.006471,0.008418,0.010152,0.003772,0.000951,0.016318,-0.002071,0.001001,-0.002389,0.001965,0.017118,0.013593,0.002548,0.008529,-0.002114,0.003638,-0.003225,0.004624,-0.003703,0.012124,0.004696,0.000347,0.006639,0.005468,-0.00459,-0.000437,-0.003848,0.003351,-0.002199,-0.00427,0.010268,-0.001201,0.003154,0.001013,0.00277,-0.000291,-0.000801,-0.004998,-0.001013,0.006116,0.003874,-0.008767,0.028618,-0.002379,-0.003483,0.002602,0.004885,0.015085,-0.002577,0.012483,0.0029,0.000283,0.002554,0.007072,0.002619,-0.004894,0.007827,0.00887,-0.003342,-0.003866,0.005942,0.013688,0.003461,-0.001372,0.000706,-0.004354,0.003495,0.009466,-0.004257,-0.003233,0.002938,0.013278,0.007447,0.004989,0.003039,0.00475,0.017081,0.013818,-0.002269,-0.006932,-0.001557,0.00417,0.008411,0.008482,188.886444,189.116104
2019-12-27,0.000834,3.4e-05,0.001828,-0.00038,0.000551,-0.005747,0.000719,-0.000549,0.001216,0.005147,0.004346,0.003715,-0.001264,0.003389,0.002753,0.004876,-0.000986,0.001107,0.003572,0.000118,0.007909,0.005128,-0.000195,0.000866,0.004945,0.004459,-0.000432,0.000335,0.007148,7.3e-05,0.008575,0.008197,-0.000191,0.0,0.010918,0.00868,-0.002069,-0.005401,0.006471,0.008418,0.010152,0.01984,0.000951,0.016318,-0.002071,0.001001,-0.002389,0.001965,0.017118,0.013593,0.002548,0.044467,-0.002114,0.003638,-0.003225,0.004624,-0.003703,0.012124,0.004696,0.000347,0.006639,0.013418,-0.00459,-0.000437,-0.003848,0.003351,-0.002199,-0.00427,0.010268,-0.001201,0.003154,0.010612,0.00277,-0.000291,-0.000801,-0.004998,-0.001013,0.006116,0.003874,-0.008767,0.028618,-0.000685,-0.003483,0.002602,0.004885,0.015085,-0.002577,0.012483,0.0029,0.000283,0.002554,0.008477,0.002619,-0.004894,0.007827,0.00887,-0.003342,-0.003866,0.005942,0.013688,0.003461,0.005782,0.000706,-0.004354,0.003495,0.009466,-0.004257,-0.003233,0.002938,0.013278,0.007447,0.006901,0.003039,0.00475,0.017081,0.013818,-0.002269,-0.006932,-0.001557,0.00417,0.008411,189.116104,187.558365
2019-12-30,-0.006393,-0.005781,-0.008619,0.005935,-0.012253,-0.011021,-0.003665,-0.003087,-0.008237,-0.010042,-0.007657,0.000834,0.003715,-0.001264,0.003389,0.002753,0.004876,-0.000986,0.001107,0.003572,0.000118,3.4e-05,0.005128,-0.000195,0.000866,0.004945,0.004459,-0.000432,0.000335,0.007148,7.3e-05,0.001828,0.008197,-0.000191,0.0,0.010918,0.00868,-0.002069,-0.005401,0.006471,0.008418,-0.00038,0.01984,0.000951,0.016318,-0.002071,0.001001,-0.002389,0.001965,0.017118,0.013593,0.000551,0.044467,-0.002114,0.003638,-0.003225,0.004624,-0.003703,0.012124,0.004696,0.000347,-0.005747,0.013418,-0.00459,-0.000437,-0.003848,0.003351,-0.002199,-0.00427,0.010268,-0.001201,0.000719,0.010612,0.00277,-0.000291,-0.000801,-0.004998,-0.001013,0.006116,0.003874,-0.008767,-0.000549,-0.000685,-0.003483,0.002602,0.004885,0.015085,-0.002577,0.012483,0.0029,0.000283,0.001216,0.008477,0.002619,-0.004894,0.007827,0.00887,-0.003342,-0.003866,0.005942,0.013688,0.005147,0.005782,0.000706,-0.004354,0.003495,0.009466,-0.004257,-0.003233,0.002938,0.013278,0.004346,0.006901,0.003039,0.00475,0.017081,0.013818,-0.002269,-0.006932,-0.001557,0.00417,187.558365,187.62825


In [24]:
# models
models = {
#     'svmReg':svm.SVR(kernel='linear').fit(X_train, y_train),
#     'gboost':GradientBoostingRegressor(loss='ls', random_state=SEED).fit(X_train, y_train),
#     'dtReg':tree.DecisionTreeRegressor(random_state=SEED).fit(X_train, y_train),
    'dnn':fit_ann()
}
# ensReg = VotingRegressor(estimators=models.items()).fit(X_train, y_train)
# models.update({'ensReg':ensReg})

Train on 623 samples, validate on 623 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 00037: early stopping


In [25]:
# pred. table
pred_table = pd.DataFrame()
for m in models.items():
    pred_table[f"future_pred_{m[0]}"] = test_df.drop(f"{TARGET}_Future",axis=1).apply(predict_sample, args=(m[1],), axis=1) # predict rows

pred_table = pd.concat([pred_table, processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]], axis=1) # concat today's & future's actuals
new_names = list(models.keys())
new_names.extend(["todays_price", "future_actual"])
pred_table.columns = new_names # rename
pred_table.dropna(inplace=True)
pred_table["actual_signal"] = pred_table.apply(lambda x: 1 if x["future_actual"] >= x["todays_price"] else 0, axis=1) # actual signal

for m1 in models.keys():
    pred_table[f"{m1}_pred_signal"] = pred_table.apply(lambda x: 1  if x[m1] >= x["todays_price"] else 0, axis=1) # strategy signal

pred_table.tail()

Unnamed: 0_level_0,dnn,todays_price,future_actual,actual_signal,dnn_pred_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-12-23,[186.21057],186.809448,187.298752,1,0
2019-12-24,[186.67055],187.298752,188.886444,1,0
2019-12-26,[188.29851],188.886444,189.116104,1,0
2019-12-27,[188.48889],189.116104,187.558365,0,0
2019-12-30,[186.93298],187.558365,187.62825,1,0


In [26]:
# evaluation
for i in pred_table.columns:
    if i.endswith("_pred_signal"):
        print(i)
        print(classification_report(pred_table["actual_signal"], pred_table[i]))

dnn_pred_signal
              precision    recall  f1-score   support

           0       0.41      0.95      0.57       257
           1       0.50      0.04      0.07       366

    accuracy                           0.41       623
   macro avg       0.45      0.49      0.32       623
weighted avg       0.46      0.41      0.27       623



In [27]:
PREDICTOR = "dnn"
trade_df = pred_table[[PREDICTOR, "todays_price", "future_actual", "actual_signal", f"{PREDICTOR}_pred_signal"]]
trade_df.columns = ["pred_signal" if n.endswith("pred_signal") else n for n in trade_df.columns] # rename last col.

pos = None
ALLOW_SHORTS = True
BALANCE = 10000.00
_balance = BALANCE
first_iter = True

trade_df["P&L"] = trade_df.apply(return_profit, axis=1)
trade_df["Cumulative_Profit"] = trade_df["P&L"].cumsum()
trade_df['Balance'] = trade_df.apply(get_balance, axis=1)
trade_df.tail()

Unnamed: 0_level_0,dnn,todays_price,future_actual,actual_signal,pred_signal,P&L,Cumulative_Profit,Balance
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-12-23,[186.21057],186.809448,187.298752,1,0,24.804108,-4855.772057,5144.227943
2019-12-24,[186.67055],187.298752,188.886444,1,0,-13.211197,-4868.983253,5131.016747
2019-12-26,[188.29851],188.886444,189.116104,1,0,-42.867691,-4911.850945,5088.149055
2019-12-27,[188.48889],189.116104,187.558365,0,0,-5.971161,-4917.822105,5082.177895
2019-12-30,[186.93298],187.558365,187.62825,1,0,40.501221,-4877.320885,5122.679115


In [28]:
trade_df["pred_signal"].value_counts()

0    597
1     26
Name: pred_signal, dtype: int64

In [None]:
# # grid search for best seq/per
# # seq 5, per 1
# results = []
# for seq in tqdm(range(1,2)):
#     for per in range(1,2):
#         TARGET="AAPL"
#         SEQ_LEN=seq # previous data
#         PERIOD=per # future data
#         FEATURES_TYPE="since"
#         FORWARD_TEST = True
#         SCALING = "none" # none / minmax / standard
#         SPLIT_SIZE = 0.5 # training size

#         processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF)
#         X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

#         # model
#         svmReg = svm.SVR(kernel='linear').fit(X_train, y_train)

#         # evaluation
#         eval_df = pd.DataFrame()
#         eval_df[f"future_{TARGET}_prediction"] = test_df.drop(f"{TARGET}_Future",axis=1).apply(predict_sample, args=(svmReg,), axis=1)
#         eval_df = pd.concat([eval_df, processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]], axis=1)
#         eval_df.columns = ['future_pred', "todays_price", "future_actual"]
#         eval_df.dropna(inplace=True)
#         eval_df["actual_signal"] = eval_df.apply(lambda x: 1 if x["future_actual"] >= x["todays_price"] else 0, axis=1)
#         eval_df["pred_signal"] = eval_df.apply(lambda x: 1  if x["future_pred"] >= x["todays_price"] else 0, axis=1)
        
#         preds = eval_df["pred_signal"]
#         acts = eval_df["actual_signal"]
        
#         res = {"SEQ":SEQ_LEN,
#                "PERIOD":PERIOD,
#                "ACC":accuracy_score(acts, preds),
#                "AUC":roc_auc_score(acts, preds),
#                "PRECISION_0":precision_recall_fscore_support(acts, preds)[0][0],
#                "PRECISION_1":precision_recall_fscore_support(acts, preds)[0][1]
#               }
        
    
#         try:
#             zero = preds.value_counts()[0]
#         except:
#             zero = None
        
#         try:
#             one = preds.value_counts()[1]
#         except:
#             one = None
            
#         res.update({"zeros":zero, "ones":one})
        
#         results.append(res)
        


# results_df = pd.DataFrame(results)
# results_df.to_csv(f"res_{int(time.time())}.csv", index=False)

In [None]:
# classification
# svmClf = svm.SVC(kernel='rbf').fit(X_train, y_train)
# svmClfPreds = svmClf.predict(X_test)
# print(classification_report(y_test, svmClfPreds))

In [None]:
# # visualisations:
# df = DF

# # model vs actual plot
# fig = go.Figure()
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=y_test,
#     name=f"Actual",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="solid"),
#     opacity=0.6
#     )
# )
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=svmPreds,
#     name=f"Predicted",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="dot"),
#     opacity=1
#     )
# )

# fig.update_layout(
#     title=f"{TARGET} Daily Chart<br>Actual vs Model's Prediction",
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     template="plotly_dark",
# )

# fig.show()
# ###



# # correlation heatmap
# # plt.figure(figsize=(15,7))
# corr = df.corr()
# mask = np.zeros_like(corr)
# mask[np.triu_indices_from(mask)] = True
# with sns.axes_style("white"):
#     ax1 = sns.heatmap(corr, mask=mask, square=False,cmap="coolwarm").set_title("Percent Change Correlation Heatmap")       
# ###



# # distributions
# fig = ff.create_distplot([df.pct_change().dropna()[c] for c in df.pct_change().columns], df.pct_change().columns, show_rug=False, show_hist=False)
# fig.update_layout(
#     title=f'Daily Return Distribution')
# fig.show()
# ###



# # prices subplots
# fig = make_subplots(rows=df.shape[1], cols=1, start_cell="bottom-left",     subplot_titles=df.columns,shared_xaxes=True)

# i, j = 1, 1
# for col in df.columns:
#     fig.add_trace(go.Scatter(x=df.index, y=df[col],name=col), row=i, col=1)
#     i += 1
# #     if j != 4: i += 1
# #     else:
# #         i += 1
# #         j = 1

# fig.update_layout(
#     title=f' Daily Chart',
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     xaxis=dict(position=1)
# #     template="plotly_dark",
# )
# fig.update_layout(
#     autosize=True,
# #      width=1500,
#     height=2000,
#     margin=dict(
#         l=50,
#         r=50,
#         b=100,
#         t=100,
#         pad=4
#     ),
# #     paper_bgcolor="LightSteelBlue",
# )

# fig.show()