In [1]:
import os
import time
import datetime
import math
from tqdm import tqdm # progress bar
import talib # financial indicators api
sep = "-*-*-"

os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # disable GPU

import pandas as pd
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import pandas_datareader.data as web
import numpy as np

# visuals.
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# processing / validation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# keras/tf
# %tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping
print("tensorflow version: ", tf.__version__)

# models
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
from sklearn.ensemble import VotingRegressor

# metrics
from sklearn.metrics import mean_squared_error, r2_score # reg metrics
from sklearn.metrics import classification_report, roc_auc_score, roc_auc_score,\
accuracy_score, precision_score, average_precision_score, balanced_accuracy_score,\
precision_recall_fscore_support# clf metrics

# constant seed for reproducibility
SEED = 111 
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# cpu workers
WORKERS = 6 

  from pandas.util.testing import assert_frame_equal


tensorflow version:  2.0.0


In [8]:
def fetch_data_since(tickers, days, years):

    df_raw = pd.DataFrame() 
    attempt = 0
    drop = []
    while len(tickers) != 0 and attempt <= 5:
        tickers = [j for j in tickers if j not in drop] 
        for i in range(len(tickers)):
            try:
                temp = web.get_data_yahoo(tickers[i],
                                          datetime.date.today() - datetime.timedelta(days * years), # since delta
                                          datetime.date.today()) # until today 

                temp.dropna(inplace = True)
                df_raw[tickers[i]] = temp["Adj Close"]
                drop.append(tickers[i])       
            except:
                print(tickers[i]," :failed to fetch data...retrying")
                continue
        attempt+=1
       
    # missing values
    print("Missing Values:")
    print(df_raw.isnull().sum())
    df = df_raw.dropna()

    return df

###################################################################################################################

def fetch_data_from_to(tickers, _from, to):

    df_raw = pd.DataFrame() 
    attempt = 0
    drop = []
    while len(tickers) != 0 and attempt <= 5:
        tickers = [j for j in tickers if j not in drop] 
        for i in range(len(tickers)):
            try:
                temp = web.get_data_yahoo(tickers[i], _from, to ) # specified range

                temp.dropna(inplace = True)
                df_raw[tickers[i]] = temp["Adj Close"]
                drop.append(tickers[i])       
            except:
                print(tickers[i]," :failed to fetch data...retrying")
                continue
        attempt+=1
       
    # missing values
    print("Missing Values:")
    print(df_raw.isnull().sum())
    df = df_raw.dropna()

    return df

###################################################################################################################

def process_clf_data(target, seq_len, period, features_type, df):
    print(f"\n{sep*10}\nClassification(!) Processing\n{sep*10}")
    df_pct = pd.DataFrame() # blank dataframe


    if features_type=="since": # changes since previous days
        for col in df.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_snc_[t-{i}]"] = df[col].pct_change(i)
        df_pct.dropna(inplace=True)
    
    elif features_type=="shifted": # shifted changes of previous days
        for col in df.columns:
#             if col != target: # without target's pct_change
            df_pct[col] = df[col].pct_change(1)
        df_pct.dropna(inplace=True)
        
        # shifted previous
        for col in df_pct.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_sht_[t-{i}]"] = df_pct[col].shift(i)
        df_pct.dropna(inplace=True)
    else:
        raise ValueError("features_type can be either 'since' or 'shifted'.")


    df_pct[f"{target}_price_[t]"] = df[target] # price [t]

    # labeling
    df_pct[f"{target}_Future"] = df[target].shift(-period) # future price [t + perid]
    
    warnings = 0
    def classify(x):
        if x[f"{target}_Future"] >= x[f"{target}_price_[t]"]:
            return 1
        elif x[f"{target}_Future"] < x[f"{target}_price_[t]"]:
            return 0
        else:
            nonlocal warnings
            warnings += 1
            return None
    
    df_pct[f"{target}_Future"] = df_pct.apply(classify, axis=1) # classify


    if warnings > 1:
        raise ValueError("More than 1 NaN in classifying.")
        
    
    df_pct.dropna(inplace=True)
    if df_pct.isnull().any().any():
        raise ValueError("null values exist")
        
    return df_pct

###################################################################################################################

def process_reg_data(target, seq_len, period, features_type, df, rsi=None):
    print(f"\n{sep*10}\nRegression Processing\n{sep*10}")
    df_pct = pd.DataFrame() # blank dataframe


    if features_type=="since": # changes since previous days
        for col in df.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_snc_[t-{i}]"] = df[col].pct_change(i)
        df_pct.dropna(inplace=True)
    
    elif features_type=="shifted": # shifted changes of previous days
        for col in df.columns:
#             if col != target: # without target's pct_change
            df_pct[col] = df[col].pct_change(1)
        df_pct.dropna(inplace=True)
        
        # shifted previous
        for col in df_pct.columns:
            for i in range(1,seq_len+1):
                df_pct[f"{col}_sht_[t-{i}]"] = df_pct[col].shift(i)
        df_pct.dropna(inplace=True)
    else:
        raise ValueError("features_type can be either 'since' or 'shifted'.")

    if rsi:
        df_pct[f"RSI_{rsi}"] = talib.RSI(df[target],rsi)
        
    df_pct[f"{target}_price_[t]"] = df[target] # target's price

    # labeling
    df_pct[f"{target}_Future"] = df[target].shift(-period)

    df_pct.dropna(inplace=True)
    if df_pct.isnull().any().any():
        raise ValueError("null values exist")
        
    return df_pct

###################################################################################################################

def split_data(forward_test, scaling, split_size, proc_data):
    
    # train/test & faeture/label split
    if forward_test==True:
    #     forward test (recommended)
        nth_prcntile = int(len(proc_data)*split_size)
        test_df = proc_data.iloc[nth_prcntile:,:]
        
        train_df = proc_data.drop(test_df.index)
        train_df = train_df.sample(frac=1, random_state=SEED) # shuffle train dataset

        # features
        X_train = train_df.drop(f"{TARGET}_Future", axis=1).values
        X_test = test_df.drop(f"{TARGET}_Future", axis=1).values

        # labels
        y_train = train_df[f"{TARGET}_Future"].values
        y_test = test_df[f"{TARGET}_Future"].values
        
    elif forward_test==False:
        proc_data = proc_data.sample(frac=(1), random_state=SEED) # shuffle all data
        test_df = proc_data.sample(frac=(1-split_size), random_state=SEED) # sample test dataset
        train_df = proc_data.drop(test_df.index)

        # features
        X_train = train_df.drop(f"{TARGET}_Future", axis=1).values
        X_test = test_df.drop(f"{TARGET}_Future", axis=1).values

        # labels
        y_train = train_df[f"{TARGET}_Future"].values
        y_test = test_df[f"{TARGET}_Future"].values
        
    else:
        raise ValueError("forward_test must be boolean.")


    # scaling
    if scaling=="minmax":
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        x_test_df = pd.DataFrame(X_test, columns=proc_data.drop(f"{TARGET}_Future", axis=1).columns, index=test_df.index)
        y_test_df = pd.DataFrame(y_test, columns=[f"{TARGET}_Future"], index=test_df.index)
        test_df = pd.concat([x_test_df, y_test_df], axis=1)
        
    elif scaling=="standard":
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        
        x_test_df = pd.DataFrame(X_test, columns=proc_data.drop(f"{TARGET}_Future", axis=1).columns, index=test_df.index)
        y_test_df = pd.DataFrame(y_test, columns=[f"{TARGET}_Future"], index=test_df.index)
        test_df = pd.concat([x_test_df, y_test_df], axis=1)
        
    elif scaling=="none":
        pass
    
    else:
        raise ValueError("scaling can be either 'minmax', 'standard' or 'none'.")
    
    
    return X_train, X_test, y_train, y_test, test_df

###################################################################################################################

# tf neural
def fit_ann(PATIENCE=32, epochs=1000, nodes=100, hls=9):
    dnnReg = Sequential()
    
    for l in range(hls):
        dnnReg.add(Dense(nodes, activation="relu"))
#         dnnReg.add(Dropout(0.2))
    dnnReg.add(Dense(1))

    dnnReg.compile(optimizer="adam", loss="mse")

    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=PATIENCE)

    dnnReg.fit(x=X_train, y=y_train, epochs=epochs, validation_data=(X_test,y_test), use_multiprocessing=True, workers=WORKERS, callbacks=[early_stop])
    return dnnReg

###################################################################################################################

def predict_sample(features_serie, model):
    features = np.array(features_serie).reshape(1,-1)
    pred = model.predict(features)[0]
    return pred

###################################################################################################################

def open_pos(qnt, open_price, pos_type):
    pos = {"qnt":qnt, "open_price":open_price, "cost":qnt * open_price, "type":pos_type}
    return pos

def get_profit(pos, close_price):
    profit = (pos["qnt"] * close_price) - (pos["qnt"] * pos["open_price"]) 
    if pos["type"] == "SHORT":
        profit = -profit
    return profit
    

    
def get_balance(x):
    global _balance
    global first_iter
    
    if first_iter:
        first_iter = False
        return _balance
    else:
        _balance += x["P&L"]
        return _balance

In [3]:
TICKERS = ["MSFT", "AAPL"]
FROM = '2009'
TO = '2010' # excld.
TARGET="AAPL"
DF = fetch_data_from_to(tickers=TICKERS, _from=FROM, to=TO)

print(DF.shape)
DF.tail()

Missing Values:
MSFT    0
AAPL    0
dtype: int64
(252, 2)


Unnamed: 0_level_0,MSFT,AAPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-12-24,24.333616,25.922182
2009-12-28,24.46706,26.240877
2009-12-29,24.639746,25.929619
2009-12-30,24.302216,26.244589
2009-12-31,23.92544,26.131752


In [63]:
# data prep.
SEQ_LEN=3 # previous data
PERIOD=5 # future data
FEATURES_TYPE="since"
FORWARD_TEST = True
SCALING = "standard" # none / minmax / standard
SPLIT_SIZE = 0.5 # training size
RSI = None
processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF, rsi=RSI)
X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

print(f"processed shape: {processed_data.shape}", end=f"\n{sep}\n")
print(f"test_df shape: {test_df.shape}", end=f"\n{sep}\n")
print(f"features: train shape: {X_train.shape} | test Shape: {X_test.shape}", end=f"\n{sep}\n")
print(f"labels: train shape: {y_train.shape} | test Shape: {y_test.shape}", end=f"\n{sep}\n")
print(f"X_train Max/Min: {X_train.max()} / {X_train.min()}")
test_df.tail()


-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
Regression Processing
-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
processed shape: (244, 8)
-*-*-
test_df shape: (122, 8)
-*-*-
features: train shape: (122, 7) | test Shape: (122, 7)
-*-*-
labels: train shape: (122,) | test Shape: (122,)
-*-*-
X_train Max/Min: 3.528570876395311 / -4.055019747758036


Unnamed: 0_level_0,MSFT_snc_[t-1],MSFT_snc_[t-2],MSFT_snc_[t-3],AAPL_snc_[t-1],AAPL_snc_[t-2],AAPL_snc_[t-3],AAPL_price_[t],AAPL_Future
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2009-12-17,-0.62605,-0.457172,-0.503246,-0.798585,-0.513354,-0.824005,4.013704,25.922182
2009-12-18,0.816467,0.139789,0.145251,0.587218,-0.146204,-0.112382,4.194271,26.240877
2009-12-21,0.120388,0.73187,0.205544,0.417073,0.673722,0.104928,4.33589,25.929619
2009-12-22,0.27592,0.311633,0.831077,0.274668,0.463802,0.716063,4.443622,26.244589
2009-12-23,0.051296,0.257675,0.308615,0.192755,0.313674,0.49318,4.531629,26.131752


In [64]:
# models
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
models = {
#     "nn":MLPRegressor(hidden_layer_sizes=(100,100,200,200,300,300,200,200,100,100,50,1),max_iter=10000,early_stopping=True).fit(X_train, y_train),
     'svmReg':svm.SVR(kernel='linear').fit(X_train, y_train),
#      'svmReg_2':svm.SVR(degree=1).fit(X_train, y_train),
#      'svmReg_3':svm.SVR(degree=2).fit(X_train, y_train),

#      'svmReg_4':svm.SVR(degree=5).fit(X_train, y_train),

#      'svmReg_5':svm.SVR(degree=10).fit(X_train, y_train),
#       'reg':linear_model.Ridge(alpha=.5).fit(X_train, y_train),
    "knn": KNeighborsRegressor().fit(X_train, y_train),
#     "dt":DecisionTreeRegressor().fit(X_train, y_train)

#      'gboost':GradientBoostingRegressor(loss='ls', random_state=SEED).fit(X_train, y_train),
#     'dtReg':tree.DecisionTreeRegressor(random_state=SEED).fit(X_train, y_train),
#     'dnn':fit_ann()
}
# ensReg = VotingRegressor(estimators=models.items()).fit(X_train, y_train)
# models.update({'ensReg':ensReg})

In [61]:
counter = 0
limit  = PERIOD
def return_profit(x):
    global BALANCE
    global pos
    global counter


    counter += 1
#     print(counter)
    DATE = x.name.date()
    PRICE = x["todays_price"]
    
    # close position
    if pos and counter == limit:
#         print(DATE)
        profit = get_profit(pos, close_price=PRICE)
        BALANCE += pos["cost"] + profit # remaining
                
#         if pos["type"] == "SHORT":
#             print(f"Closed {pos['qnt']} Short at {round(PRICE,6)} on {DATE} | -> {profit}")
            
#         if pos["type"] == "LONG":
#             print(f"Closed {pos['qnt']} Long at {round(PRICE,6)} on {DATE} | -> {profit}")

        
        pos = None # remove position
        counter = 0

        
    # LONG    
    if x["pred_signal"] == 1 and not pos and BALANCE > PRICE:
#         qnt = math.floor(BALANCE / PRICE)
        qnt = 1
        pos = open_pos(qnt=qnt, open_price=PRICE, pos_type="LONG")
        BALANCE -= pos["cost"]
#         print(f"LONG {pos['qnt']} at {round(PRICE,6)} on {DATE}")

 
    # SHORT
    if ALLOW_SHORTS:
        if x["pred_signal"] == 0 and not pos and BALANCE > PRICE:
#             qnt = math.floor(BALANCE / PRICE)
            qnt = 1
            pos = open_pos(qnt=qnt, open_price=PRICE, pos_type="SHORT")
            BALANCE -= pos["cost"]
#             print(f"SHORT {pos['qnt']} at {round(PRICE,6)} on {DATE}")

    try:
        return profit
    except:
        return 0

In [62]:
# pred. table
pred_table = pd.DataFrame()
for m in models.items():
    pred_table[f"future_pred_{m[0]}"] = test_df.drop(f"{TARGET}_Future",axis=1).apply(predict_sample, args=(m[1],), axis=1) # predict rows

pred_table = pd.concat([pred_table, processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]], axis=1) # concat today's & future's actuals
new_names = list(models.keys())
new_names.extend(["todays_price", "future_actual"])
pred_table.columns = new_names # rename
pred_table.dropna(inplace=True)
pred_table["actual_signal"] = pred_table.apply(lambda x: 1 if x["future_actual"] >= x["todays_price"] else 0, axis=1) # actual signal

for m1 in models.keys():
    pred_table[f"{m1}_pred_signal"] = pred_table.apply(lambda x: 1  if x[m1] >= x["todays_price"] else 0, axis=1) # strategy signal


    
trade_perfs = []
for PREDICTOR in models.keys():
    trade_df = pred_table[[PREDICTOR, "todays_price", "future_actual", "actual_signal", f"{PREDICTOR}_pred_signal"]]
    trade_df.columns = ["pred_signal" if n.endswith("pred_signal") else n for n in trade_df.columns] # rename last col.
    
        
    preds = trade_df["pred_signal"]
    acts = trade_df["actual_signal"]
    
    # longs and shorts
    try:
        zero = preds.value_counts()[0]
    except:
        zero = None
    try:
        one = preds.value_counts()[1]
    except:
        one = None
        
    pos = None
    ALLOW_SHORTS = True
    BALANCE = 10000.00
    _balance = BALANCE
    first_iter = True
    trade_df["P&L"] = trade_df.apply(return_profit, axis=1)
    trade_df["Cumulative_Profit"] = trade_df["P&L"].cumsum()
    trade_df['Balance'] = trade_df.apply(get_balance, axis=1)
    
    # financial perf.
    trade_perf = {
        "alg":PREDICTOR,
        "final_balance":trade_df["Balance"][-1],
        "avg_profit":trade_df["P&L"].mean(),
        "total_profit":trade_df["Cumulative_Profit"][-1],
        "Shorts":zero,
        "Longs":one,
    }

    
    # alg. perf.
    trade_perf.update({
        "ACC":accuracy_score(acts, preds),
        "AUC":roc_auc_score(acts, preds),
        "PRECISION_0":precision_recall_fscore_support(acts, preds)[0][0],
        "PRECISION_1":precision_recall_fscore_support(acts, preds)[0][1]
    })

    trade_perfs.append(trade_perf)

            
    
# pd.DataFrame(trade_perfs).sort_values("total_profit", ascending=False)
trade_df


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



Unnamed: 0_level_0,knn,todays_price,future_actual,actual_signal,pred_signal,P&L,Cumulative_Profit,Balance
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2009-07-02,16.85264,17.363295,17.177294,0,0,0.0,0.0,10000.0
2009-07-06,16.442179,17.188444,17.650986,1,0,0.0,0.0,10000.0
2009-07-07,16.41961,16.790394,17.642317,1,0,0.572901,0.572901,10000.572901
2009-07-08,16.078598,17.016081,18.213978,1,0,0.0,0.572901,10000.572901
2009-07-09,16.291887,16.909433,18.293341,1,0,0.0,0.572901,10000.572901
2009-07-10,16.240301,17.177294,18.817886,1,0,0.0,0.572901,10000.572901
2009-07-13,17.404466,17.650986,18.961735,1,0,0.0,0.572901,10000.572901
2009-07-14,17.473909,17.642317,18.788124,1,0,-0.851923,-0.279022,9999.720978
2009-07-15,17.219201,18.213978,19.43667,1,0,0.0,-0.279022,9999.720978
2009-07-16,17.570633,18.293341,19.570602,1,0,0.0,-0.279022,9999.720978


In [None]:
# # grid search for best seq/per
# # seq 5, per 1
# results = []
# for seq in tqdm(range(1,2)):
#     for per in range(1,2):
#         TARGET="AAPL"
#         SEQ_LEN=seq # previous data
#         PERIOD=per # future data
#         FEATURES_TYPE="since"
#         FORWARD_TEST = True
#         SCALING = "none" # none / minmax / standard
#         SPLIT_SIZE = 0.5 # training size

#         processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF)
#         X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

#         # model
#         svmReg = svm.SVR(kernel='linear').fit(X_train, y_train)

#         # evaluation
#         eval_df = pd.DataFrame()
#         eval_df[f"future_{TARGET}_prediction"] = test_df.drop(f"{TARGET}_Future",axis=1).apply(predict_sample, args=(svmReg,), axis=1)
#         eval_df = pd.concat([eval_df, processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]], axis=1)
#         eval_df.columns = ['future_pred', "todays_price", "future_actual"]
#         eval_df.dropna(inplace=True)
#         eval_df["actual_signal"] = eval_df.apply(lambda x: 1 if x["future_actual"] >= x["todays_price"] else 0, axis=1)
#         eval_df["pred_signal"] = eval_df.apply(lambda x: 1  if x["future_pred"] >= x["todays_price"] else 0, axis=1)
        
#         preds = eval_df["pred_signal"]
#         acts = eval_df["actual_signal"]
        
#         res = {"SEQ":SEQ_LEN,
#                "PERIOD":PERIOD,
#                "ACC":accuracy_score(acts, preds),
#                "AUC":roc_auc_score(acts, preds),
#                "PRECISION_0":precision_recall_fscore_support(acts, preds)[0][0],
#                "PRECISION_1":precision_recall_fscore_support(acts, preds)[0][1]
#               }
        
    
#         try:
#             zero = preds.value_counts()[0]
#         except:
#             zero = None
        
#         try:
#             one = preds.value_counts()[1]
#         except:
#             one = None
            
#         res.update({"zeros":zero, "ones":one})
        
#         results.append(res)
        


# results_df = pd.DataFrame(results)
# results_df.to_csv(f"res_{int(time.time())}.csv", index=False)

In [None]:
# classification
# svmClf = svm.SVC(kernel='rbf').fit(X_train, y_train)
# svmClfPreds = svmClf.predict(X_test)
# print(classification_report(y_test, svmClfPreds))

In [None]:
# # visualisations:
# df = DF

# # model vs actual plot
# fig = go.Figure()
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=y_test,
#     name=f"Actual",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="solid"),
#     opacity=0.6
#     )
# )
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=svmPreds,
#     name=f"Predicted",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="dot"),
#     opacity=1
#     )
# )

# fig.update_layout(
#     title=f"{TARGET} Daily Chart<br>Actual vs Model's Prediction",
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     template="plotly_dark",
# )

# fig.show()
# ###



# # correlation heatmap
# # plt.figure(figsize=(15,7))
# corr = df.corr()
# mask = np.zeros_like(corr)
# mask[np.triu_indices_from(mask)] = True
# with sns.axes_style("white"):
#     ax1 = sns.heatmap(corr, mask=mask, square=False,cmap="coolwarm").set_title("Percent Change Correlation Heatmap")       
# ###



# # distributions
# fig = ff.create_distplot([df.pct_change().dropna()[c] for c in df.pct_change().columns], df.pct_change().columns, show_rug=False, show_hist=False)
# fig.update_layout(
#     title=f'Daily Return Distribution')
# fig.show()
# ###



# # prices subplots
# fig = make_subplots(rows=df.shape[1], cols=1, start_cell="bottom-left",     subplot_titles=df.columns,shared_xaxes=True)

# i, j = 1, 1
# for col in df.columns:
#     fig.add_trace(go.Scatter(x=df.index, y=df[col],name=col), row=i, col=1)
#     i += 1
# #     if j != 4: i += 1
# #     else:
# #         i += 1
# #         j = 1

# fig.update_layout(
#     title=f' Daily Chart',
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     xaxis=dict(position=1)
# #     template="plotly_dark",
# )
# fig.update_layout(
#     autosize=True,
# #      width=1500,
#     height=2000,
#     margin=dict(
#         l=50,
#         r=50,
#         b=100,
#         t=100,
#         pad=4
#     ),
# #     paper_bgcolor="LightSteelBlue",
# )

# fig.show()