In [1]:
import os
import time
import datetime
from tqdm import tqdm
sep = "-*-*-"

os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # disable GPU

import pandas as pd
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import pandas_datareader.data as web
import numpy as np

# visuals.
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# processing / validation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# keras/tf
# %tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping
print("tensorflow version: ", tf.__version__)

# models
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
from sklearn.ensemble import VotingRegressor

# metrics
from sklearn.metrics import mean_squared_error, r2_score # reg metrics
from sklearn.metrics import classification_report, roc_auc_score, roc_auc_score,\
accuracy_score, precision_score, average_precision_score, balanced_accuracy_score,\
precision_recall_fscore_support# clf metrics

# Constant seed for reproducibility: it seeds NumPy's and TensorFlow's global
# RNGs here and is reused as `random_state` by split_data and the tree-based
# models further down.
SEED = 111 
# NOTE(review): set from inside the running kernel; presumably this only takes
# effect for Python processes started afterwards -- confirm it is still needed.
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# CPU workers; in the visible notebook this is only referenced by the
# commented-out Keras `fit` call.
WORKERS = 6 

  from pandas.util.testing import assert_frame_equal


tensorflow version:  2.0.0


In [2]:
def fetch_data_since(tickers, days, years):
    """Fetch adjusted close prices for a trailing window that ends today.

    The window starts ``days * years`` calendar days before today (for
    example ``days=365, years=3``). Downloading, retrying and missing-value
    handling are delegated to ``fetch_data_from_to`` so the two entry points
    share one implementation instead of two copies of the same loop.

    Parameters
    ----------
    tickers : sequence of str
        Symbols to download.
    days : int
        Number of days that make up one unit of ``years``.
    years : int
        Multiplier applied to ``days`` to get the look-back length.

    Returns
    -------
    pandas.DataFrame
        Whatever ``fetch_data_from_to`` returns for the computed window.
    """
    # Evaluate "today" once so every ticker is requested for the same window,
    # even if the download straddles midnight.
    until = datetime.date.today()
    since = until - datetime.timedelta(days * years)
    return fetch_data_from_to(tickers, since, until)

###################################################################################################################

def fetch_data_from_to(tickers, _from, to):
    """Download the "Adj Close" series of each ticker for a given range.

    Every ticker is requested through ``web.get_data_yahoo(ticker, _from, to)``.
    Tickers whose request raises are retried on the next pass, for at most six
    passes in total. Successfully fetched series become columns of one frame;
    rows containing any missing value are dropped before returning.

    Parameters
    ----------
    tickers : sequence of str
        Symbols to download. The caller's sequence is not modified.
    _from, to
        Range bounds, passed through unchanged to ``web.get_data_yahoo``.

    Returns
    -------
    pandas.DataFrame
        One column per successfully fetched ticker, without rows that
        contain missing values.
    """
    max_attempts = 6  # same budget as the original `attempt <= 5` loop

    df_raw = pd.DataFrame()
    pending = list(tickers)
    for _ in range(max_attempts):
        if not pending:
            break  # everything fetched; do not spend the remaining passes
        still_failing = []
        for ticker in pending:
            try:
                prices = web.get_data_yahoo(ticker, _from, to)  # specified range
                df_raw[ticker] = prices.dropna()["Adj Close"]
            except Exception as exc:
                # `Exception` (not a bare except) so Ctrl-C / kernel interrupts
                # still stop the download; the reason is shown to the user.
                print(ticker, " :failed to fetch data...retrying", f"({exc!r})")
                still_failing.append(ticker)
        pending = still_failing

    if pending:
        print(f"Giving up after {max_attempts} attempts on: {pending}")

    # missing values
    print("Missing Values:")
    print(df_raw.isnull().sum())
    df = df_raw.dropna()

    return df

###################################################################################################################

def _pct_change_features(df, seq_len, features_type):
    """Return the lagged percentage-change feature frame, NaN rows dropped."""
    lags = range(1, seq_len + 1)

    if features_type == "since":  # changes since previous days
        data = {
            f"{col}_snc_[t-{i}]": df[col].pct_change(i)
            for col in df.columns
            for i in lags
        }
        return pd.DataFrame(data).dropna()

    if features_type == "shifted":  # shifted changes of previous days
        base = pd.DataFrame({col: df[col].pct_change(1) for col in df.columns}).dropna()
        lagged = {
            f"{col}_sht_[t-{i}]": base[col].shift(i)
            for col in base.columns
            for i in lags
        }
        return base.assign(**lagged).dropna()

    raise ValueError("features_type can be either 'since' or 'shifted'.")


def process_clf_data(target, seq_len, period, features_type, df):
    """Build percentage-change features plus a binary up/down label.

    Parameters
    ----------
    target : str
        Column of ``df`` whose future direction is the label.
    seq_len : int
        Number of lags; lags ``1..seq_len`` are turned into feature columns.
    period : int
        How many rows ahead the future price is taken from
        (``df[target].shift(-period)``).
    features_type : {"since", "shifted"}
        "since": ``pct_change(i)`` of every column for each lag ``i``.
        "shifted": one-step ``pct_change`` of every column plus that change
        shifted by each lag.
    df : pandas.DataFrame
        Price frame, one column per instrument.

    Returns
    -------
    pandas.DataFrame
        Feature columns, ``"{target}_price_[t]"`` (current price) and
        ``"{target}_Future"`` holding 1.0 when the future price is greater
        than or equal to the current price and 0.0 otherwise. Rows that
        cannot be labelled are dropped.

    Raises
    ------
    ValueError
        If ``features_type`` is unknown, or if more rows are unlabelled than
        the ``period`` trailing rows that necessarily lack a future price.
    """
    print(f"\n{sep*10}\nClassification(!) Processing\n{sep*10}")

    features = _pct_change_features(df, seq_len, features_type)

    price_col = f"{target}_price_[t]"
    label_col = f"{target}_Future"
    df_pct = features.assign(**{
        price_col: df[target],                  # price [t]
        label_col: df[target].shift(-period),   # future price [t + period]
    })

    # labeling (vectorised): a row is labelled only when both prices are known
    price, future = df_pct[price_col], df_pct[label_col]
    comparable = price.notna() & future.notna()

    # The last `period` rows have no future price by construction, so that many
    # unlabelled rows are expected. The old fixed limit of 1 made every call
    # with period >= 2 fail.
    allowed = max(period, 1)
    unlabelled = int((~comparable).sum())
    if unlabelled > allowed:
        raise ValueError(f"More than {allowed} NaN in classifying.")

    df_pct[label_col] = (future >= price).astype(float).where(comparable)

    # Dropping NaN rows here guarantees the result has no nulls, so no further
    # null check is required.
    return df_pct.dropna()

###################################################################################################################

def process_reg_data(target, seq_len, period, features_type, df):
    """Build percentage-change features plus the future price of `target`.

    Lags ``1..seq_len`` are used. With ``features_type="since"`` every column
    of ``df`` contributes ``pct_change(lag)``; with ``"shifted"`` every column
    contributes its one-step ``pct_change`` and that change shifted by each
    lag. The result also carries ``"{target}_price_[t]"`` (current price) and
    ``"{target}_Future"`` (price ``period`` rows ahead). Rows containing NaN
    are dropped.

    Raises ValueError for an unknown ``features_type`` or if nulls remain.
    """
    banner = sep * 10
    print(f"\n{banner}\nRegression Processing\n{banner}")

    if features_type not in ("since", "shifted"):
        raise ValueError("features_type can be either 'since' or 'shifted'.")

    lags = range(1, seq_len + 1)

    if features_type == "since":
        # change of each instrument relative to `lag` rows earlier
        features = pd.DataFrame({
            f"{name}_snc_[t-{lag}]": df[name].pct_change(lag)
            for name in df.columns
            for lag in lags
        })
        features.dropna(inplace=True)
    else:
        # one-step changes first ...
        features = pd.DataFrame({name: df[name].pct_change(1) for name in df.columns})
        features.dropna(inplace=True)
        # ... then the same changes as seen `lag` rows earlier
        for name in list(features.columns):
            for lag in lags:
                features[f"{name}_sht_[t-{lag}]"] = features[name].shift(lag)
        features.dropna(inplace=True)

    series = df[target]
    features[f"{target}_price_[t]"] = series                 # target's price
    features[f"{target}_Future"] = series.shift(-period)     # label

    features.dropna(inplace=True)
    if features.isnull().values.any():
        raise ValueError("null values exist")

    return features

###################################################################################################################

def split_data(forward_test, scaling, split_size, proc_data, target=None):
    """Split processed data into train/test features and labels.

    Parameters
    ----------
    forward_test : bool
        True: the first ``int(len(proc_data) * split_size)`` rows are the
        training set (shuffled afterwards) and the remaining rows, in their
        original order, are the test set. False: all rows are shuffled and a
        ``1 - split_size`` fraction is sampled as the test set.
    scaling : {"minmax", "standard", "none"}
        Feature scaler fitted on the training features and applied to both
        sets; "none" leaves the features untouched.
    split_size : float
        Fraction of rows used for training.
    proc_data : pandas.DataFrame
        Frame containing the feature columns and the ``"{target}_Future"``
        label column.
    target : str, optional
        Name used to build the label column. Defaults to the notebook-level
        ``TARGET`` so existing calls keep working.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
    test_df : pandas.DataFrame
        Test rows; carries the scaled feature values when scaling is applied.

    Raises
    ------
    ValueError
        If ``forward_test`` is not boolean or ``scaling`` is not recognised.
    """
    if target is None:
        target = TARGET  # fall back to the notebook-level default
    label_col = f"{target}_Future"

    # train/test split
    if forward_test == True:  # noqa: E712 - keeps accepting 1 / numpy bools
        # forward test (recommended): split by position so duplicated index
        # labels cannot remove rows from the training part
        cut = int(len(proc_data) * split_size)
        test_df = proc_data.iloc[cut:, :]
        train_df = proc_data.iloc[:cut, :].sample(frac=1, random_state=SEED)  # shuffle train dataset
    elif forward_test == False:  # noqa: E712
        proc_data = proc_data.sample(frac=1, random_state=SEED)  # shuffle all data
        test_df = proc_data.sample(frac=(1 - split_size), random_state=SEED)  # sample test dataset
        train_df = proc_data.drop(test_df.index)
    else:
        raise ValueError("forward_test must be boolean.")

    # feature/label split
    X_train = train_df.drop(label_col, axis=1).values
    X_test = test_df.drop(label_col, axis=1).values
    y_train = train_df[label_col].values
    y_test = test_df[label_col].values

    # scaling
    if scaling == "minmax":
        scaler = MinMaxScaler()
    elif scaling == "standard":
        scaler = StandardScaler()
    elif scaling == "none":
        scaler = None
    else:
        raise ValueError("scaling can be either 'minmax', 'standard' or 'none'.")

    if scaler is not None:
        # fit on the training features only, then apply to the test features
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # rebuild test_df so it carries the scaled feature values
        feature_cols = test_df.drop(label_col, axis=1).columns
        x_test_df = pd.DataFrame(X_test, columns=feature_cols, index=test_df.index)
        y_test_df = pd.DataFrame(y_test, columns=[label_col], index=test_df.index)
        test_df = pd.concat([x_test_df, y_test_df], axis=1)

    return X_train, X_test, y_train, y_test, test_df

###################################################################################################################

def predict_sample(features_serie, model):
    """Predict a single sample.

    ``features_serie`` is converted to an array and reshaped into one row
    (shape ``(1, -1)``) before being handed to ``model.predict``; the first
    element of the returned predictions is the result.
    """
    single_row = np.reshape(np.array(features_serie), (1, -1))
    predictions = model.predict(single_row)
    return predictions[0]

In [31]:
# Universe of instruments to download. The wider list is kept for reference;
# the active run only uses three tickers.
# TICKERS = ["^DJI", "^GSPC", "MSFT", "AAPL", "AMZN", "GOOGL", "JPM", "JNJ", "V", "MA", "INTC"]
TICKERS = ["MSFT", "AAPL", "AMZN"]

# Alternative: trailing window ending today (DAYS * YEARS calendar days back).
# DAYS = 365
# YEARS = 3
# DF = fetch_data_since(tickers=TICKERS, days=DAYS, years=YEARS)

# Fixed date range, passed through as-is to fetch_data_from_to.
FROM = '2000'
TO = '2020' # upper bound; noted as excluded by the author (displayed data ends in 2019-12)
# DF holds one adjusted-close column per fetched ticker, NaN rows dropped.
DF = fetch_data_from_to(tickers=TICKERS, _from=FROM, to=TO)

# Display the fetched frame (truncated by the display.max_rows option).
DF

Missing Values:
MSFT    0
AAPL    0
AMZN    0
dtype: int64


Unnamed: 0_level_0,MSFT,AAPL,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-03,37.393559,3.470226,89.375000
2000-01-04,36.130390,3.177650,81.937500
2000-01-05,36.511333,3.224152,69.750000
2000-01-06,35.288280,2.945139,65.562500
2000-01-07,35.749432,3.084645,69.562500
...,...,...,...
2019-12-24,156.951309,283.596924,1789.209961
2019-12-26,158.237793,289.223602,1868.770020
2019-12-27,158.527008,289.113831,1869.800049
2019-12-30,157.160736,290.829773,1846.890015


In [33]:
# data prep.
# NOTE: split_data is called below without a target argument, so it relies on
# the global TARGET defined here.
TARGET="AAPL" # column of DF to predict
SEQ_LEN=5 # number of lags (1..SEQ_LEN) turned into feature columns
PERIOD=1 # rows ahead used for the label (shift(-PERIOD))
FEATURES_TYPE="shifted" # "since" / "shifted"
FORWARD_TEST = True # True: the rows after the split point form the test set
SCALING = "none" # none / minmax / standard
SPLIT_SIZE = 0.7 # fraction of rows used for training

# Build features + future price, then split into train/test arrays.
processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF)
X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

# Sanity report: shapes of every artefact and the value range of the training features.
print(f"processed shape: {processed_data.shape}", end=f"\n{sep}\n")
print(f"test_df shape: {test_df.shape}", end=f"\n{sep}\n")
print(f"features: train shape: {X_train.shape} | test Shape: {X_test.shape}", end=f"\n{sep}\n")
print(f"labels: train shape: {y_train.shape} | test Shape: {y_test.shape}", end=f"\n{sep}\n")
print(f"X_train Max/Min: {X_train.max()} / {X_train.min()}")
test_df


-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
Regression Processing
-*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*--*-*-
processed shape: (5024, 20)
-*-*-
test_df shape: (1508, 20)
-*-*-
features: train shape: (3516, 19) | test Shape: (1508, 19)
-*-*-
labels: train shape: (3516,) | test Shape: (1508,)
-*-*-
X_train Max/Min: 87.43832397460938 / -0.5186915122085852


Unnamed: 0_level_0,MSFT,AAPL,AMZN,MSFT_sht_[t-1],MSFT_sht_[t-2],MSFT_sht_[t-3],MSFT_sht_[t-4],MSFT_sht_[t-5],AAPL_sht_[t-1],AAPL_sht_[t-2],AAPL_sht_[t-3],AAPL_sht_[t-4],AAPL_sht_[t-5],AMZN_sht_[t-1],AMZN_sht_[t-2],AMZN_sht_[t-3],AMZN_sht_[t-4],AMZN_sht_[t-5],AAPL_price_[t],AAPL_Future
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2014-01-03,-0.006728,-0.021966,-0.003845,-0.006682,0.003217,0.000000,-0.004006,0.009708,-0.014064,0.011722,-0.009944,-0.006757,-0.006641,-0.002056,0.013778,-0.011832,-0.015604,0.013001,69.380615,69.758965
2014-01-06,-0.021132,0.005453,-0.007088,-0.006728,-0.006682,0.003217,0.000000,-0.004006,-0.021966,-0.014064,0.011722,-0.009944,-0.006757,-0.003845,-0.002056,0.013778,-0.011832,-0.015604,69.758965,69.260056
2014-01-07,0.007750,-0.007152,0.011178,-0.021132,-0.006728,-0.006682,0.003217,0.000000,0.005453,-0.021966,-0.014064,0.011722,-0.009944,-0.007088,-0.003845,-0.002056,0.013778,-0.011832,69.260056,69.698692
2014-01-08,-0.017852,0.006333,0.009773,0.007750,-0.021132,-0.006728,-0.006682,0.003217,-0.007152,0.005453,-0.021966,-0.014064,0.011722,0.011178,-0.007088,-0.003845,-0.002056,0.013778,69.698692,68.808632
2014-01-09,-0.006432,-0.012770,-0.002264,-0.017852,0.007750,-0.021132,-0.006728,-0.006682,0.006333,-0.007152,0.005453,-0.021966,-0.014064,0.009773,0.011178,-0.007088,-0.003845,-0.002056,68.808632,68.349464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-23,0.000000,0.016318,0.003638,0.010918,0.008680,-0.002069,-0.005401,0.006471,-0.002071,0.001001,-0.002389,0.001965,0.017118,-0.003225,0.004624,-0.003703,0.012124,0.004696,283.327576,283.596924
2019-12-24,-0.000191,0.000951,-0.002114,0.000000,0.010918,0.008680,-0.002069,-0.005401,0.016318,-0.002071,0.001001,-0.002389,0.001965,0.003638,-0.003225,0.004624,-0.003703,0.012124,283.596924,289.223602
2019-12-26,0.008197,0.019840,0.044467,-0.000191,0.000000,0.010918,0.008680,-0.002069,0.000951,0.016318,-0.002071,0.001001,-0.002389,-0.002114,0.003638,-0.003225,0.004624,-0.003703,289.223602,289.113831
2019-12-27,0.001828,-0.000380,0.000551,0.008197,-0.000191,0.000000,0.010918,0.008680,0.019840,0.000951,0.016318,-0.002071,0.001001,0.044467,-0.002114,0.003638,-0.003225,0.004624,289.113831,290.829773


In [35]:
# models
# Each base regressor is fitted on its own so it can be evaluated individually.
models = {'svmReg': svm.SVR(kernel='linear').fit(X_train, y_train),
          # Least-squares loss is the estimator's default, so it is not spelled
          # out: the 'ls' alias is not accepted by newer scikit-learn releases.
          'gboost': GradientBoostingRegressor(random_state=SEED).fit(X_train, y_train),
          'dtReg': tree.DecisionTreeRegressor(random_state=SEED).fit(X_train, y_train)
         }

# Hand the ensemble a list snapshot of (name, estimator) pairs. A live
# `models.items()` view would start to include the ensemble itself as soon as
# it is added to `models` below.
ensReg = VotingRegressor(estimators=list(models.items())).fit(X_train, y_train)
models.update({'ensReg': ensReg})

In [36]:
# pred. table
# Predict the whole test set with one `predict` call per model instead of one
# call per row.
test_features = test_df.drop(f"{TARGET}_Future", axis=1)
pred_table = pd.DataFrame(
    {f"future_pred_{name}": model.predict(test_features.values) for name, model in models.items()},
    index=test_features.index,
)

# concat today's & future's actual prices (taken from the unscaled processed data)
actuals = processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]
pred_table = pd.concat([pred_table, actuals], axis=1)
pred_table.columns = list(models.keys()) + ["todays_price", "future_actual"]  # rename
pred_table.dropna(inplace=True)  # keeps only the rows that have predictions (the test rows)

# actual signal: 1 when the future price is not below today's price
pred_table["actual_signal"] = (pred_table["future_actual"] >= pred_table["todays_price"]).astype(int)

# strategy signal per model: 1 when the predicted price is not below today's price
for name in models.keys():
    pred_table[f"{name}_pred_signal"] = (pred_table[name] >= pred_table["todays_price"]).astype(int)

pred_table

Unnamed: 0_level_0,svmReg,gboost,dtReg,ensReg,todays_price,future_actual,actual_signal,svmReg_pred_signal,gboost_pred_signal,dtReg_pred_signal,ensReg_pred_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2014-01-03,69.404204,70.411419,71.447174,70.420932,69.380615,69.758965,1,1,1,1,1
2014-01-06,69.786297,70.756005,69.727234,70.089845,69.758965,69.260056,0,1,1,0,1
2014-01-07,69.281755,69.459602,67.820526,68.853961,69.260056,69.698692,1,1,1,0,0
2014-01-08,69.707896,70.524119,71.177399,70.469805,69.698692,68.808632,0,1,1,1,1
2014-01-09,68.835315,68.291336,67.820526,68.315726,68.808632,68.349464,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2019-12-23,283.381396,87.039909,87.149361,152.523555,283.327576,283.596924,1,1,0,0,0
2019-12-24,283.657725,86.872274,87.014877,152.514959,283.596924,289.223602,1,1,0,0,0
2019-12-26,289.288338,86.988578,87.438324,154.571747,289.223602,289.113831,0,1,0,0,0
2019-12-27,289.177153,86.400226,86.029793,153.869057,289.113831,290.829773,1,1,0,0,0


In [37]:
# Print a classification report of every model's signal against the actual signal.
for signal_column in (c for c in pred_table.columns if c.endswith("_pred_signal")):
    print(signal_column)
    report = classification_report(pred_table["actual_signal"], pred_table[signal_column])
    print(report)

svmReg_pred_signal
              precision    recall  f1-score   support

           0       0.45      0.01      0.01       701
           1       0.54      0.99      0.70       807

    accuracy                           0.53      1508
   macro avg       0.49      0.50      0.35      1508
weighted avg       0.50      0.53      0.38      1508

gboost_pred_signal
              precision    recall  f1-score   support

           0       0.47      0.95      0.63       701
           1       0.57      0.05      0.10       807

    accuracy                           0.47      1508
   macro avg       0.52      0.50      0.36      1508
weighted avg       0.52      0.47      0.34      1508

dtReg_pred_signal
              precision    recall  f1-score   support

           0       0.47      0.95      0.63       701
           1       0.61      0.06      0.12       807

    accuracy                           0.48      1508
   macro avg       0.54      0.51      0.37      1508
weighted avg      

In [None]:
# # grid search for best seq/per
# # seq 5, per 1
# results = []
# for seq in tqdm(range(1,2)):
#     for per in range(1,2):
#         TARGET="AAPL"
#         SEQ_LEN=seq # previous data
#         PERIOD=per # future data
#         FEATURES_TYPE="since"
#         FORWARD_TEST = True
#         SCALING = "none" # none / minmax / standard
#         SPLIT_SIZE = 0.5 # training size

#         processed_data = process_reg_data(target=TARGET, seq_len=SEQ_LEN, period=PERIOD, features_type=FEATURES_TYPE, df=DF)
#         X_train, X_test, y_train, y_test, test_df = split_data(forward_test=FORWARD_TEST, scaling=SCALING, split_size=SPLIT_SIZE, proc_data=processed_data)

#         # model
#         svmReg = svm.SVR(kernel='linear').fit(X_train, y_train)

#         # evaluation
#         eval_df = pd.DataFrame()
#         eval_df[f"future_{TARGET}_prediction"] = test_df.drop(f"{TARGET}_Future",axis=1).apply(predict_sample, args=(svmReg,), axis=1)
#         eval_df = pd.concat([eval_df, processed_data[[f"{TARGET}_price_[t]", f"{TARGET}_Future"]]], axis=1)
#         eval_df.columns = ['future_pred', "todays_price", "future_actual"]
#         eval_df.dropna(inplace=True)
#         eval_df["actual_signal"] = eval_df.apply(lambda x: 1 if x["future_actual"] >= x["todays_price"] else 0, axis=1)
#         eval_df["pred_signal"] = eval_df.apply(lambda x: 1  if x["future_pred"] >= x["todays_price"] else 0, axis=1)
        
#         preds = eval_df["pred_signal"]
#         acts = eval_df["actual_signal"]
        
#         res = {"SEQ":SEQ_LEN,
#                "PERIOD":PERIOD,
#                "ACC":accuracy_score(acts, preds),
#                "AUC":roc_auc_score(acts, preds),
#                "PRECISION_0":precision_recall_fscore_support(acts, preds)[0][0],
#                "PRECISION_1":precision_recall_fscore_support(acts, preds)[0][1]
#               }
        
    
#         try:
#             zero = preds.value_counts()[0]
#         except:
#             zero = None
        
#         try:
#             one = preds.value_counts()[1]
#         except:
#             one = None
            
#         res.update({"zeros":zero, "ones":one})
        
#         results.append(res)
        


# results_df = pd.DataFrame(results)
# results_df.to_csv(f"res_{int(time.time())}.csv", index=False)

In [None]:
# classification
# svmClf = svm.SVC(kernel='rbf').fit(X_train, y_train)
# svmClfPreds = svmClf.predict(X_test)
# print(classification_report(y_test, svmClfPreds))

In [None]:
# tf neural
# def fit_ann():
#     dnnReg = Sequential()

#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))
#     dnnReg.add(Dense(100, activation="relu"))

#     dnnReg.add(Dense(1))

#     dnnReg.compile(optimizer="adam", loss="mse")

#     early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=PATIENCE)

#     dnnReg.fit(x=X_train, y=y_train, epochs=1000,validation_data=(X_test,y_test),use_multiprocessing=True, workers=WORKERS, callbacks=[early_stop])
#     return dnnReg

# if False:
#     dnnReg = fit_ann()
#     pd.DataFrame(dnnReg.history.history).plot()
#     dnnPreds = dnnReg.predict(X_test)
#     print(sep*5)
#     print(mean_squared_error(y_test, dnnPreds))
#     print(r2_score(y_test, dnnPreds))


In [None]:
# # visualisations:
# df = DF

# # model vs actual plot
# fig = go.Figure()
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=y_test,
#     name=f"Actual",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="solid"),
#     opacity=0.6
#     )
# )
# fig.add_trace(go.Scatter(
#     x=test_df.index,
#     y=svmPreds,
#     name=f"Predicted",
# #     line_color='#c761ff',
#     line=dict(width=2, dash="dot"),
#     opacity=1
#     )
# )

# fig.update_layout(
#     title=f"{TARGET} Daily Chart<br>Actual vs Model's Prediction",
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     template="plotly_dark",
# )

# fig.show()
# ###



# # correlation heatmap
# # plt.figure(figsize=(15,7))
# corr = df.corr()
# mask = np.zeros_like(corr)
# mask[np.triu_indices_from(mask)] = True
# with sns.axes_style("white"):
#     ax1 = sns.heatmap(corr, mask=mask, square=False,cmap="coolwarm").set_title("Percent Change Correlation Heatmap")       
# ###



# # distributions
# fig = ff.create_distplot([df.pct_change().dropna()[c] for c in df.pct_change().columns], df.pct_change().columns, show_rug=False, show_hist=False)
# fig.update_layout(
#     title=f'Daily Return Distribution')
# fig.show()
# ###



# # prices subplots
# fig = make_subplots(rows=df.shape[1], cols=1, start_cell="bottom-left",     subplot_titles=df.columns,shared_xaxes=True)

# i, j = 1, 1
# for col in df.columns:
#     fig.add_trace(go.Scatter(x=df.index, y=df[col],name=col), row=i, col=1)
#     i += 1
# #     if j != 4: i += 1
# #     else:
# #         i += 1
# #         j = 1

# fig.update_layout(
#     title=f' Daily Chart',
#     xaxis_title='Date',
#     yaxis_title='Price ($)',
# #     xaxis=dict(position=1)
# #     template="plotly_dark",
# )
# fig.update_layout(
#     autosize=True,
# #      width=1500,
#     height=2000,
#     margin=dict(
#         l=50,
#         r=50,
#         b=100,
#         t=100,
#         pad=4
#     ),
# #     paper_bgcolor="LightSteelBlue",
# )

# fig.show()