In [None]:
# import libraries for time-series analysis
import os
import numpy as np
import pandas as pd
import seaborn as sns

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import matplotlib.pyplot as plt
from matplotlib import cm
from pandas import read_csv, set_option

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, RobustScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier, CatBoostRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression
import xgboost
from xgboost import plot_importance, XGBClassifier, XGBRegressor
from sklearn.model_selection import learning_curve
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import shap

from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from tqdm.autonotebook import tqdm
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.decomposition import FastICA
from sklearn.manifold import Isomap
from sklearn.manifold import MDS
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.manifold import TSNE
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection

import time
import warnings
# suppress every warning for the rest of the session
# (note: this also hides deprecation notices raised by the libraries imported above)
warnings.filterwarnings('ignore')
# apply seaborn's 'whitegrid' style to subsequent matplotlib/seaborn figures
sns.set(style='whitegrid')
%matplotlib inline

In [None]:
import datetime

In [None]:
# custom parser that converts the timestamp strings stored in the csv file into native, timezone-aware datetime objects
def datetime_parser(timestamp_str):
    """
    Convert a single timestamp string into a timezone-aware ``datetime``.

    The string must look like ``YYYY-MM-DD HH:MM:SS`` immediately followed by
    a UTC offset (for example ``2019-07-07 00:00:00+0000``); ``strptime``
    raises ``ValueError`` for any other layout.
    """
    csv_timestamp_format = "%Y-%m-%d %H:%M:%S%z"
    parsed = datetime.datetime.strptime(timestamp_str, csv_timestamp_format)
    return parsed

# location of the train_df1 dataset, relative to the notebook's working directory
path = './data/train_df1.csv'

# time-series convention: the parsed 'Timestamp' column becomes the index.
# NOTE(review): `date_parser` is deprecated as of pandas 2.0 (in favour of `date_format`);
# it is kept here so the parsing behaviour stays exactly as before.
train_df1 = pd.read_csv(
    path,
    index_col='Timestamp',
    parse_dates=[0],  # column position 0; to be decided later
    date_parser=datetime_parser,
)

# summary of the index, columns, dtypes and non-null counts
train_df1.info()

### 2. Time-Series Visualisation of Variables in Dataset

In [None]:
from plotly.subplots import make_subplots

In [None]:
# visualize overall asset price history during training data period and the associated buy/sell signals
    
# plot n vertical subplots
def plot_vsubplots(ldf,
                   lst,
                   title='',
                   nplots=None,
                   lw_id=None,
                   size=(400, 1000)):

    """
    Function to plot several columns as line plots in vertically stacked
    subplots that share one x-axis.

        Parameters:

        (1) ldf : dataframe
                The DataFrame containing the data to plot; its index is used
                as the x-axis of every subplot.

        (2) lst : list of str
                A list of column names to plot; the i-th name is drawn in the
                i-th subplot row (top to bottom).

        (3) title : str, optional
                The title of the figure - default is an empty string.

        (4) nplots : int, optional
                The number of subplot rows to create - default is None, in
                which case one row per entry of lst is created (at least one).

        (5) lw_id : list of float, optional
                Line widths, one per entry of lst - default is None, which
                leaves the line width to plotly's own default.

        (6) size : sequence of int, optional
                The size of the figure as [height, width] - default is (400, 1000).

    """

    # derive the row count from the columns when the caller does not give one;
    # make_subplots needs at least one row, even for an empty column list
    if nplots is None:
        nplots = max(len(lst), 1)

    fig = make_subplots(rows=nplots,
                        shared_xaxes=True)

    # subplot rows are 1-based in plotly, hence start=1
    for row, column in enumerate(lst, start=1):
        # only force a width when the caller supplied one for this column
        line_style = {} if lw_id is None else {'width': lw_id[row - 1]}
        fig.add_trace(go.Scatter(x=ldf.index,
                                 y=ldf[column],
                                 mode='lines',
                                 name=column,
                                 line=line_style),
                      row=row,
                      col=1)

    # apply the requested size and title
    fig.update_layout(height=size[0],
                      width=size[1],
                      template='plotly_white',
                      title=title,
                      margin=dict(l=50,
                                  r=80,
                                  t=50,
                                  b=40))

    # display the plot
    fig.show()


In [None]:
# draw the 'Close' and 'Signal' columns as two vertically stacked subplots,
# with a thicker line for 'Close' than for 'Signal'
plot_vsubplots(
    ldf=train_df1,
    lst=['Close', 'Signal'],
    title='Weighted Price & Signal Fluctuation in Training Data',
    nplots=2,
    lw_id=[2, 0.4],
    size=[500, 1000],
)

Conclusions: 
* General upward trend in the weighted asset price, which rose from 6.5k at the beginning of this period to 13.8k within the span of a year.
* The Signal subplot visualizes the extent of the intersections between the 20-day MA and the 50-day MA, which serve as buy signals when the market is bullish.

In [None]:
import plotly.graph_objects as go

In [None]:
# customize function to plot trends across time with line plot
def plot_line(ldf,
              lst,
              title='',
              sec_id=None,
              size=(350, 1000)):

    """
    Function to plot trends across time with a line plot.

        Parameters:

        (1) ldf : dataframe
                The DataFrame containing the data to plot; its index is used
                as the x-axis.

        (2) lst : list of str
                A list of column names to plot, one line per column.

        (3) title : str, optional
                The title of the plot - default is an empty string.

        (4) sec_id : list of bool, optional
                One flag per entry of lst: True draws that column against the
                secondary (right-hand) y-axis, False against the primary one.
                Must be the same length as lst - default is None, which
                creates a plain figure without a secondary y-axis.

        (5) size : sequence of int, optional
                The size of the plot as [height, width] - default is (350, 1000).

    """

    # a secondary y-axis is only available on a figure built by make_subplots
    use_secondary = sec_id is not None
    if use_secondary:
        fig = make_subplots(specs=[[{"secondary_y": True}]])
    else:
        fig = go.Figure()

    # one loop handles zero, one or many columns alike
    for pos, column in enumerate(lst):
        trace = go.Scatter(x=ldf.index,
                           y=ldf[column],
                           mode='lines',
                           name=column,
                           line=dict(width=2.0))
        if use_secondary:
            # route the trace to the axis requested for this column
            fig.add_trace(trace, secondary_y=sec_id[pos])
        else:
            fig.add_trace(trace)

    # update the layout of the figure with the specific size and title
    fig.update_layout(height=size[0],
                      width=size[1],
                      template='plotly_white',
                      title=title,
                      margin=dict(l=50,
                                  r=80,
                                  t=50,
                                  b=40))

    # display the plot
    fig.show()

In [None]:
# display the loaded training DataFrame as this cell's output
train_df1

In [None]:
# features related to the target variable in the training data (20MA, 50MA, Signal)
engineered_features = ['SMA', 'LMA', 'Signal']

# restrict the view to 2019-07-07 between 00:00 and 08:00
plot_period = slice('2019-7-7 0:00', '2019-7-7 8:00')
ldf = train_df1.loc[plot_period, engineered_features]

# apply the customized line-plot function to this window of the training data;
# 'Signal' is flagged for the secondary y-axis, the other two columns are not
plot_line(
    ldf=ldf,
    lst=engineered_features,
    title='20MA, 50MA & Signal Created from Closing Price',
    sec_id=[False, False, True],
)