In [4]:
%run -i  "../scutil/scml.ipynb"
%mkdir output
import stumpy
import plotly.subplots as sp

mkdir: output: File exists


## Stumpy EDA

In [5]:
init_notebook_mode(connected=True)  
data_file = 'output/stump_training.csv'
peer_list = ['^GSPC']
target = '^GSPC'
start = datetime.datetime(2010,1,1)
end = datetime.datetime(2022,12,31)
inputs = list(set(peer_list) - set(target))
util = Util()
m = Modeling()
data = util.combine_stocks(peer_list)
data.to_csv(data_file)
dataset = pd.read_csv(data_file, index_col='Date', parse_dates=['Date'])
dataset.index = pd.to_datetime(dataset.index, utc=True)
split_year = '2022'
m.data_preprocessing(dataset, split_year, target, inputs)
m.preprocessed_check()

Total data:(3272, 1)
Train data:(3021, 1)
Val data:(251, 1)
Index(['^GSPC'], dtype='object') Index(['^GSPC'], dtype='object')


In [6]:
ts = dataset[[target]]
ts.index = pd.to_datetime(pd.to_datetime(ts.index, format = '%m/%d/%Y').strftime('%Y-%m-%d'), format = '%Y-%m-%d')
ts.head()

Unnamed: 0_level_0,^GSPC
Date,Unnamed: 1_level_1
2010-01-04,1132.98999
2010-01-05,1136.52002
2010-01-06,1137.140015
2010-01-07,1141.689941
2010-01-08,1144.97998


In [10]:
class StumpAnalyzer(object):
    def __init__(self, ts):
        self.ts = ts
        self.timeseries = ts.T.values[0]
        self.default_window_size = 28
        self.date_format = '%Y %b %d'
        self.blue = '#6484cd'

    def index_to_date(self, idx):
        return self.ts.index[idx].strftime(self.date_format)

    def stump_summary(self, matrix_profile, w):
        motif = np.argsort(matrix_profile[:, 0])[0]
        motif_date = self.index_to_date(motif)
        neighbormotif = matrix_profile[motif, 1]
        neighbormotif_date = self.index_to_date(neighbormotif)
        top_10_motifs_idx = np.argsort(matrix_profile[:, 0])[10]
        top_10_motifs_dates = [self.index_to_date(idx) for idx in [top_10_motifs_idx]]
        discord = np.argsort(matrix_profile[:, 0])[-1]
        discord_date = self.index_to_date(discord)
        neighbordiscord = matrix_profile[discord, 0]
        distance_between_motifs = np.abs(motif - neighbormotif)
#         motif_idx, motif_indices = stumpy.motifs(matrix_profile, w)
#         motifs_dates = [self.index_to_date(idx) for idx in [motif_indices]]


        print("\nMatrix Profile:")
        print("Window size:", w)
        print("Motif index", motif, "-", motif_date)
        print("Nearest neighbor index", neighbormotif, "-", neighbormotif_date)
        print("Distance between motifs:", distance_between_motifs)
#         print("Motif Indices:", motif_indices)
#         print("Motif Dates:", motifs_dates)
        print("Top 10 Motif indices:", top_10_motifs_idx)
        print("Top 10 Motif dates:", top_10_motifs_dates)
        print("Discord index", discord, "-", discord_date)

    def run_one_window(self, window=28):
        matrix_profile = stumpy.stump(self.timeseries, m=window)
        self.stump_summary(matrix_profile, window)
        self.plot_raw_matrix(matrix_profile, window)
        
    def run_many_windows(self, windows=[28]):
        profiles = {}
        for _window in windows:
            mp = stumpy.stump(self.timeseries, m=_window)
            profiles[_window] = mp
        self.plot_matrix_profiles(profiles)
        for _w in profiles:
            self.stump_summary(profiles[_w], _w)
    
    def plot_matrix_profiles(self, matrix_profiles):
        num = len(matrix_profiles.keys())
        fig = sp.make_subplots(rows=num, cols=1, shared_xaxes=True, vertical_spacing=0.1)
        i = 1

        for w, mp in matrix_profiles.items():
            m = np.argsort(mp[:, 0])[0]
            n = mp[m, 1]
            d = np.argsort(mp[:, 0])[-1]
            maxh = mp[:, 0].max()

            # Plot Matrix Profile
            fig.add_trace(go.Scatter(x=ts[target].index.strftime('%Y %b %d'), y=mp[:, 0], line=dict(color=self.blue), name=str(w)), row=i, col=1)
            fig.add_shape(type="rect", xref="x", yref="y", x0=m, y0=0, x1=m + w, y1=maxh, fillcolor="green", opacity=0.7, line=dict(color="green", width=0), row=i, col=1)
            fig.add_shape(type="rect", xref="x", yref="y", x0=n, y0=0, x1=n + w, y1=maxh, fillcolor="green", opacity=0.7, line=dict(color="green", width=0), row=i, col=1)
            fig.add_shape(type="rect", xref="x", yref="y", x0=d, y0=0, x1=d + w, y1=maxh, fillcolor="red", opacity=0.7, line=dict(color="red", width=0), row=i, col=1)

            # Update y-axis label
            fig.update_yaxes(title_text="w: " + str(w), row=i, col=1)
            i += 1

        # Set layout properties
        fig.update_layout(
            height=900,
            width=900,
            title="Matrix Profiles",
            xaxis=dict(tickangle=75),
        )
        fig.update_xaxes(title_text="Date")
        fig.write_image("output/matrix-profiles.png", width=1960, height=1080)
        fig.show()
        
        
    def plot_raw_matrix(self, mp, w):
        fig = sp.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.02)
        motif = np.argsort(mp[:, 0])[0]
        neighbormotif = mp[motif, 1]
        discord = np.argsort(mp[:, 0])[-1]
        maxh = int(self.ts.values.max())

        # First subplot
        fig.add_trace(go.Scatter(x=ts[target].index.strftime('%Y %b %d'), y=ts[target].values, name=target), row=1, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=motif, y0=0, x1=motif + w, y1=maxh, fillcolor="green", opacity=0.7, name='Motif', line=dict(color="green", width=0), row=1, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=neighbormotif, y0=0, x1=neighbormotif + w, y1=maxh, fillcolor="green", opacity=0.7, name='Motif neighbour', line=dict(color="green", width=0), row=1, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=discord, y0=0, x1=discord + w, y1=maxh, fillcolor="red", opacity=0.7, name='Discord', line=dict(color="red", width=0), row=1, col=1)
        fig.update_yaxes(title_text=target, row=1)

        maxh = mp[:, 0].max()
        # Second subplot
        fig.add_trace(go.Scatter(x=ts[target].index.strftime('%Y %b %d'), y=mp[:, 0], name="Matrix Profile"), row=2, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=motif, y0=0, x1=motif + w, y1=maxh, fillcolor="green", opacity=0.7, name='Motif', line=dict(color="green", width=0), row=2, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=neighbormotif, y0=0, x1=neighbormotif + w, y1=maxh, fillcolor="green", opacity=0.7, name='Motif neighbour', line=dict(color="green", width=0), row=2, col=1)
        fig.add_shape(type="rect", xref="x", yref="y", x0=discord, y0=0, x1=discord + w, y1=maxh, fillcolor="red", opacity=0.7, name='Discord', line=dict(color="red", width=0), row=2, col=1)
        fig.update_xaxes(title_text="Date", row=2)
        fig.update_yaxes(title_text="Matrix Profile", row=2)

        # Set layout properties
        fig.update_layout(
            title="Data Profile: Motifs, Discords",
            height=800,
            width=1000,
            xaxis=dict(tickangle=75),
            xaxis2=dict(tickangle=75),
            xaxis3=dict(tickangle=75),
            xaxis4=dict(tickangle=75),
            yaxis=dict(domain=[0.55, 1]),
            yaxis2=dict(domain=[0, 0.45]),
            yaxis3=dict(domain=[0.55, 1]),
            yaxis4=dict(domain=[0, 0.45]),
        )
        fig.show()    
    
        
stumpa = StumpAnalyzer(ts)
# stumpa.run_one_window(90)
multiwindows = [5,10, 60, 90, 180, 365, 730]
stumpa.run_many_windows(multiwindows)


Matrix Profile:
Window size: 5
Motif index 1857 - 2017 May 19
Nearest neighbor index 3076 - 2022 Mar 23
Distance between motifs: 1219
Top 10 Motif indices: 2614
Top 10 Motif dates: ['2020 May 22']
Discord index 911 - 2013 Aug 16

Matrix Profile:
Window size: 10
Motif index 2321 - 2019 Mar 26
Nearest neighbor index 2827 - 2021 Mar 29
Distance between motifs: 506
Top 10 Motif indices: 1890
Top 10 Motif dates: ['2017 Jul 07']
Discord index 1875 - 2017 Jun 15

Matrix Profile:
Window size: 60
Motif index 222 - 2010 Nov 18
Nearest neighbor index 158 - 2010 Aug 19
Distance between motifs: 64
Top 10 Motif indices: 21
Top 10 Motif dates: ['2010 Feb 03']
Discord index 1356 - 2015 May 26

Matrix Profile:
Window size: 90
Motif index 2461 - 2019 Oct 14
Nearest neighbor index 717 - 2012 Nov 07
Distance between motifs: 1744
Top 10 Motif indices: 170
Top 10 Motif dates: ['2010 Sep 07']
Discord index 1326 - 2015 Apr 13

Matrix Profile:
Window size: 180
Motif index 139 - 2010 Jul 23
Nearest neighbor in