In [None]:
import sys
# Add the parent directory (relative to the current working directory) to the
# module search path so that modules located there can be imported below.
sys.path.append("../")

import numpy as np
import pandas as pd
import pprint
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from kats.consts import TimeSeriesData
from statsmodels.tsa.seasonal import STL
from kats.utils.simulator import Simulator
from sklearn.preprocessing import StandardScaler
from kats.tsfeatures.tsfeatures import TsFeatures

import warnings
# Install a global "ignore" filter: with no category given, it applies to every
# warning raised after this point, presumably to keep cell output uncluttered.
# NOTE(review): this also hides warnings that may point at real problems
# (e.g. pandas chained-assignment warnings) -- consider narrowing the filter.
warnings.simplefilter(action='ignore')

In [None]:
# Shared simulator: 90 daily observations starting on 2021-01-01.
sim = Simulator(n=90, freq="D", start="2021-01-01")
random_seed = 100

# Family 1 -- ten ARIMA series. NumPy's global RNG is seeded right before.
np.random.seed(random_seed)
arima_sim_list = [
    sim.arima_sim(ar=[0.1, 0.05], ma=[0.04, 0.1], d=1)
    for _ in range(10)
]

# NOTE(review): every trend-shift / level-shift call below receives the same
# `random_seed`. If the simulator reseeds NumPy's global RNG with it, the
# `np.random.uniform` draws (and the simulated noise) may repeat from one
# iteration to the next -- confirm against the Simulator implementation.

# Family 2 -- ten trend-shift series; each one gets its own seasonal magnitude
# drawn uniformly from [10, 100).
trend_sim_list = [
    sim.trend_shift_sim(
        random_seed=random_seed,
        intercept=30,
        noise=50,
        cp_arr=[30, 60, 75],
        trend_arr=[3, 15, 2, 8],
        seasonal_period=7,
        seasonal_magnitude=np.random.uniform(10, 100),
    )
    for _ in range(10)
]

# Family 3 -- ten level-shift series; each one gets its own seasonal magnitude
# drawn uniformly from [0.1, 1.0).
level_shift_list = [
    sim.level_shift_sim(
        random_seed=random_seed,
        noise=0.05,
        cp_arr=[30, 60, 75],
        level_arr=[1.35, 1.05, 1.35, 1.2],
        seasonal_period=7,
        seasonal_magnitude=np.random.uniform(0.1, 1.0),
    )
    for _ in range(10)
]

# Concatenation order matters: later cells slice by position
# (ARIMA first, then trend shift, then level shift).
ts_list = [*arima_sim_list, *trend_sim_list, *level_shift_list]

In [None]:
# Read the tab-separated file and give its ten columns short names.
multi_ts_df = pd.read_csv("datasets/wit-10hz.csv", sep='\t')
multi_ts_df.columns = [
    "time",
    "acx", "acy", "acz",
    "gyx", "gyy", "gyz",
    "roll", "pitch", "yaw",
]

# Parse the raw timestamps into a separate datetime column and print the
# frame summary while both the raw and the parsed column are still present.
multi_ts_df['timedf'] = pd.to_datetime(multi_ts_df['time'])
multi_ts_df.info()

# Replace the raw 'time' column with the parsed one; the parsed column ends up
# last and takes over the name 'time'.
multi_ts_df = (
    multi_ts_df
    .drop(columns='time')
    .rename(columns={'timedf': 'time'})
)
multi_ts_df.head()

In [None]:
# Wrap the multi-column frame loaded above in a Kats TimeSeriesData object.
ts = TimeSeriesData(multi_ts_df)

# Draw the three 'ac*' columns, with tilted tick labels on the x axis.
ts.plot(cols=['acx', 'acy', 'acz'])
plt.xticks(rotation=45)
plt.show()

In [None]:
# Step 1. create a TsFeatures extractor with its default configuration
model = TsFeatures()

# Step 2. call .transform() on the target time series to compute its features
output_features = model.transform(ts)

# Display the extracted features as the cell output
output_features

In [None]:
# Extract features for every simulated series; results keep the order of
# ts_list, which later cells rely on when selecting series by position.
model = TsFeatures()
output_features = list(map(model.transform, ts_list))

In [None]:
# Collect the per-series feature results into a single table
# (assumes each entry of output_features is a feature dict -- one row each).
df_features = pd.DataFrame(data=output_features)
df_features.head()

In [None]:
# Position of the sample whose 'seasonality_strength' feature is the largest
index_target_ts = df_features['seasonality_strength'].argmax()

# Fetch the matching series (df_features rows are in ts_list order)
target_ts = ts_list[index_target_ts]

# Show that series
target_ts.plot(cols=['value'])
plt.xticks(rotation=45)
plt.show()

In [None]:
# STL decomposition of the currently selected series, seasonal period of 7
stl = STL(target_ts.value.values, period=7)
res = stl.fit()

# Plot only the seasonal component against time; the title reports the
# variance of that component rounded to two decimals.
plt.plot(pd.to_datetime(target_ts.time.values), res.seasonal)
plt.xticks(rotation=90);
plt.title(f'Seasonal component - variance: {np.round(np.var(res.seasonal), 2)}');

In [None]:
# Position of the sample whose 'seasonality_strength' feature is the smallest
index_target_ts = df_features['seasonality_strength'].argmin()

# Here the series is converted to a DataFrame; its 'value' and 'time' columns
# are read below.
target_ts = ts_list[index_target_ts].to_dataframe()

# STL decomposition with a seasonal period of 7
stl = STL(target_ts.value.values, period=7)
res = stl.fit()

# Plot only the seasonal component against time; the title reports the
# variance of that component rounded to two decimals.
plt.plot(pd.to_datetime(target_ts.time.values), res.seasonal)
plt.xticks(rotation=45);
plt.title(f'Seasonal component - variance: {np.round(np.var(res.seasonal), 2)}');

In [None]:
# Position of the sample whose 'entropy' feature is the largest
index_target_ts = df_features['entropy'].argmax()

# Fetch the matching series (df_features rows are in ts_list order)
target_ts = ts_list[index_target_ts]

# Show that series
target_ts.plot(cols=['value'])
plt.xticks(rotation=45)
plt.show()

In [None]:
# Position of the sample whose 'entropy' feature is the smallest
index_target_ts = df_features['entropy'].argmin()

# Fetch the matching series (df_features rows are in ts_list order)
target_ts = ts_list[index_target_ts]

# Show that series
target_ts.plot(cols=['value'])
plt.xticks(rotation=45)
plt.show()

In [None]:
# Dimension reduction: project a handful of extracted features onto the first
# two principal components so each time series becomes one 2-D point.
ls_features = ['lumpiness', 'entropy', 'seasonality_strength', 'stability', 'level_shift_size']

# Standardize the features first so that no single feature dominates the PCA
# merely because of its scale, then keep the two leading components.
x_scaled = StandardScaler().fit_transform(df_features[ls_features].values)
x_2d = PCA(n_components=2).fit_transform(X=x_scaled)

# Build df_dataset with .assign(), which returns a new, independent frame.
# Writing new columns directly into the `df_features[ls_features]` selection
# triggers pandas' SettingWithCopyWarning (silenced by the global warnings
# filter set in the import cell), so that pattern is avoided here.
df_dataset = df_features[ls_features].assign(
    pca_component_1=x_2d[:, 0],
    pca_component_2=x_2d[:, 1],
)

In [None]:
# Plot the PCA projection of every time series, colored by the process that
# generated it.
#
# One explicit Axes is created and shared by all three groups. DataFrame.plot()
# opens its own figure when no `ax` is passed, so a bare
# plt.figure(figsize=...) beforehand would be left empty and its size would
# never apply to the scatter plot.
fig, ax = plt.subplots(figsize=(15, 8))

# Row ranges follow the concatenation order used to build ts_list:
# rows 0-9 ARIMA, rows 10-19 trend shift, rows 20+ level shift.
# Each group carries its own label so the legend cannot drift out of sync with
# the plotting order.
df_dataset.iloc[0:10].plot(
    x='pca_component_1', y='pca_component_2', kind='scatter',
    color='red', label='ARIMA', ax=ax,
)
df_dataset.iloc[10:20].plot(
    x='pca_component_1', y='pca_component_2', kind='scatter',
    color='green', label='Trend Shift', ax=ax,
)
df_dataset.iloc[20:].plot(
    x='pca_component_1', y='pca_component_2', kind='scatter',
    color='yellow', label='Level Shift', ax=ax,
)

ax.set_title('Visualization of the dimension reduced time series samples')
ax.legend()
plt.show()