# Data Reproducibility Check

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sktime.transformations.panel.rocket import Rocket

from dataloader.dataset import UniformSegmentDataset
from param import *
from utils.util import *

# Use seaborn's "whitegrid" theme for any figures drawn later in the notebook.
sns.set_theme(style="whitegrid")
# Project path configuration; ParamDir is pulled in by the wildcard imports
# (presumably from `param` -- confirm).
output_dir = ParamDir().output_dir
# Collection of data paths; the cells below index it to pick one recording.
datalist = ParamDir().data_path_list


### Non-shuffle

In [None]:
# Build two datasets from the same recording with identical, unshuffled
# settings and load each one, so the resulting training sets can be compared.
# The construct -> load order is kept per dataset on purpose: the notebook is
# checking whether repeated runs reproduce the same data.
# NOTE(review): load_all_data is called here as (window_size, train_ratio, K),
# while the "segment label shuffling" cells pass (window_size, K, train_ratio).
# Confirm which order matches the current signature.
data_dir = datalist[2]  # A WT mouse

dataset1 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    False,
    ParamData().random_state,
)
(X_train1, y_train1), _ = dataset1.load_all_data(
    ParamData().window_size,
    ParamData().train_ratio,
    ParamData().K,
)

dataset2 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    False,
    ParamData().random_state,
)
(X_train2, y_train2), _ = dataset2.load_all_data(
    ParamData().window_size,
    ParamData().train_ratio,
    ParamData().K,
)

In [None]:
# Show the shapes of both training arrays before comparing their values.
print(f"Shape \nX1:{X_train1.shape}, X2:{X_train2.shape}")

Shape 
X1:(56, 66, 16), X2:(56, 66, 16)


In [None]:
# Value-level reproducibility check for the two loads.
# np.array_equal returns a plain bool and reports False when the shapes differ,
# whereas element-wise `==` followed by `.all()` would broadcast or fail.
np.array_equal(X_train1, X_train2)

False

#### `split_data`

In [None]:
# Re-run only the train/test split on each dataset's stored X and y to see
# whether this step alone gives the same training data twice.
(X_train1, y_train1), _ = dataset1.split_data(dataset1.X, dataset1.y, ParamData().train_ratio)
(X_train2, y_train2), _ = dataset2.split_data(dataset2.X, dataset2.y, ParamData().train_ratio)
# Shape-safe comparison: False (not an error or a broadcast result) on mismatch.
np.array_equal(X_train1, X_train2)

True

#### `segment_with_threshold` and `get_segment_data`

In [None]:
# Segment both training splits with the same fixed parameters and check that
# the segmented data match.
K, window_size = 16, 8  # hard-coded for this check instead of read from ParamData

# First training split.
segment_ind = segment_with_threshold(y_train1, K)  # get the segmentation indices
X_train_new1, y_train1 = get_segment_data(segment_ind, K, window_size, X_train1, y_train1)
# Second training split (a second copy of the training data, not a test set).
segment_ind = segment_with_threshold(y_train2, K)  # get the segmentation indices
X_train_new2, y_train2 = get_segment_data(segment_ind, K, window_size, X_train2, y_train2)

# Shape-safe comparison: False (not an error or a broadcast result) on mismatch.
np.array_equal(X_train_new1, X_train_new2)

True

#### `downsample`

In [None]:
# Downsample each segmented training set independently, then compare both the
# features and the labels of the two results.
X_train1, y_train1 = downsample(X_train_new1, y_train1)
X_train2, y_train2 = downsample(X_train_new2, y_train2)

# np.array_equal gives False when the two results differ in shape, instead of
# broadcasting or raising as element-wise `==` would.
print(f"X: {np.array_equal(X_train1, X_train2)}, y: {np.array_equal(y_train1, y_train2)}")

X: False, y: True


#### Result
Reloading the data after seeding `downsample`:

In [None]:
# End-to-end check without shuffling: load the same recording twice with
# identical settings and compare both the train and the test splits.
data_dir = datalist[2]  # A WT mouse
dataset1 = UniformSegmentDataset(data_dir, ParamData().mobility, False, ParamData().random_state)
(X_train1, y_train1), (X_test1, y_test1) = dataset1.load_all_data(ParamData().window_size, ParamData().train_ratio, ParamData().K)

dataset2 = UniformSegmentDataset(data_dir, ParamData().mobility, False, ParamData().random_state)
(X_train2, y_train2), (X_test2, y_test2) = dataset2.load_all_data(ParamData().window_size, ParamData().train_ratio, ParamData().K)

# np.array_equal reports False when two runs yield different shapes, rather
# than broadcasting or raising as element-wise `==` would.
print(f"Train:\nX:{np.array_equal(X_train1, X_train2)}, y:{np.array_equal(y_train1, y_train2)}")
print(f"Test:\nX:{np.array_equal(X_test1, X_test2)}, y:{np.array_equal(y_test1, y_test2)}")

Train:
X:True, y:True
Test:
X:True, y:True


### Shuffle

#### "behavior shuffling"

In [None]:
# Load the same recording twice in "behavior shuffling" mode with the same
# random_state, then print the training shapes of both runs.
data_dir = datalist[2]  # A WT mouse

dataset1 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    "behavior shuffling",
    ParamData().random_state,
)
(X_train1, y_train1), (X_test1, y_test1) = dataset1.load_all_data(
    ParamData().window_size,
    ParamData().train_ratio,
    ParamData().K,
)

dataset2 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    "behavior shuffling",
    ParamData().random_state,
)
(X_train2, y_train2), (X_test2, y_test2) = dataset2.load_all_data(
    ParamData().window_size,
    ParamData().train_ratio,
    ParamData().K,
)

# Differing first dimensions here mean the two runs did not produce the same data.
print(f"Shape \nX1:{X_train1.shape}, X2:{X_train2.shape}")

Shape 
X1:(72, 66, 16), X2:(52, 66, 16)


##### `_shuffle`

In [None]:
# Show the seed stored on the dataset (the attribute is spelled `randome_state`).
print(f"random_state:{dataset1.randome_state}")
# NOTE(review): this keeps a reference, not a copy. If `_shuffle` modified
# coords_xy in place, both names would point at the same data and the check
# below would always print True -- confirm that `_shuffle` rebinds the attribute.
coords_xy1 = dataset1.coords_xy
dataset1._shuffle()
coords_xy2 = dataset1.coords_xy
# Compares coords_xy from before and after the extra `_shuffle` call.
print(f"coords_xy: {(coords_xy1 == coords_xy2).all()}")

random_state:20230411
coords_xy: False


##### Results

In [None]:
# End-to-end check for "behavior shuffling": load the same recording twice
# with the same random_state and compare both the train and the test splits.
data_dir = datalist[2]  # A WT mouse
dataset1 = UniformSegmentDataset(data_dir, ParamData().mobility, "behavior shuffling", ParamData().random_state)
(X_train1, y_train1), (X_test1, y_test1) = dataset1.load_all_data(ParamData().window_size, ParamData().train_ratio, ParamData().K)

dataset2 = UniformSegmentDataset(data_dir, ParamData().mobility, "behavior shuffling", ParamData().random_state)
(X_train2, y_train2), (X_test2, y_test2) = dataset2.load_all_data(ParamData().window_size, ParamData().train_ratio, ParamData().K)

# np.array_equal reports False when two runs yield different shapes, rather
# than broadcasting or raising as element-wise `==` would.
print(f"Train:\nX:{np.array_equal(X_train1, X_train2)}, y:{np.array_equal(y_train1, y_train2)}")
print(f"Test:\nX:{np.array_equal(X_test1, X_test2)}, y:{np.array_equal(y_test1, y_test2)}")

Train:
X:True, y:True
Test:
X:True, y:True


#### "segment label shuffling"

In [2]:
# Load the same recording twice in "segment label shuffling" mode with the
# same random_state, then print the training shapes of both runs.
# NOTE(review): load_all_data is called here as (window_size, K, train_ratio),
# whereas the non-shuffle and "behavior shuffling" cells pass
# (window_size, train_ratio, K). Confirm which order matches the signature.
data_dir = datalist[2]  # A WT mouse

dataset1 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    "segment label shuffling",
    ParamData().random_state,
)
(X_train1, y_train1), (X_test1, y_test1) = dataset1.load_all_data(
    ParamData().window_size,
    ParamData().K,
    ParamData().train_ratio,
)

dataset2 = UniformSegmentDataset(
    data_dir,
    ParamData().mobility,
    "segment label shuffling",
    ParamData().random_state,
)
(X_train2, y_train2), (X_test2, y_test2) = dataset2.load_all_data(
    ParamData().window_size,
    ParamData().K,
    ParamData().train_ratio,
)

print(f"Shape \nX1:{X_train1.shape}, X2:{X_train2.shape}")

Shape 
X1:(56, 66, 16), X2:(56, 66, 16)


##### Results

In [5]:
# End-to-end check for "segment label shuffling": load the same recording
# twice with the same random_state and compare the train and test splits.
# NOTE(review): load_all_data is called here as (window_size, K, train_ratio),
# whereas the non-shuffle and "behavior shuffling" cells pass
# (window_size, train_ratio, K). Confirm which order matches the signature.
data_dir = datalist[2]  # A WT mouse
dataset1 = UniformSegmentDataset(data_dir, ParamData().mobility, "segment label shuffling", ParamData().random_state)
(X_train1, y_train1), (X_test1, y_test1) = dataset1.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

dataset2 = UniformSegmentDataset(data_dir, ParamData().mobility, "segment label shuffling", ParamData().random_state)
(X_train2, y_train2), (X_test2, y_test2) = dataset2.load_all_data(ParamData().window_size, ParamData().K, ParamData().train_ratio)

# np.array_equal reports False when two runs yield different shapes, rather
# than broadcasting or raising as element-wise `==` would.
print(f"Train:\nX:{np.array_equal(X_train1, X_train2)}, y:{np.array_equal(y_train1, y_train2)}")
print(f"Test:\nX:{np.array_equal(X_test1, X_test2)}, y:{np.array_equal(y_test1, y_test2)}")

Train:
X:True, y:True
Test:
X:True, y:True
