In [49]:
import traja
from traja import TrajaCollection
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import ShuffleSplit

In [50]:
# create trajectories with shorter step lengths

num_short = 500
x_short_nps = []
y_short_nps = []
short_trjs = {ind: traja.generate(n=1000, random=True,step_length= 2, seed=ind) for ind in range(num_short)}
for trj in short_trjs:
    derivs = traja.get_derivatives(short_trjs[trj])
    ang = traja.calc_angle(short_trjs[trj])
    short_trjs[trj] = short_trjs[trj].join(derivs).join(ang.rename('angles'))
    short_trjs[trj].drop(['displacement_time', 'speed_times', 'acceleration_times'], inplace=True, axis=1)
    # print(short_trjs[trj][short_trjs[trj].isnull().any(axis=1)])
    
    short_trjs[trj].to_numpy
    x_short_nps.append(short_trjs[trj])
    y_short_nps.append(0)
    

x_short_data = np.dstack(x_short_nps)
print(x_short_data.shape)


(1000, 7, 500)


In [51]:
# create trajectories with longer step lengths
# data set now has shorter and longer step lengths

num_long = 500
x_long_nps = [x_short_data]
long_trjs = {ind: traja.generate(n=1000, random=True,step_length= 5, seed=ind+num_short) for ind in range(num_long)}
for trj in long_trjs:
    derivs = traja.get_derivatives(long_trjs[trj])
    ang = traja.calc_angle(long_trjs[trj])
    long_trjs[trj] = long_trjs[trj].join(derivs).join(ang.rename('angles'))
    long_trjs[trj].drop(['displacement_time', 'speed_times', 'acceleration_times'], inplace=True, axis=1)
    long_trjs[trj].to_numpy
    x_long_nps.append(long_trjs[trj])
    y_short_nps.append(1)

X = np.dstack(x_long_nps)
X = np.transpose(X)
Y = np.array(y_short_nps)
print(X.shape)
print(Y.shape)

(1000, 7, 1000)
(1000,)


In [52]:
# shuffle + split data set into training and testing set
from sklearn.model_selection import train_test_split
all_indices = list(range(num_short+num_long))
train_ind, test_ind = train_test_split(all_indices, test_size=0.5)
print(train_ind)
print(test_ind)

X_train = X[train_ind, :, 2:]
X_test = X[test_ind, :, 2:]
Y_train = Y[train_ind]
Y_test = Y[test_ind]




[792, 249, 788, 773, 359, 314, 668, 392, 380, 30, 667, 225, 84, 754, 806, 277, 192, 940, 8, 728, 161, 634, 440, 865, 999, 589, 716, 310, 114, 384, 943, 574, 700, 951, 621, 727, 765, 883, 886, 650, 299, 996, 173, 736, 307, 902, 48, 595, 919, 27, 435, 235, 115, 770, 408, 887, 193, 289, 22, 306, 263, 242, 915, 993, 350, 436, 465, 607, 172, 251, 484, 513, 579, 748, 178, 927, 649, 511, 585, 803, 856, 186, 664, 215, 268, 910, 555, 211, 669, 916, 315, 908, 808, 710, 858, 903, 400, 427, 65, 187, 413, 360, 57, 422, 451, 713, 295, 9, 194, 850, 553, 302, 54, 313, 949, 601, 470, 38, 697, 472, 663, 159, 446, 278, 31, 62, 590, 835, 776, 514, 429, 543, 230, 271, 24, 726, 775, 23, 406, 297, 195, 990, 981, 539, 829, 599, 972, 551, 32, 332, 283, 414, 220, 542, 181, 95, 586, 721, 206, 239, 370, 671, 603, 69, 740, 636, 399, 611, 357, 131, 722, 784, 382, 456, 339, 88, 926, 328, 816, 259, 250, 207, 355, 967, 505, 986, 769, 797, 738, 376, 593, 334, 988, 333, 594, 624, 950, 330, 183, 628, 467, 410, 796, 512, 

In [53]:
# Try time-series concatenation
from sktime.transformations.panel.compose import ColumnConcatenator
from sklearn.pipeline import Pipeline
from sktime.classification.interval_based import TimeSeriesForestClassifier


steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=10)),
]
clf = Pipeline(steps)

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

k_fold = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)

output = cross_validate(clf, X[:,:,2:], Y, cv=k_fold, scoring = 'roc_auc', return_estimator =True)
print(output)

{'fit_time': array([9.65801597, 9.78967214, 9.66700196, 8.92758417]), 'score_time': array([2.80781603, 2.94097781, 2.82999706, 2.50501704]), 'estimator': [Pipeline(steps=[('concatenate', ColumnConcatenator()),
                ('classify', TimeSeriesForestClassifier(n_estimators=10))]), Pipeline(steps=[('concatenate', ColumnConcatenator()),
                ('classify', TimeSeriesForestClassifier(n_estimators=10))]), Pipeline(steps=[('concatenate', ColumnConcatenator()),
                ('classify', TimeSeriesForestClassifier(n_estimators=10))]), Pipeline(steps=[('concatenate', ColumnConcatenator()),
                ('classify', TimeSeriesForestClassifier(n_estimators=10))])], 'test_score': array([1., 1., 1., 1.])}


In [57]:
ColumnConcatenator.transform(X[:,:,2:])

TypeError: transform() missing 1 required positional argument: 'X'

In [54]:
clf.fit(X_train, Y_train)
score = clf.score(X_test, Y_test)
Y_pred = clf.predict(X_test)
print(Y_pred)
print(Y_test)
print(score)

[0 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0
 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 0
 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 1 1 0 1 0 1 1 0 1 0 1 0 0 0 1 0 0 0 1
 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0 1
 1 1 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 1 0 0 1 1 1 0 0 1 1 1 1 0 0
 1 1 0 1 1 1 1 0 0 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 0 1
 1 0 0 0 1 1 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 0 1 0 1 0 0 1
 0 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 1 0 0 1 0 1 1 1 0 0 0 0
 1 1 1 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
 1 0 0 1 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 0 1 0 1 1 0 1 0 1 0 1
 0 0 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1
 1 0 0 1 1 1 1 1 0 1 1 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1
 0 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 0 1 0 1 0 0 1
 0 0 1 0 0 1 0 0 0 1 1 1 

In [170]:
# Use rocket Classifier
from sktime.classification.kernel_based import RocketClassifier
from sklearn.metrics import accuracy_score

rocket = RocketClassifier()
rocket.fit(X_train, Y_train)
Y_pred = rocket.predict(X_test)
accuracy_score(Y_test, Y_pred)



0.375

In [155]:
# Try Column Ensembling
from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
clf = ColumnEnsembleClassifier(
    estimators=[
        ("TSF0", TimeSeriesForestClassifier(n_estimators=10), [0]),
        ("BOSSEnsemble3", BOSSEnsemble(max_ensemble_size=5), [3]),
    ]
)
clf.fit(X_train, Y_train)
clf.score(X_test, Y_test)

KeyboardInterrupt: 

In [126]:
!ln -s /Users/kellyfang/opt/anaconda3/envs/env_pytorch/lib/python3.6/site-packages/sktime/distances/elastic_cython.cpython-36m-darwin.so /usr/local/opt/libomp/lib/libomp.dylib

ln: /usr/local/opt/libomp/lib/libomp.dylib: File exists


In [130]:
# try time-series clustering
import sktime.clustering as cluster
b = cluster.BaseClusterer(n_cluster=2)
b.predict(X_train)

AttributeError: module 'sktime.clustering' has no attribute 'BaseClusterer'

[31mERROR: Could not find a version that satisfies the requirement sktime==0.11.4 (from versions: 0.1.dev0, 0.1.0, 0.2.0, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.5.1, 0.5.2, 0.5.3, 0.6.0, 0.6.1, 0.7.0, 0.8.0, 0.8.1, 0.9.0)[0m
[31mERROR: No matching distribution found for sktime==0.11.4[0m
