In [49]:
import numpy as np
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pandas as pd

In [2]:
# Seed NumPy's global RNG so the np.random.shuffle calls below are repeatable
# on a fresh top-to-bottom run.
seed = 0
np.random.seed(seed)

In [3]:
# Relative path of the file that is loaded into `train_data` below.
train_file = 'data/Beef_TRAIN'

In [4]:
# Relative path of the file that is loaded into `test_data` below.
test_file = 'data/Beef_TEST'

# Prep Train Data

In [5]:
# Parse the numeric text file into a float array (one row per line of the file).
train_data = np.loadtxt(train_file)

In [6]:
# Shuffle the rows IN PLACE. The order is repeatable only via the seed set
# earlier; re-running this cell on its own reshuffles the already-shuffled rows.
np.random.shuffle(train_data)

In [7]:
# Features: every column except the first.
X_train = train_data[:,1:]

In [8]:
# Labels: the first column.
y_train = train_data[:,0]

In [9]:
# Peek at the feature vector of the first (shuffled) training row.
print(X_train[0])

[ 0.05932   0.059252  0.059367  0.059677  0.060162  0.060548  0.061128
  0.061507  0.06174   0.062208  0.063082  0.063917  0.064914  0.065933
  0.066705  0.067779  0.069004  0.070079  0.071097  0.072026  0.072972
  0.074065  0.074961  0.075688  0.076339  0.077263  0.077875  0.078381
  0.079081  0.079974  0.080479  0.080593  0.081714  0.08269   0.083475
  0.084856  0.086031  0.087057  0.087752  0.088645  0.089918  0.090997
  0.092619  0.094209  0.095676  0.097526  0.099507  0.101466  0.103168
  0.104676  0.106047  0.107261  0.108482  0.109722  0.110691  0.111015
  0.110353  0.10901   0.107401  0.106395  0.105731  0.105195  0.105676
  0.106562  0.107589  0.108637  0.108813  0.108071  0.105878  0.102527
  0.099287  0.097013  0.096188  0.096435  0.098962  0.103803  0.109379
  0.11531   0.121071  0.125988  0.129832  0.132161  0.133751  0.136124
  0.138426  0.13993   0.141581  0.143068  0.143892  0.143858  0.142706
  0.141114  0.139034  0.13648   0.13391   0.130977  0.126984  0.122589
  0.11

In [10]:
# Show every training label in shuffled order.
print(y_train)

[1. 5. 3. 2. 5. 5. 5. 2. 3. 4. 1. 3. 2. 3. 4. 4. 1. 5. 2. 1. 4. 4. 2. 2.
 5. 1. 1. 4. 3. 3.]


# Prep Test Data

In [11]:
# Parse the numeric text file into a float array (one row per line of the file).
test_data = np.loadtxt(test_file)

In [12]:
# Shuffle the test rows IN PLACE. This changes which rows the `[0:5]` / `[0:2]`
# slices used later pick up; re-running this cell alone reshuffles again.
np.random.shuffle(test_data)

In [13]:
# Features: every column except the first.
X_test = test_data[:,1:]

In [14]:
# Labels: the first column.
y_test = test_data[:,0]

In [None]:
# Peek at the feature vector of the first (shuffled) test row.
print(X_test[0])

In [None]:
# Label of that same first test row.
print(y_test[0])

In [None]:
# Debug check: show the type of a single feature value.
print(type(X_test[0][1]))

# Train Model

In [15]:
def DTW(a, b):
    """Return the dynamic time warping (DTW) distance between two series.

    Parameters
    ----------
    a, b : numpy.ndarray
        The series to align. Each one is reshaped to a single column, so every
        element is treated as one scalar point of the series.

    Returns
    -------
    numpy.float64
        Accumulated cost of the cheapest warping path that ends at the last
        point of both series. NaN if any point-to-point distance is NaN.

    Notes
    -----
    The double loop costs O(a.size * b.size). If DTW is too slow, down-sample
    the series first to reduce the amount of data.
    """
    an = a.size
    bn = b.size
    # Local cost of matching a[i] with b[j]; cdist's default metric is
    # Euclidean, which for scalar points is the absolute difference.
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # A NaN local cost poisons every cell to its right and below, so the final
    # cell is NaN as well. Returning early keeps that result while allowing the
    # loop to use the built-in min(), which does not propagate NaN reliably.
    if np.isnan(pointwise_distance).any():
        return np.float64(np.nan)

    # Plain ndarray rather than the deprecated (and slow to index) np.matrix.
    # Row 0 / column 0 are an "unreachable" border, except the origin.
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0.0

    for ai in range(an):
        for bi in range(bn):
            # Cheapest way to arrive: from above, from the left, or diagonally.
            minimum_cost = min(cumdist[ai, bi + 1],
                               cumdist[ai + 1, bi],
                               cumdist[ai, bi])
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [52]:
def DTWWithWeightNeighborCell(a, b, **kwargs):
    """Return a DTW distance whose three predecessor cells are weighted.

    Same recurrence as plain DTW, except that the accumulated cost of each
    neighbouring cell is multiplied by a weight before the minimum is taken.

    Parameters
    ----------
    a, b : numpy.ndarray
        The series to align. Each one is reshaped to a single column, so every
        element is treated as one scalar point of the series.
    **kwargs
        alpha : weight of the cell above, ``cumdist[ai, bi + 1]``.
        beta  : weight of the cell to the left, ``cumdist[ai + 1, bi]``.
        gamma : weight of the diagonal cell, ``cumdist[ai, bi]``.
        A missing weight defaults to 1; with all three at 1 this is plain DTW.

    Returns
    -------
    numpy.float64
        Value of the final cell of the weighted cumulative-cost table.
    """
    # Look the weights up once instead of on every cell of the O(an * bn) loop.
    alpha = kwargs.get("alpha", 1)
    beta = kwargs.get("beta", 1)
    gamma = kwargs.get("gamma", 1)

    an = a.size
    bn = b.size
    # Local cost of matching a[i] with b[j] (cdist default: Euclidean).
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # Plain ndarray rather than the deprecated (and slow to index) np.matrix.
    # Row 0 / column 0 are an "unreachable" border, except the origin.
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0.0

    for ai in range(an):
        for bi in range(bn):
            # np.min (not the built-in min) so that a NaN candidate propagates.
            # NOTE(review): a weight of 0 turns the infinite border cells into
            # NaN (0 * inf) and a negative weight turns them into -inf; the
            # weights are presumably meant to be positive - confirm.
            minimum_cost = np.min([alpha * cumdist[ai, bi + 1],
                                   beta * cumdist[ai + 1, bi],
                                   gamma * cumdist[ai, bi]])
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [None]:
# 1-nearest-neighbour classifier using DTW as a callable metric, fitted on the
# first five training rows only (presumably a quick smoke test).
# algorithm="brute" is spelled out because DTW does not satisfy the triangle
# inequality, so the tree indexes that algorithm="auto" is allowed to pick are
# not guaranteed to return the true nearest neighbour.
clf = KNeighborsClassifier(metric=DTW, n_neighbors=1, n_jobs=-1, algorithm="brute")
clf.fit(X_train[0:5], y_train[0:5])

# Test Model

In [None]:
# Predict labels for the first five test rows only.
y_pred = clf.predict(X_test[0:5])

In [None]:
# Per-class precision / recall / F1 for those five predictions.
print(classification_report(y_test[0:5], y_pred[0:5]))

# Create Model with Weighted Neighboring Cells

In [56]:
# Candidate settings: 1-NN with two weightings of the DTW neighbour cells.
# Each `metric_params` dict is forwarded as keyword arguments to the metric.
parameters = {
    'n_neighbors': [1],
    'metric_params': [
        {"alpha": 1, "beta": 1, "gamma": 1},
        {"alpha": 1, "beta": 1, "gamma": 2},
    ],
}
# algorithm="brute" is spelled out because DTW-style distances do not satisfy
# the triangle inequality, so the tree indexes that algorithm="auto" is allowed
# to pick are not guaranteed to return the true nearest neighbour.
clf = GridSearchCV(
    KNeighborsClassifier(metric=DTWWithWeightNeighborCell, algorithm="brute"),
    parameters,
    cv=2,
    verbose=10,
    n_jobs=-1,
)
clf.fit(X_train, y_train)

Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV 1/2; 1/2] START metric_params={'alpha': 1, 'beta': 1, 'gamma': 1}, n_neighbors=1
[CV 1/2; 1/2] END metric_params={'alpha': 1, 'beta': 1, 'gamma': 1}, n_neighbors=1;, score=0.333 total time= 7.7min
[CV 2/2; 1/2] START metric_params={'alpha': 1, 'beta': 1, 'gamma': 1}, n_neighbors=1
[CV 2/2; 1/2] END metric_params={'alpha': 1, 'beta': 1, 'gamma': 1}, n_neighbors=1;, score=0.400 total time= 7.6min
[CV 1/2; 2/2] START metric_params={'alpha': 1, 'beta': 1, 'gamma': 2}, n_neighbors=1
[CV 1/2; 2/2] END metric_params={'alpha': 1, 'beta': 1, 'gamma': 2}, n_neighbors=1;, score=0.333 total time= 7.8min
[CV 2/2; 2/2] START metric_params={'alpha': 1, 'beta': 1, 'gamma': 2}, n_neighbors=1
[CV 2/2; 2/2] END metric_params={'alpha': 1, 'beta': 1, 'gamma': 2}, n_neighbors=1;, score=0.400 total time= 7.6min


In [57]:
# Tabulate the grid search's per-candidate timings and per-fold scores.
grid_search_result = clf.cv_results_
pd.DataFrame.from_dict(grid_search_result)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_metric_params,param_n_neighbors,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0005,0.0005,459.195146,1.483151,"{'alpha': 1, 'beta': 1, 'gamma': 1}",1,"{'metric_params': {'alpha': 1, 'beta': 1, 'gam...",0.333333,0.4,0.366667,0.033333,1
1,0.001504,0.000496,463.826981,5.088543,"{'alpha': 1, 'beta': 1, 'gamma': 2}",1,"{'metric_params': {'alpha': 1, 'beta': 1, 'gam...",0.333333,0.4,0.366667,0.033333,1


In [63]:
# Estimator that GridSearchCV refit with its best-ranked parameter setting
# (both candidates are tied at rank 1 in the table above).
model = clf.best_estimator_

In [67]:
# Evaluate on the first two test rows only. With a support of 2 the report
# below says very little about the model's real accuracy.
y_pred = model.predict(X_test[0:2])
print(classification_report(y_test[0:2], y_pred[0:2]))

              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00         1
         3.0       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

