In [1]:
import numpy as np
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
# Seed NumPy's global random generator so the np.random.shuffle calls in the
# data-preparation cells produce the same order on a fresh Restart & Run All.
seed = 0
np.random.seed(seed)

In [3]:
# Relative path of the training split; it is read with np.loadtxt in a later cell.
train_file = 'data/Beef_TRAIN'

In [4]:
# Relative path of the test split; it is read with np.loadtxt in a later cell.
test_file = 'data/Beef_TEST'

# Prep Train Data

In [5]:
# Load the whole training file as one numeric array (one row per series).
train_data = np.loadtxt(train_file)

In [6]:
# Shuffle the rows IN PLACE using the global NumPy RNG seeded above.
# Re-running only this cell reshuffles the already shuffled array, so the row
# order is reproducible only when the notebook is run top to bottom.
np.random.shuffle(train_data)

In [7]:
# Features: every column except the first one (the first column is split off
# as the label in the next cell).
X_train = train_data[:,1:]

In [16]:
# Labels: the first column of each row; passed as the target to the classifiers' fit().
y_train = train_data[:,0]

In [9]:
# Peek at the feature values of the first (shuffled) training series.
print(X_train[0])

[ 0.05932   0.059252  0.059367  0.059677  0.060162  0.060548  0.061128
  0.061507  0.06174   0.062208  0.063082  0.063917  0.064914  0.065933
  0.066705  0.067779  0.069004  0.070079  0.071097  0.072026  0.072972
  0.074065  0.074961  0.075688  0.076339  0.077263  0.077875  0.078381
  0.079081  0.079974  0.080479  0.080593  0.081714  0.08269   0.083475
  0.084856  0.086031  0.087057  0.087752  0.088645  0.089918  0.090997
  0.092619  0.094209  0.095676  0.097526  0.099507  0.101466  0.103168
  0.104676  0.106047  0.107261  0.108482  0.109722  0.110691  0.111015
  0.110353  0.10901   0.107401  0.106395  0.105731  0.105195  0.105676
  0.106562  0.107589  0.108637  0.108813  0.108071  0.105878  0.102527
  0.099287  0.097013  0.096188  0.096435  0.098962  0.103803  0.109379
  0.11531   0.121071  0.125988  0.129832  0.132161  0.133751  0.136124
  0.138426  0.13993   0.141581  0.143068  0.143892  0.143858  0.142706
  0.141114  0.139034  0.13648   0.13391   0.130977  0.126984  0.122589
  0.11

In [17]:
# Show all training labels in their shuffled order.
print(y_train)

[1. 5. 3. 2. 5. 5. 5. 2. 3. 4. 1. 3. 2. 3. 4. 4. 1. 5. 2. 1. 4. 4. 2. 2.
 5. 1. 1. 4. 3. 3.]


# Prep Test Data

In [18]:
# Load the whole test file as one numeric array (one row per series).
test_data = np.loadtxt(test_file)

In [19]:
# Shuffle the test rows IN PLACE with the global NumPy RNG. The result depends
# on how many random draws happened before this cell, so it is reproducible
# only when the notebook is run top to bottom.
np.random.shuffle(test_data)

In [20]:
# Features: every column except the first one (the first column is split off
# as the label in the next cell).
X_test = test_data[:,1:]

In [21]:
# Labels: the first column of each row; used as ground truth in classification_report.
y_test = test_data[:,0]

In [22]:
# Peek at the feature values of the first (shuffled) test series.
print(X_test[0])

[0.011017 0.010999 0.010705 0.010713 0.010817 0.010985 0.011075 0.010923
 0.010946 0.011039 0.01111  0.011085 0.011153 0.011331 0.011505 0.011624
 0.011802 0.011841 0.011968 0.012156 0.012282 0.012337 0.012289 0.012418
 0.012562 0.012686 0.01275  0.012953 0.013388 0.013734 0.013806 0.013843
 0.013962 0.013713 0.013618 0.014183 0.014406 0.014174 0.014282 0.014685
 0.014867 0.014935 0.015094 0.015192 0.015488 0.015634 0.015749 0.01622
 0.016404 0.016632 0.016856 0.016725 0.016781 0.016792 0.016577 0.016689
 0.016632 0.016307 0.016011 0.015758 0.015774 0.015704 0.015337 0.01504
 0.015256 0.015125 0.014855 0.014984 0.014889 0.014584 0.014188 0.013943
 0.013517 0.013088 0.013162 0.013349 0.013606 0.014158 0.014486 0.014622
 0.014982 0.015368 0.015528 0.01577  0.015921 0.016174 0.016612 0.016838
 0.01716  0.017152 0.017207 0.017507 0.017346 0.017289 0.017539 0.01765
 0.017464 0.017142 0.016706 0.016085 0.015622 0.015088 0.014525 0.01429
 0.014132 0.014139 0.01419  0.01415  0.013957 0.013587 

In [23]:
# Label of the first (shuffled) test series.
print(y_test[0])

1.0


In [24]:
# Sanity check: element type of the loaded test features.
print(type(X_test[0][1]))

<class 'numpy.float64'>


# Train Model

In [25]:
def DTW(a, b):
    """Return the dynamic-time-warping (DTW) distance between two sequences.

    Both inputs are flattened to one value per time step. The cost of matching
    ``a[i]`` with ``b[j]`` is their Euclidean distance (``cdist`` default),
    which for scalars is ``|a[i] - b[j]|``. The result is the cost of the
    cheapest monotone alignment, computed with the classic dynamic-programming
    recurrence over an ``(a.size + 1) x (b.size + 1)`` table.

    Parameters
    ----------
    a, b : numpy.ndarray
        Numeric sequences; they may have different lengths.

    Returns
    -------
    numpy.float64
        Accumulated alignment cost stored in the bottom-right table cell.
    """
    # If DTW turns out to be too slow, the series may need to be down-sampled
    # first so there is less data to align.
    an = a.size
    bn = b.size
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # Plain ndarray instead of the discouraged np.matrix class. Row/column 0 is
    # an infinite border so that every path has to start at cell (0, 0).
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0

    for ai in range(an):
        for bi in range(bn):
            # Cheapest of the three admissible predecessor cells.
            minimum_cost = np.min([cumdist[ai, bi + 1],
                                   cumdist[ai + 1, bi],
                                   cumdist[ai, bi]])
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [26]:
def DTWWithWeightNeighborCell(a,b,alpha,beta,gamma):
    """Return a DTW distance whose three predecessor cells are weighted.

    Same recurrence as plain DTW, except that the accumulated cost of each
    predecessor is multiplied by a weight before the minimum is taken:

    * ``alpha`` scales ``cumdist[ai, bi + 1]`` (previous ``a`` index, same ``b`` index),
    * ``beta``  scales ``cumdist[ai + 1, bi]`` (same ``a`` index, previous ``b`` index),
    * ``gamma`` scales ``cumdist[ai, bi]``     (previous index on both axes).

    With ``alpha == beta == gamma == 1`` this is ordinary DTW.

    Parameters
    ----------
    a, b : numpy.ndarray
        Numeric sequences; flattened to one value per time step.
    alpha, beta, gamma : number
        Multiplicative weights. NOTE: a weight of 0 multiplies the infinite
        border cells and yields NaN (0 * inf), so weights should be non-zero.

    Returns
    -------
    numpy.float64
        Accumulated weighted alignment cost in the bottom-right table cell.
    """
    an = a.size
    bn = b.size
    # With one-column inputs, cdist's default Euclidean metric is |a[i] - b[j]|.
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # Plain ndarray instead of the discouraged np.matrix class. Row/column 0 is
    # an infinite border so that every path has to start at cell (0, 0).
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0

    for ai in range(an):
        for bi in range(bn):
            minimum_cost = np.min([alpha * cumdist[ai, bi + 1],
                                   beta * cumdist[ai + 1, bi],
                                   gamma * cumdist[ai, bi]])
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [27]:
# 1-nearest-neighbour classifier with DTW as the distance function.
# DTW is not a true metric (it can violate the triangle inequality), so force
# an exhaustive search: the tree index that algorithm='auto' may choose for a
# callable metric assumes a true metric and could return wrong neighbours.
# NOTE: only the first 5 training series are used here.
clf = KNeighborsClassifier(metric=DTW, n_neighbors=1, n_jobs=-1, algorithm='brute')
clf.fit(X_train[0:5], y_train[0:5])

# Test Model

In [28]:
# Predict labels for the first 5 test series only.
y_pred = clf.predict(X_test[0:5])

In [29]:
# With only 5 samples some classes have no true or no predicted instances.
# zero_division=0 keeps the reported 0.00 for those undefined scores but
# silences the repeated undefined-metric warnings.
print(classification_report(y_test[0:5], y_pred[0:5], zero_division=0))

              precision    recall  f1-score   support

         1.0       0.33      1.00      0.50         1
         2.0       0.00      0.00      0.00         1
         3.0       1.00      0.50      0.67         2
         4.0       0.00      0.00      0.00         1
         5.0       0.00      0.00      0.00         0

    accuracy                           0.40         5
   macro avg       0.27      0.30      0.23         5
weighted avg       0.47      0.40      0.37         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Create Models with Weighted Neighboring Cells

In [30]:
def DTW1(a,b):
    """Weighted DTW with every neighbor-cell weight equal to 1."""
    weights = dict(alpha=1, beta=1, gamma=1)
    return DTWWithWeightNeighborCell(a, b, **weights)

In [31]:
def DTW2(a,b):
    """Weighted DTW with alpha=1, beta=1 and gamma=2."""
    weights = dict(alpha=1, beta=1, gamma=2)
    return DTWWithWeightNeighborCell(a, b, **weights)

In [32]:
def DTW3(a,b):
    """Weighted DTW with alpha=1, beta=2 and gamma=1."""
    weights = dict(alpha=1, beta=2, gamma=1)
    return DTWWithWeightNeighborCell(a, b, **weights)

In [33]:
def DTW4(a,b):
    """Weighted DTW with alpha=3, beta=1 and gamma=1."""
    weights = dict(alpha=3, beta=1, gamma=1)
    return DTWWithWeightNeighborCell(a, b, **weights)

In [34]:
# 1-NN on the DTW1 distance. Weighted DTW is not a true metric, so use an
# exhaustive search instead of the tree index algorithm='auto' may choose for
# a callable metric. Only the first 5 training series are used.
knn1 = KNeighborsClassifier(metric=DTW1, n_neighbors=1, n_jobs=-1, algorithm='brute')
knn1.fit(X_train[0:5], y_train[0:5])

In [35]:
# 1-NN on the DTW2 distance. Weighted DTW is not a true metric, so use an
# exhaustive search instead of the tree index algorithm='auto' may choose for
# a callable metric. Only the first 5 training series are used.
knn2 = KNeighborsClassifier(metric=DTW2, n_neighbors=1, n_jobs=-1, algorithm='brute')
knn2.fit(X_train[0:5], y_train[0:5])

In [36]:
# 1-NN on the DTW3 distance. Weighted DTW is not a true metric, so use an
# exhaustive search instead of the tree index algorithm='auto' may choose for
# a callable metric. Only the first 5 training series are used.
knn3 = KNeighborsClassifier(metric=DTW3, n_neighbors=1, n_jobs=-1, algorithm='brute')
knn3.fit(X_train[0:5], y_train[0:5])

In [37]:
# 1-NN on the DTW4 distance. Weighted DTW is not a true metric, so use an
# exhaustive search instead of the tree index algorithm='auto' may choose for
# a callable metric. Only the first 5 training series are used.
knn4 = KNeighborsClassifier(metric=DTW4, n_neighbors=1, n_jobs=-1, algorithm='brute')
knn4.fit(X_train[0:5], y_train[0:5])

In [None]:
# for i in range (0,len(DTWArray)):
#     clf = KNeighborsClassifier(metric =DTWArray[i],n_neighbors=1,n_jobs=-1)
#     clf.fit(X_train[0:5], y_train[0:5])
#     ClfArray.append(clf) 

In [38]:
# Evaluate the DTW1 model on the first 5 test series.
y_pred1 = knn1.predict(X_test[0:5])
# zero_division=0 keeps the 0.00 shown for undefined scores (classes with no
# true or no predicted samples) but silences the repeated warnings.
print(classification_report(y_test[0:5], y_pred1[0:5], zero_division=0))

              precision    recall  f1-score   support

         1.0       0.33      1.00      0.50         1
         2.0       0.00      0.00      0.00         1
         3.0       1.00      0.50      0.67         2
         4.0       0.00      0.00      0.00         1
         5.0       0.00      0.00      0.00         0

    accuracy                           0.40         5
   macro avg       0.27      0.30      0.23         5
weighted avg       0.47      0.40      0.37         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [39]:
# Evaluate the DTW2 model on the first 5 test series.
y_pred2 = knn2.predict(X_test[0:5])
# zero_division=0 keeps the 0.00 shown for undefined scores (classes with no
# true or no predicted samples) but silences the repeated warnings.
print(classification_report(y_test[0:5], y_pred2[0:5], zero_division=0))

              precision    recall  f1-score   support

         1.0       0.33      1.00      0.50         1
         2.0       0.00      0.00      0.00         1
         3.0       1.00      0.50      0.67         2
         4.0       0.00      0.00      0.00         1
         5.0       0.00      0.00      0.00         0

    accuracy                           0.40         5
   macro avg       0.27      0.30      0.23         5
weighted avg       0.47      0.40      0.37         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [40]:
# Evaluate the DTW3 model on the first 5 test series.
y_pred3 = knn3.predict(X_test[0:5])
# zero_division=0 keeps the 0.00 shown for undefined scores (classes with no
# true or no predicted samples) but silences the repeated warnings.
print(classification_report(y_test[0:5], y_pred3[0:5], zero_division=0))

              precision    recall  f1-score   support

         1.0       0.50      1.00      0.67         1
         2.0       0.00      0.00      0.00         1
         3.0       1.00      0.50      0.67         2
         4.0       0.00      0.00      0.00         1
         5.0       0.00      0.00      0.00         0

    accuracy                           0.40         5
   macro avg       0.30      0.30      0.27         5
weighted avg       0.50      0.40      0.40         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
# Evaluate the DTW4 model on the first 5 test series.
y_pred4 = knn4.predict(X_test[0:5])
# zero_division=0 keeps the 0.00 shown for undefined scores (classes with no
# true or no predicted samples) but silences the repeated warnings.
print(classification_report(y_test[0:5], y_pred4[0:5], zero_division=0))

              precision    recall  f1-score   support

         1.0       0.50      1.00      0.67         1
         2.0       0.00      0.00      0.00         1
         3.0       1.00      0.50      0.67         2
         4.0       0.00      0.00      0.00         1
         5.0       0.00      0.00      0.00         0

    accuracy                           0.40         5
   macro avg       0.30      0.30      0.27         5
weighted avg       0.50      0.40      0.40         5



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
