In [3]:
import pickle

import lightgbm as lgb
import numpy as np
from sklearn import preprocessing
from sklearn import datasets, metrics
from sklearn.metrics import  classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier

# Load a single feature file as a sample to inspect its contents and shape.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load feature files that were produced by this project.
# The `with` block guarantees the handle is closed even if unpickling fails.
with open("feature/arkanoid_N5_20200824_10.pkl", "rb") as file:
    test = pickle.load(file)

print(test)
print(test.shape)

[[  1.           2.           3.         ...   5.           6.
    7.        ]
 [  0.           0.          93.         ...  75.           0.
   90.17721519]
 [ 93.         395.         100.         ...  75.           1.
   82.        ]
 ...
 [126.          87.         133.         ... 160.           2.
  193.        ]
 [133.          80.         140.         ... 160.           0.
  185.        ]
 [140.          73.         147.         ... 160.           2.
  189.        ]]
(23826, 7)


In [4]:
# Build the full feature matrix from every recorded feature file.
# Each entry is (file-name tag, number of files); the files of one tag are
# numbered 10, 20, ..., 10 * count.
# NOTE(review): pickle.load can execute arbitrary code embedded in a file;
# only load feature files that were produced by this project.
chunks = []
for tag, file_count in (("N5", 10), ("N3", 10), ("E5", 5), ("E3", 5)):
    for i in range(1, file_count + 1):
        path = "feature/arkanoid_" + tag + "_20200824_" + str(i*10) + ".pkl"
        # `with` closes the handle even when unpickling raises.
        with open(path, "rb") as file:
            tmp = pickle.load(file)
        # Drop the first two rows of every file before stacking.
        chunks.append(tmp[2:])

# Stack once at the end. Stacking inside the loop re-copied the whole growing
# matrix for every file, and needed a placeholder first row that then had to
# be stripped again; collecting the pieces first avoids both.
feature = np.vstack(chunks)
print(feature)

# Shuffle the rows in place. The three seeded passes are kept exactly as they
# were so the resulting row order stays reproducible and unchanged.
for seed in (5, 97, 44):
    np.random.seed(seed)
    np.random.shuffle(feature)
print(feature)

# Show the number of samples.
print(feature.shape)

[[ 93. 395. 100. ...  75.   1.  82.]
 [100. 388. 107. ...  70.   0.  87.]
 [107. 381. 114. ...  70.   1.  82.]
 ...
 [174.  42. 167. ... 150.   0. 176.]
 [167.  49. 160. ... 155.   0. 179.]
 [160.  56. 153. ... 155.   2. 188.]]
[[  0. 199.   7. ... 160.   2. 196.]
 [182.  77. 189. ... 110.   1. 121.]
 [119. 374. 126. ...  75.   0.  89.]
 ...
 [  7. 315.  14. ...  65.   2.  93.]
 [  0.  59.   7. ...  65.   1.  48.]
 [174. 333. 167. ...  85.   2. 113.]]
(771673, 7)


In [3]:
# Column layout of `feature`:
# [previous-frame ball x, previous-frame ball y, ball x, ball y,
#  platform x, platform movement mode, correct platform x]
# Split it into the model inputs (x_data, columns 0-4) and the matching
# labels (y_data, column 5). The last column (correct platform x) is dropped.
# .copy() detaches the slices from `feature`: a plain slice is a view, so a
# later in-place edit of x_data would silently rewrite `feature` as well and
# re-running this cell would then pick up the already-modified columns.
x_data = feature[:, 0:5].copy()
print(x_data)
print(x_data.shape)

y_data = feature[:, 5].copy()
#y_data = y_data.reshape(y_data.size, 1)
print(y_data)
print(y_data.shape)

[[  0. 199.   7. 192. 160.]
 [182.  77. 189.  84. 110.]
 [119. 374. 126. 367.  75.]
 ...
 [  7. 315.  14. 322.  65.]
 [  0.  59.   7.  52.  65.]
 [174. 333. 167. 340.  85.]]
(771673, 5)
[2. 1. 0. ... 2. 1. 2.]
(771673,)


In [4]:
# Current ball position - previous ball position = movement direction [optional step]
# Columns 0-1 (previous position) are replaced by the per-frame displacement
# computed from columns 2-3 (current position); the remaining columns are kept.
# A new array is built instead of assigning into x_data in place: x_data may be
# a view of `feature`, and an in-place write would corrupt `feature` too.
# Note: running this cell twice in a row still applies the subtraction twice;
# re-create x_data from its source before re-running.
x_data = np.hstack((x_data[:, 2:4] - x_data[:, 0:2], x_data[:, 2:]))
print(x_data)
print(x_data.shape)
print(y_data.shape)

[[  7.  -7.   7. 192. 160.]
 [  7.   7. 189.  84. 110.]
 [  7.  -7. 126. 367.  75.]
 ...
 [  7.   7.  14. 322.  65.]
 [  7.  -7.   7.  52.  65.]
 [ -7.   7. 167. 340.  85.]]
(771673, 5)
(771673,)


In [13]:
# Equalize the number of samples per class [optional step]

# Sample count of each label before balancing.
t0 = np.sum(y_data == 0)
t1 = np.sum(y_data == 1)
t2 = np.sum(y_data == 2)
print(t0, t1, t2)

# Surplus of each class relative to the smallest class.
smallest = min(t0, t1, t2)
c0 = t0 - smallest
c1 = t1 - smallest
c2 = t2 - smallest
print(c0, c1, c2)
total = c0 + c1 + c2
print("total: ", total)

# Drop the surplus rows with a single boolean mask instead of calling
# np.delete once per row (each call copies the whole array).
# As before, the rows removed are the LAST surplus rows of each class, and
# every label that is neither 0 nor 1 is treated as class 2.
keep = np.ones(y_data.size, dtype=bool)
is_0 = y_data == 0
is_1 = y_data == 1
for members, surplus in ((is_0, c0), (is_1, c1), (~(is_0 | is_1), c2)):
    rows = np.flatnonzero(members)
    # max(..., 0) guards the case where the surplus exceeds the group size.
    keep[rows[max(rows.size - surplus, 0):]] = False

x_data = x_data[keep]
y_data = y_data[keep]

# Sample count of each label after balancing.
f0 = np.sum(y_data == 0)
f1 = np.sum(y_data == 1)
f2 = np.sum(y_data == 2)
print(f0, f1, f2)

432996 170761 167916
265080 2845 0
total:  267925
167916 167916 167916


In [24]:
# Feature standardization [optional step] -- currently disabled
# x_data = preprocessing.scale(x_data)
# print(x_data)

In [5]:
# Final check of the data state before training: contents and shape of the
# inputs first, then of the labels.
print(x_data, x_data.shape, sep="\n")
print(y_data, y_data.shape, sep="\n")

# Save the prepared arrays (disabled)
# file = open('arkanoid_N5N3E5E3_20200824_perpared_x_data.pkl', 'wb')
# pickle.dump(x_data, file)
# file.close()
# file = open('arkanoid_N5N3E5E3_20200824_perpared_y_data.pkl', 'wb')
# pickle.dump(y_data, file)
# file.close()

[[  7.  -7.   7. 192. 160.]
 [  7.   7. 189.  84. 110.]
 [  7.  -7. 126. 367.  75.]
 ...
 [  7.   7.  14. 322.  65.]
 [  7.  -7.   7.  52.  65.]
 [ -7.   7. 167. 340.  85.]]
(771673, 5)
[2. 1. 0. ... 2. 1. 2.]
(771673,)


In [3]:
# Load previously prepared arrays directly instead of rebuilding them.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load files that were produced by this project.
# `with` closes each handle even when unpickling raises.
with open("arkanoid_N5_20200823_perpared_x_data.pkl", "rb") as file:
    x_data = pickle.load(file)
with open("arkanoid_N5_20200823_perpared_y_data.pkl", "rb") as file:
    y_data = pickle.load(file)

In [7]:
# Split the data: 30% is held out as the test set.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=9)
# Hyper-parameter grid to search.
param_grid = {'n_neighbors':[5, 10]}
# Cross-validation: more splits would check the consistency of the data, but
# cost time for little benefit, so a single stratified split is used.
cv = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=12)
# KNeighborsClassifier comes from sklearn.neighbors (imported at the top of the notebook).
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=cv, verbose=10, n_jobs=-1)
grid.fit(x_train, y_train)
grid_predictions = grid.predict(x_test)

# Save the fitted grid search. `with` closes the file even if pickling fails.
# NOTE(review): the file name says "svm" but the estimator is a k-nearest-
# neighbours classifier; the name is left unchanged in case other code loads it.
with open('arkanoid_NE_20200902_svm_model.pkl', 'wb') as file:
    pickle.dump(grid, file)

Fitting 1 folds for each of 2 candidates, totalling 2 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   54.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   54.0s finished


In [8]:
# Best hyper-parameters selected by the grid search
print(grid.best_params_)
# Predictions (disabled)
#print(grid_predictions)
# Confusion matrix of the held-out test set
print(confusion_matrix(y_true=y_test, y_pred=grid_predictions))
# Per-class precision / recall / f1 report
print(classification_report(y_true=y_test, y_pred=grid_predictions))

{'n_neighbors': 10}
[[119937   4427   5162]
 [ 19767  30967    771]
 [ 20227    858  29386]]
              precision    recall  f1-score   support

         0.0       0.75      0.93      0.83    129526
         1.0       0.85      0.60      0.71     51505
         2.0       0.83      0.58      0.69     50471

    accuracy                           0.78    231502
   macro avg       0.81      0.70      0.74    231502
weighted avg       0.79      0.78      0.77    231502

