**统计特征提取**

In [80]:
import numpy as np

train_x = np.load('/home/intelligence/Robin/Dataset/save_raw_data/pocket_test_win_x.npy')
print(f'train_x shape:{train_x.shape}')

train_x shape:(11924, 300, 9)


In [81]:
mean_feature_dim = np.array([])
std_feature_dim = np.array([])
range_feature_dim = np.array([])
var_feature_dim = np.array([])
for i in range(len(train_x)):
    mean_win = np.mean(train_x[i], axis=0).reshape(1,-1)
    std_win = np.std(train_x[i], axis=0).reshape(1,-1)
    variance = np.var(train_x[i], axis=0).reshape(1,-1)

    data_max = np.max(train_x[i], axis=0).reshape(1,-1)
    data_min = np.min(train_x[i], axis=0).reshape(1,-1)
    range_win = (data_max - data_min)

    mean_feature_dim = np.concatenate((mean_feature_dim, mean_win), axis=0) if mean_feature_dim.size else mean_win  # 平均值
    std_feature_dim = np.concatenate((std_feature_dim, std_win), axis=0) if std_feature_dim.size else std_win  # 标准差
    range_feature_dim = np.concatenate((range_feature_dim, range_win), axis=0) if range_feature_dim.size else range_win  # 范围
    var_feature_dim = np.concatenate((var_feature_dim, variance), axis=0) if var_feature_dim.size else variance  # 方差

from scipy import stats
# 提取偏度和峰度
Skewness_feature_dim = np.array([])
kurtosis_feature_dim = np.array([])
for i in range(len(train_x)):
    Skewness = stats.skew(train_x[i]).reshape(1,-1)
    Kurtosis = stats.kurtosis(train_x[i]).reshape(1,-1)
    Skewness_feature_dim = np.concatenate((Skewness_feature_dim, Skewness), axis=0) if Skewness_feature_dim.size else Skewness
    kurtosis_feature_dim = np.concatenate((kurtosis_feature_dim, Kurtosis), axis=0) if kurtosis_feature_dim.size else Kurtosis


feature_train_x = np.hstack((mean_feature_dim, std_feature_dim, var_feature_dim, range_feature_dim, Skewness_feature_dim,kurtosis_feature_dim))
np.save('/home/intelligence/Robin/Dataset/feature_data/feature_pocket_test_x.npy', feature_train_x)

**SVM/RB/KNN**

In [82]:
import numpy as np
train_x = np.load('/home/intelligence/Robin/Dataset/feature_data/feature_pocket_train_x.npy',allow_pickle=True)
train_y = np.load('/home/intelligence/Robin/Dataset/feature_data/pocket_train_win_y.npy',allow_pickle=True)
test_x = np.load('/home/intelligence/Robin/Dataset/feature_data/feature_pocket_test_x.npy',allow_pickle=True)
test_y = np.load('/home/intelligence/Robin/Dataset/feature_data/pocket_test_win_y.npy',allow_pickle=True)

print("x_train: {}, x_test: {}, y_train: {}, y_test: {}, ".format(train_x.shape, test_x.shape,train_y.shape,test_y.shape))

x_train: (27820, 54), x_test: (11924, 54), y_train: (27820,), y_test: (11924,), 


In [83]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score

classifier = svm.SVC()
parameters = [{'kernel': ['rbf'], 'gamma': [0.01, 0.001, 0.0001, 0.00001], 'C': [1, 10, 100, 1000]}]
model = GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,verbose=1)
model.fit(train_x,train_y)

pred_y = model.predict(test_x)
metrics = f1_score(test_y, pred_y, average='micro')
print('Best Parameters: '+ str(model.best_params_))
print('F1 Score: '+ str(metrics * 100) + ' %')

Fitting 4 folds for each of 16 candidates, totalling 64 fits
Best Parameters: {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
F1 Score: 78.56424018785644 %


In [84]:
from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier()
parameters = {'n_estimators': [10, 100, 1000], 'max_depth': [3, 6, 9], 'max_features' : ['auto', 'log2']}
model=GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,scoring='f1_micro',verbose=4)
model.fit(train_x,train_y)

pred_y = model.predict(test_x)
metrics = f1_score(test_y, pred_y, average='micro')
print('Best Parameters: '+ str(model.best_params_))
print('F1 Score: '+ str(metrics * 100) + ' %')

Fitting 4 folds for each of 18 candidates, totalling 72 fits
Best Parameters: {'max_depth': 9, 'max_features': 'auto', 'n_estimators': 1000}
F1 Score: 82.53102985575312 %


In [85]:
from sklearn.neighbors import KNeighborsClassifier

#Finding the optimal model by varying the no. of neighbors
scores = []
for i in range(1, 30):
    knn = KNeighborsClassifier(n_neighbors = i, n_jobs = -1)
    knn.fit(train_x, train_y)
    pred_y = knn.predict(test_x)
    metrics = f1_score(test_y, pred_y, average='micro')
    scores.append(metrics)

scores = np.array(scores)
print('Optimal No. Of Neighbors:{}'.format(scores.argmax()+1))
print('F1 Score: '+ str(scores.max() * 100) + ' %')

[CV 4/4] END max_depth=3, max_features=auto, n_estimators=1000;, score=0.563 total time=  40.3s
[CV 2/4] END max_depth=3, max_features=auto, n_estimators=1000;, score=0.539 total time=  40.4s
[CV 3/4] END max_depth=3, max_features=auto, n_estimators=1000;, score=0.545 total time=  40.9s
[CV 1/4] END max_depth=3, max_features=auto, n_estimators=1000;, score=0.557 total time=  41.0s
Optimal No. Of Neighbors:1
F1 Score: 68.82757463938276 %
