In [57]:
from sklearn.svm import SVC
import pandas as pd              
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_validate, RepeatedStratifiedKFold, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier as rf
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.preprocessing import normalize, scale
from pprint import pprint

In [58]:
raw_data = pd.read_csv('final_data.csv')
raw_data.head()

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,...,t493,t494,t495,t496,t497,t498,t499,t500,t501,y
0,0.019336,0.0,0.0,0.0,0.003223,0.0,0.0,0.0,0.0,0.0,...,0.029004,0.009668,0.012891,0.0,0.0,0.0,0.003223,0.003223,0.0,0
1,0.0,0.0,0.012891,0.0,0.016113,0.0,0.006445,0.0,0.003223,0.022559,...,0.0,0.0,0.0,0.009668,0.0,0.0,0.0,0.009668,0.0,0
2,0.0,0.009668,0.0,0.0,0.006445,0.012891,0.0,0.0,0.029004,0.025781,...,0.006445,0.003223,0.012891,0.0,0.0,0.0,0.0,0.003223,0.0,0
3,0.0,0.0,0.0,0.016113,0.006445,0.003223,0.0,0.022559,0.012891,0.0,...,0.0,0.003223,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.016113,0.0,0.0,0.0,0.012891,0.0,0.0,0.003223,0.003223,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003223,0.0,0


In [60]:
def make_df(x):
    mean_value = np.apply_along_axis(get_mean, 1, x)
    std_value = np.apply_along_axis(get_std, 1, x)
    skewness_value = np.apply_along_axis(get_skewness, 1, x)
    kurtosis_value = np.apply_along_axis(get_kurtosis, 1, x)
    p2p_value = np.apply_along_axis(get_p2p, 1, x)
    rms_value = np.apply_along_axis(get_rms, 1, x)
    crestFactor_value = np.apply_along_axis(get_crestFactor, 1, x)
    shapeFactor_value = np.apply_along_axis(get_shapeFactor, 1, x)
    marginFactor_value = np.apply_along_axis(get_marginFactor, 1, x)
    impulseFactor_value = np.apply_along_axis(get_impulseFactor, 1, x)
    
    df = pd.DataFrame({'mean':mean_value, 'std':std_value, 'skewness':skewness_value, 'kurtosis':kurtosis_value, 'p2p':p2p_value, 'rms':rms_value,
                       'crest':crestFactor_value, 'shape':shapeFactor_value, 'margin':marginFactor_value, 'impulse':impulseFactor_value, 'y':raw_data['y']})
    
    return(df)

In [61]:
raw_data_new = make_df(raw_data)

#raw_data_new_normalized = pd.DataFrame(scale(raw_data_new.iloc[:,:-1], axis = 0))
#raw_data_new_normalized['y'] = raw_data['y']
raw_data_new_normalized = raw_data_new

raw_data_new_normalized.head()

Unnamed: 0,mean,std,skewness,kurtosis,p2p,rms,crest,shape,margin,impulse,y
0,0.007941,0.018704,5.229514,39.909885,0.199805,0.02032,9.832731,2.55889,75.581889,25.160873,0
1,0.008243,0.021776,6.284033,55.605198,0.25459,0.023284,10.933988,2.824797,105.119178,30.886293,0
2,0.008121,0.018945,4.607522,31.27307,0.190137,0.020612,9.224341,2.538223,72.899359,23.413439,0
3,0.007594,0.01661,4.342791,28.234115,0.151465,0.018264,8.293289,2.404861,62.211131,19.94421,0
4,0.007671,0.016822,3.978609,23.424995,0.151465,0.018489,8.192289,2.410063,60.315242,19.743933,0


In [62]:
seed = 345
test_size = 0.3
x_train, x_test, y_train, y_test = train_test_split(raw_data_new_normalized.iloc[:,0:10], raw_data_new_normalized['y'], test_size=test_size, random_state=seed)

In [63]:
model = SVC(C = 1.0, kernel = 'rbf', gamma = 'auto', probability = True, verbose = 2)

In [64]:
model.fit(x_train, y_train)

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=2)

In [65]:
pred = model.predict(x_test)

In [66]:
print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))
print('acc:', np.mean(y_test == pred))
print('f1:', f1_score(y_test, pred, pos_label=1))

             precision    recall  f1-score   support

          0       1.00      0.85      0.92        20
          1       0.81      1.00      0.90        13

avg / total       0.93      0.91      0.91        33

[[17  3]
 [ 0 13]]
acc: 0.9090909090909091
f1: 0.896551724137931


In [67]:
random_grid = [{'kernel': ['rbf'], 'gamma': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
               'C': [0.001,0.1,1,10,25,50,100,1000]}, 
              {'kernel': ['sigmoid'], 'gamma': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
               'C': [0.001, 0.10, 0.1, 10, 25, 50, 100, 1000]},
              {'kernel': ['linear'], 'C': [0.001, 0.1, 0.1, 10, 25, 50, 100, 1000]}]

pprint(random_grid)

[{'C': [0.001, 0.1, 1, 10, 25, 50, 100, 1000],
  'gamma': [0.1, 0.01, 0.001, 0.0001, 1e-05],
  'kernel': ['rbf']},
 {'C': [0.001, 0.1, 0.1, 10, 25, 50, 100, 1000],
  'gamma': [0.1, 0.01, 0.001, 0.0001, 1e-05],
  'kernel': ['sigmoid']},
 {'C': [0.001, 0.1, 0.1, 10, 25, 50, 100, 1000], 'kernel': ['linear']}]


In [68]:
svc_random = GridSearchCV(estimator = SVC(), param_grid = random_grid, scoring = 'accuracy', cv = 5, verbose=2, n_jobs = 4,
                          return_train_score = True)

In [69]:
svc_random.fit(x_train, y_train)

Fitting 5 folds for each of 88 candidates, totalling 440 fits
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total=   0.0s
[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total=   0.0s
[CV] ................... C=0.001, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=0.001, gamma=0.1, kernel=rbf ..................................
[CV] C=0.001, gamma=0.01, kernel=rbf .................................
[CV] C=0.001, gamma=0.01, kernel=rbf .................................
[CV] C=0.001, gamma=0.01, kernel=rbf .................................
[CV] .................. C=0.001, gamma=0.01, kernel=rbf, total=   0.0s
[CV] ..........

[Parallel(n_jobs=4)]: Done 440 out of 440 | elapsed:    1.2s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=4,
       param_grid=[{'kernel': ['rbf'], 'gamma': [0.1, 0.01, 0.001, 0.0001, 1e-05], 'C': [0.001, 0.1, 1, 10, 25, 50, 100, 1000]}, {'kernel': ['sigmoid'], 'gamma': [0.1, 0.01, 0.001, 0.0001, 1e-05], 'C': [0.001, 0.1, 0.1, 10, 25, 50, 100, 1000]}, {'kernel': ['linear'], 'C': [0.001, 0.1, 0.1, 10, 25, 50, 100, 1000]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='accuracy', verbose=2)

In [70]:
svc_random.best_params_

{'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}

In [71]:
svc_random.best_score_

1.0

In [72]:
pred = svc_random.best_estimator_.predict(x_test)

In [73]:
print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))
print('acc:', np.mean(y_test == pred))
print('f1:', f1_score(y_test, pred, pos_label=1))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        20
          1       1.00      1.00      1.00        13

avg / total       1.00      1.00      1.00        33

[[20  0]
 [ 0 13]]
acc: 1.0
f1: 1.0


In [74]:
#cross validation
X = raw_data.iloc[:,:-1]
Y = raw_data.iloc[:,501]

In [31]:
X = make_df(X)

In [32]:
X = pd.DataFrame(scale(X, axis = 0))
Y = np.array(Y)

In [41]:
best_model = svc_random.best_estimator_
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=500)

cv_acc = []
cv_precision = []
cv_recall = []
cv_f1 = []

In [42]:
for train, test in kfold.split(X, Y):
    # evaluate the model
    y_pred = best_model.predict(X.iloc[test,:])
    
    accuracy = np.mean(y_pred == Y[test])
    precision = precision_score(Y[test], y_pred, pos_label=1)
    recall = recall_score(Y[test], y_pred, pos_label=1)
    f_score = f1_score(Y[test], y_pred, pos_label=1)
    
    cv_acc.append(accuracy)
    cv_precision.append(precision)
    cv_recall.append(recall)
    cv_f1.append(f_score)
    
print('accuracy:', np.mean(cv_acc))
print('precision:', np.mean(cv_precision))
print('recall:', np.mean(cv_recall))
print('f1:', np.mean(cv_f1))

accuracy: 0.7978354978354978
precision: 0.7757575757575758
recall: 0.8200000000000001
f1: 0.7924963924963926


In [None]:
y