In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

reviews = pd.read_csv('../input/Reviews.csv')
reviews.head(1)

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...


In [2]:
reviews['smry_txt'] = reviews['Summary'].astype(str) + ' ' + reviews['Text']
dataset = reviews.loc[reviews['Score'] != 3][['smry_txt','Score']]

dataset.loc[dataset['Score']==1, 'Score'] = 'negative'
dataset.loc[dataset['Score']==2, 'Score'] = 'negative'
dataset.loc[dataset['Score']==4, 'Score'] = 'positive'
dataset.loc[dataset['Score']==5, 'Score'] = 'positive'
dataset.head()

Unnamed: 0,smry_txt,Score
0,Good Quality Dog Food I have bought several of...,positive
1,Not as Advertised Product arrived labeled as J...,negative
2,"""Delight"" says it all This is a confection tha...",positive
3,Cough Medicine If you are looking for the secr...,negative
4,Great taffy Great taffy at a great price. The...,positive


In [3]:
from sklearn.feature_extraction.text import CountVectorizer
count_vec_total = CountVectorizer()

x_data = count_vec_total.fit_transform(dataset.smry_txt)
y_data = dataset['Score']

y_data.head()

print(dataset.iloc[0]['smry_txt'], end='\n\n')
print(count_vec_total.inverse_transform(x_data[0]), end='\n\n')
print(x_data[0])

Good Quality Dog Food I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than  most.

[array(['most', 'this', 'appreciates', 'she', 'finicky', 'is', 'labrador',
       'my', 'better', 'smells', 'it', 'meat', 'processed', 'than', 'stew',
       'like', 'more', 'looks', 'product', 'be', 'to', 'all', 'them',
       'found', 'and', 'products', 'canned', 'vitality', 'the', 'of',
       'several', 'bought', 'have', 'food', 'dog', 'quality', 'good'],
      dtype='<U124')]

  (0, 74176)	1
  (0, 108779)	1
  (0, 9541)	1
  (0, 97878)	1
  (0, 47541)	1
  (0, 61725)	1
  (0, 65358)	1
  (0, 75122)	1
  (0, 20970)	2
  (0, 100122)	1
  (0, 61875)	1
  (0, 71077)	1
  (0, 87084)	1
  (0, 108307)	2
  (0, 103477)	1
  (0, 66977)	1
  (0, 73996)	1
  (0, 67944)	1
  (0, 87176)	2
  (0, 19907)	1
  (0, 109606)	1
  (0, 749

# split the data into training and testing

In [4]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)

y_train.shape

(394360,)

# Linear svm with Gridsearch

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

In [7]:
param_grid = {'C':[0.03125, 0.125, 0.5, 2, 8]}
lr_svm = LinearSVC(verbose=20, max_iter = 100000)
lr_svm_grid_cv = GridSearchCV(lr_svm, param_grid=param_grid, n_jobs=-1, verbose=20)
lr_svm_grid_cv

GridSearchCV(cv=None, error_score='raise',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=100000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=20),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'C': [0.03125, 0.125, 0.5, 2, 8]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=20)

In [8]:
from datetime import datetime
start_time = datetime.now()

#perform grid search for c
lr_svm_grid_cv.fit(x_train, y_train)

print(datetime.now() - start_time)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] C=0.03125 .......................................................
[CV] C=0.03125 .......................................................
[CV] C=0.03125 .......................................................
[CV] C=0.125 .........................................................
[CV] C=0.125 .........................................................
[CV] C=0.125 .........................................................
[CV] C=0.5 ...........................................................
[CV] C=0.5 ...........................................................
[CV] C=0.5 ...........................................................
[CV] C=2 .............................................................
[CV] C=2 .............................................................
[CV] C=2 .............................................................
[CV] C=8 .............................................................
[CV] C=8 ........

[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    4.5s


[LibLinear][CV] .............. C=0.03125, score=0.9214157168566287, total=   5.9s


[Parallel(n_jobs=-1)]: Done   2 out of  15 | elapsed:    6.2s remaining:   40.1s


[LibLinear][CV] .............. C=0.03125, score=0.9226154769046191, total=   6.4s


[Parallel(n_jobs=-1)]: Done   3 out of  15 | elapsed:    6.6s remaining:   26.3s


[LibLinear][CV] ................ C=0.125, score=0.9165666266506602, total=  10.9s


[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:   11.1s remaining:   30.5s


[LibLinear][CV] ................ C=0.125, score=0.9169166166766647, total=  12.2s


[Parallel(n_jobs=-1)]: Done   5 out of  15 | elapsed:   12.4s remaining:   24.8s


[LibLinear][CV] ................ C=0.125, score=0.9187162567486503, total=  14.0s


[Parallel(n_jobs=-1)]: Done   6 out of  15 | elapsed:   14.2s remaining:   21.3s


[LibLinear][CV] .................. C=0.5, score=0.9138655462184874, total=  15.1s


[Parallel(n_jobs=-1)]: Done   7 out of  15 | elapsed:   15.3s remaining:   17.5s


[LibLinear][CV] .................... C=2, score=0.9147659063625451, total=  18.1s


[Parallel(n_jobs=-1)]: Done   8 out of  15 | elapsed:   18.6s remaining:   16.3s


[LibLinear][CV] .................. C=0.5, score=0.9124175164967007, total=  23.8s


[Parallel(n_jobs=-1)]: Done   9 out of  15 | elapsed:   24.0s remaining:   16.0s


[LibLinear][CV] .................. C=0.5, score=0.9157168566286743, total=  26.5s


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:   26.8s remaining:   13.4s


[LibLinear][CV] .................... C=2, score=0.9115176964607079, total=  37.7s


[Parallel(n_jobs=-1)]: Done  11 out of  15 | elapsed:   38.0s remaining:   13.8s


[LibLinear][CV] ..................... C=2, score=0.913617276544691, total=  41.7s


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:   42.1s remaining:   10.5s


[LibLinear][CV] .................... C=8, score=0.9139172165566887, total=  43.4s


[Parallel(n_jobs=-1)]: Done  13 out of  15 | elapsed:   44.0s remaining:    6.8s


[LibLinear][CV] .................... C=8, score=0.9141656662665066, total= 1.0min




[LibLinear][CV] .................... C=8, score=0.9103179364127174, total= 1.1min


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  1.1min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  1.1min finished


[LibLinear]0:01:11.413450


In [9]:
lr_svm_grid_cv.best_params_, lr_svm_grid_cv.best_estimator_, lr_svm_grid_cv.best_score_

({'C': 0.03125},
 LinearSVC(C=0.03125, class_weight=None, dual=True, fit_intercept=True,
      intercept_scaling=1, loss='squared_hinge', max_iter=100000,
      multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
      verbose=20),
 0.92159999999999997)

In [10]:
y_true, y_pred = y_test, lr_svm_grid_cv.predict(x_test)

In [11]:
from sklearn.metrics import classification_report, precision_recall_fscore_support
print(classification_report(y_true, y_pred))
print(precision_recall_fscore_support(y_true, y_pred, average='micro'))

             precision    recall  f1-score   support

   negative       0.81      0.69      0.75     20513
   positive       0.94      0.97      0.96    110941

avg / total       0.92      0.93      0.92    131454

(0.92626317951526771, 0.92626317951526771, 0.92626317951526771, None)


In [12]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

y_pred = np.where(y_pred == 'positive', 1, 0)
y_true = np.where(y_true == 'positive', 1, 0)

fpr, tpr, threshold = roc_curve(y_true, y_pred)
print("FPR: {}, \nTPR: {}, \nAUC : {}".format(fpr[1], tpr[1], auc(fpr,tpr)))

FPR: 0.3071710622532053, 
TPR: 0.9694251899658377, 
AUC : 0.8311270638563162


# SVM with RBF kernel

In [13]:
from sklearn.svm import SVC

In [14]:
param_grid = {'C':[0.03125, 0.125, 0.5, 2, 8],
             'gamma': [3.0517578125e-05, 0.00048828125, 0.0078125, 0.125, 2]}
rbf_svm = SVC(kernel='rbf')
rbf_svm_grid_cv = GridSearchCV(rbf_svm, param_grid=param_grid, n_jobs=-1, verbose=30)
rbf_svm_grid_cv

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'C': [0.03125, 0.125, 0.5, 2, 8], 'gamma': [3.0517578125e-05, 0.00048828125, 0.0078125, 0.125, 2]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=30)

In [15]:
from datetime import datetime
start_time = datetime.now()

#perform gridsearch
rbf_svm_grid_cv.fit(x_train, y_train)

print(datetime.now() - start_time)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] C=0.03125, gamma=3.0517578125e-05 ...............................
[CV] C=0.03125, gamma=3.0517578125e-05 ...............................
[CV] C=0.03125, gamma=0.00048828125 ..................................
[CV] C=0.03125, gamma=3.0517578125e-05 ...............................
[CV] C=0.03125, gamma=0.00048828125 ..................................
[CV] C=0.03125, gamma=0.00048828125 ..................................
[CV] C=0.03125, gamma=0.0078125 ......................................
[CV] C=0.03125, gamma=0.0078125 ......................................
[CV] C=0.03125, gamma=0.0078125 ......................................
[CV] C=0.03125, gamma=0.125 ..........................................
[CV] C=0.03125, gamma=0.125 ..........................................
[CV] C=0.03125, gamma=0.125 ..........................................
[CV] C=0.03125, gamma=2 ..............................................
[CV] C=0.03125, 

[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.3min


[CV]  C=0.03125, gamma=0.00048828125, score=0.8425314937012598, total=  56.3s
[CV] C=0.125, gamma=3.0517578125e-05 .................................


[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.4min


[CV]  C=0.03125, gamma=0.00048828125, score=0.8425314937012598, total= 1.1min
[CV] C=0.125, gamma=0.00048828125 ....................................


[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  1.6min


[CV]  C=0.03125, gamma=0.0078125, score=0.8427370948379351, total= 1.1min
[CV] C=0.125, gamma=0.00048828125 ....................................


[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.8min


[CV]  C=0.03125, gamma=3.0517578125e-05, score=0.8427370948379351, total= 1.3min
[CV] C=0.125, gamma=0.00048828125 ....................................


[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.9min


[CV]  C=0.03125, gamma=0.00048828125, score=0.8427370948379351, total= 1.3min
[CV] C=0.125, gamma=0.0078125 ........................................


[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  2.1min


[CV]  C=0.03125, gamma=3.0517578125e-05, score=0.8425314937012598, total= 1.3min
[CV] C=0.125, gamma=0.0078125 ........................................


[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  2.1min


[CV]  C=0.03125, gamma=3.0517578125e-05, score=0.8425314937012598, total= 1.3min
[CV] C=0.125, gamma=0.0078125 ........................................


[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  2.1min


[CV]  C=0.03125, gamma=0.0078125, score=0.8425314937012598, total= 1.4min
[CV] C=0.125, gamma=0.125 ............................................


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  2.2min


[CV]  C=0.03125, gamma=0.0078125, score=0.8425314937012598, total= 1.4min
[CV] C=0.125, gamma=0.125 ............................................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  2.2min


[CV]  C=0.125, gamma=3.0517578125e-05, score=0.8425314937012598, total=  54.4s
[CV] C=0.125, gamma=0.125 ............................................


[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  2.7min


[CV]  C=0.125, gamma=3.0517578125e-05, score=0.8427370948379351, total=  54.9s
[CV] C=0.125, gamma=2 ................................................


[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  2.8min


[CV]  C=0.125, gamma=0.00048828125, score=0.8425314937012598, total=  57.7s
[CV] C=0.125, gamma=2 ................................................


[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  3.1min


[CV]  C=0.125, gamma=0.00048828125, score=0.8425314937012598, total=  56.2s
[CV] C=0.125, gamma=2 ................................................


[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  3.2min


[CV] . C=0.03125, gamma=0.125, score=0.8425314937012598, total= 2.2min
[CV] C=0.5, gamma=3.0517578125e-05 ...................................


[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  3.6min


[CV]  C=0.125, gamma=0.00048828125, score=0.8427370948379351, total= 1.3min
[CV] C=0.5, gamma=3.0517578125e-05 ...................................


[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  3.8min


[CV] ..... C=0.03125, gamma=2, score=0.8427370948379351, total= 2.3min
[CV] C=0.5, gamma=3.0517578125e-05 ...................................


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  3.9min


[CV]  C=0.125, gamma=0.0078125, score=0.8425314937012598, total= 1.3min
[CV] C=0.5, gamma=0.00048828125 ......................................


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  4.1min


[CV] ..... C=0.03125, gamma=2, score=0.8425314937012598, total= 2.3min
[CV] C=0.5, gamma=0.00048828125 ......................................


[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed:  4.3min


[CV] . C=0.03125, gamma=0.125, score=0.8427370948379351, total= 2.5min
[CV] C=0.5, gamma=0.00048828125 ......................................


[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:  4.3min


[CV] . C=0.03125, gamma=0.125, score=0.8425314937012598, total= 2.5min
[CV] C=0.5, gamma=0.0078125 ..........................................


[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  4.4min


[CV] ..... C=0.03125, gamma=2, score=0.8425314937012598, total= 2.6min
[CV] C=0.5, gamma=0.0078125 ..........................................


[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed:  4.4min


[CV]  C=0.125, gamma=0.0078125, score=0.8427370948379351, total= 1.4min
[CV] C=0.5, gamma=0.0078125 ..........................................


[Parallel(n_jobs=-1)]: Done  23 tasks      | elapsed:  4.4min


[CV]  C=0.125, gamma=0.0078125, score=0.8425314937012598, total= 1.9min
[CV] C=0.5, gamma=0.125 ..............................................


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  4.7min


[CV]  C=0.5, gamma=3.0517578125e-05, score=0.8425314937012598, total=  56.4s
[CV] C=0.5, gamma=0.125 ..............................................


[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  5.1min


[CV]  C=0.5, gamma=3.0517578125e-05, score=0.8425314937012598, total= 1.2min
[CV] C=0.5, gamma=0.125 ..............................................


[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:  5.5min


[CV]  C=0.5, gamma=3.0517578125e-05, score=0.8427370948379351, total= 1.0min
[CV] C=0.5, gamma=2 ..................................................


[Parallel(n_jobs=-1)]: Done  27 tasks      | elapsed:  5.5min


[CV]  C=0.5, gamma=0.00048828125, score=0.8428314337132573, total= 1.2min
[CV] C=0.5, gamma=2 ..................................................


[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:  5.8min


[CV]  C=0.5, gamma=0.00048828125, score=0.8436374549819928, total=  59.0s
[CV] C=0.5, gamma=2 ..................................................


[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:  5.9min


[CV] ... C=0.125, gamma=0.125, score=0.8427370948379351, total= 2.2min
[CV] C=2, gamma=3.0517578125e-05 .....................................


[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:  6.3min


[CV] ....... C=0.125, gamma=2, score=0.8425314937012598, total= 2.1min
[CV] C=2, gamma=3.0517578125e-05 .....................................


[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed:  6.4min


[CV]  C=0.5, gamma=0.00048828125, score=0.8428314337132573, total= 1.6min
[CV] C=2, gamma=3.0517578125e-05 .....................................


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:  6.4min


[CV] .. C=0.5, gamma=0.0078125, score=0.859628074385123, total= 1.5min
[CV] C=2, gamma=0.00048828125 ........................................
[CV] ... C=0.125, gamma=0.125, score=0.8425314937012598, total= 2.8min


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  6.6min
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  6.6min


[CV] C=2, gamma=0.00048828125 ........................................
[CV] ... C=0.125, gamma=0.125, score=0.8425314937012598, total= 2.6min
[CV] C=2, gamma=0.00048828125 ........................................


[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed:  6.7min


[CV] ....... C=0.125, gamma=2, score=0.8427370948379351, total= 2.1min
[CV] C=2, gamma=0.0078125 ............................................


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:  6.8min


[CV] ....... C=0.125, gamma=2, score=0.8425314937012598, total= 2.2min
[CV] C=2, gamma=0.0078125 ............................................


[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  6.8min


[CV] . C=0.5, gamma=0.0078125, score=0.8578284343131374, total= 1.5min
[CV] C=2, gamma=0.0078125 ............................................


[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:  6.9min


[CV] .. C=0.5, gamma=0.0078125, score=0.858343337334934, total= 1.6min
[CV] C=2, gamma=0.125 ................................................


[Parallel(n_jobs=-1)]: Done  39 tasks      | elapsed:  7.1min


[CV]  C=2, gamma=3.0517578125e-05, score=0.8425314937012598, total=  57.7s
[CV] C=2, gamma=0.125 ................................................


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:  7.8min


[CV]  C=2, gamma=3.0517578125e-05, score=0.8425314937012598, total=  59.7s
[CV] C=2, gamma=0.125 ................................................


[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:  7.8min


[CV]  C=2, gamma=0.00048828125, score=0.8553421368547419, total=  57.2s
[CV] C=2, gamma=2 ....................................................


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  8.2min


[CV]  C=2, gamma=3.0517578125e-05, score=0.8427370948379351, total= 1.3min
[CV] C=2, gamma=2 ....................................................


[Parallel(n_jobs=-1)]: Done  43 tasks      | elapsed:  8.3min


[CV]  C=2, gamma=0.00048828125, score=0.8548290341931614, total= 1.1min
[CV] C=2, gamma=2 ....................................................


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:  8.4min


[CV]  C=2, gamma=0.00048828125, score=0.8503299340131973, total= 1.1min
[CV] C=8, gamma=3.0517578125e-05 .....................................
[CV] ..... C=0.5, gamma=0.125, score=0.8428314337132573, total= 2.3min
[CV] C=8, gamma=3.0517578125e-05 .....................................
[CV] .... C=2, gamma=0.0078125, score=0.906118776244751, total= 1.3min
[CV] C=8, gamma=3.0517578125e-05 .....................................


[Parallel(n_jobs=-1)]: Done  47 out of  75 | elapsed:  8.9min remaining:  5.3min


[CV] ... C=2, gamma=0.0078125, score=0.9052189562087583, total= 1.4min
[CV] C=8, gamma=0.00048828125 ........................................
[CV] ......... C=0.5, gamma=2, score=0.8431313737252549, total= 2.3min
[CV] C=8, gamma=0.00048828125 ........................................
[CV] ..... C=0.5, gamma=0.125, score=0.8431313737252549, total= 2.9min
[CV] C=8, gamma=0.00048828125 ........................................


[Parallel(n_jobs=-1)]: Done  50 out of  75 | elapsed:  9.5min remaining:  4.8min


[CV] ... C=2, gamma=0.0078125, score=0.9087635054021609, total= 1.8min
[CV] C=8, gamma=0.0078125 ............................................
[CV] ...... C=0.5, gamma=0.125, score=0.843937575030012, total= 2.7min
[CV] C=8, gamma=0.0078125 ............................................
[CV]  C=8, gamma=3.0517578125e-05, score=0.8428314337132573, total= 1.1min
[CV] C=8, gamma=0.0078125 ............................................


[Parallel(n_jobs=-1)]: Done  53 out of  75 | elapsed: 10.1min remaining:  4.2min


[CV]  C=8, gamma=3.0517578125e-05, score=0.8428314337132573, total=  54.3s
[CV] C=8, gamma=0.125 ................................................
[CV]  C=8, gamma=3.0517578125e-05, score=0.8436374549819928, total=  53.9s
[CV] C=8, gamma=0.125 ................................................
[CV]  C=8, gamma=0.00048828125, score=0.9001199760047991, total=  51.4s
[CV] C=8, gamma=0.125 ................................................


[Parallel(n_jobs=-1)]: Done  56 out of  75 | elapsed: 10.3min remaining:  3.5min


[CV] .......... C=0.5, gamma=2, score=0.843937575030012, total= 2.7min
[CV] C=8, gamma=2 ....................................................
[CV] ......... C=0.5, gamma=2, score=0.8428314337132573, total= 2.8min
[CV] C=8, gamma=2 ....................................................
[CV]  C=8, gamma=0.00048828125, score=0.9055188962207559, total= 1.1min
[CV] C=8, gamma=2 ....................................................


[Parallel(n_jobs=-1)]: Done  59 out of  75 | elapsed: 11.2min remaining:  3.0min


[CV] ....... C=2, gamma=0.125, score=0.8464307138572286, total= 2.7min
[CV] ....... C=2, gamma=0.125, score=0.8485302939412117, total= 2.4min
[CV] ....... C=2, gamma=0.125, score=0.8469387755102041, total= 2.3min


[Parallel(n_jobs=-1)]: Done  62 out of  75 | elapsed: 11.6min remaining:  2.4min


[CV]  C=8, gamma=0.00048828125, score=0.9000600240096038, total= 1.6min
[CV] ........... C=2, gamma=2, score=0.8464307138572286, total= 2.3min
[CV] ... C=8, gamma=0.0078125, score=0.9145170965806838, total= 1.4min


[Parallel(n_jobs=-1)]: Done  65 out of  75 | elapsed: 12.0min remaining:  1.8min


[CV] ........... C=2, gamma=2, score=0.8485302939412117, total= 2.4min
[CV] ... C=8, gamma=0.0078125, score=0.9103179364127174, total= 1.8min
[CV] ... C=8, gamma=0.0078125, score=0.9171668667466987, total= 1.7min


[Parallel(n_jobs=-1)]: Done  68 out of  75 | elapsed: 12.4min remaining:  1.3min


[CV] ........... C=2, gamma=2, score=0.8469387755102041, total= 3.4min
[CV] ....... C=8, gamma=0.125, score=0.8464307138572286, total= 2.1min
[CV] ....... C=8, gamma=0.125, score=0.8485302939412117, total= 2.1min


[Parallel(n_jobs=-1)]: Done  71 out of  75 | elapsed: 13.0min remaining:   43.8s


[CV] ....... C=8, gamma=0.125, score=0.8469387755102041, total= 2.1min
[CV] ........... C=8, gamma=2, score=0.8464307138572286, total= 2.1min
[CV] ........... C=8, gamma=2, score=0.8485302939412117, total= 2.1min
[CV] ........... C=8, gamma=2, score=0.8469387755102041, total= 1.6min


[Parallel(n_jobs=-1)]: Done  75 out of  75 | elapsed: 13.2min finished


0:13:38.526283


In [16]:
rbf_svm_grid_cv.best_params_, rbf_svm_grid_cv.best_estimator_, rbf_svm_grid_cv.best_score_

({'C': 8, 'gamma': 0.0078125},
 SVC(C=8, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma=0.0078125, kernel='rbf',
   max_iter=-1, probability=False, random_state=None, shrinking=True,
   tol=0.001, verbose=False),
 0.91400000000000003)

In [17]:
y_true, y_pred = y_test, rbf_svm_grid_cv.predict(x_test)

In [18]:
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(classification_report(y_true, y_pred))
print(precision_recall_fscore_support(y_true, y_pred, average='micro'))

             precision    recall  f1-score   support

   negative       0.84      0.57      0.68     20513
   positive       0.93      0.98      0.95    110941

avg / total       0.91      0.92      0.91    131454

(0.91640421744488565, 0.91640421744488565, 0.91640421744488565, None)


In [19]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

y_pred = np.where(y_pred == 'positive', 1, 0)
y_true = np.where(y_true == 'positive', 1, 0)

fpr, tpr, threshold = roc_curve(y_true, y_pred)
print("FPR: {}, \nTPR: {}, \nAUC : {}".format(fpr[1], tpr[1], auc(fpr,tpr)))

FPR: 0.4280700043874616, 
TPR: 0.9800975293173849, 
AUC : 0.7760137624649615
