In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from six.moves import range
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

# Setup Pandas
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)

# Setup Seaborn
sns.set_style("whitegrid")
sns.set_context("poster")

In [2]:
##get the data set with clean reviews
train = pd.read_csv("clean_train_reviewsbg.csv")

In [3]:
#import the word2vec model with gensim
import gensim

In [4]:
#load word2vec
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=500000)


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [5]:
#change reviews to vectors space anf get the average vector for each review
def review_to_vector(review):
    review_array = review.split()
    wordvectors = []
    for word in review_array:
        try:
            vector = model.get_vector(word)
            wordvectors.append(vector)
        except KeyError:
            pass
    avg_word_vector = np.mean(np.array(wordvectors), axis=0)
    return avg_word_vector

In [6]:
cleanvector = []
for review in train['cleanReview']:
    cleanvector.append(review_to_vector(review))

In [7]:
X_emreviews = np.array(cleanvector, dtype=np.float)
X_emreviews.shape

(25000, 300)

In [8]:
print(X_emreviews[1])

[ 0.03917618  0.07527008  0.00668135  0.12237892 -0.05514144  0.01250296
  0.04666138 -0.04853227  0.06809974  0.06034899 -0.0264349  -0.13563843
 -0.02281494  0.04036713 -0.0774643   0.1085742   0.03467331  0.12186289
  0.01463499 -0.05926056 -0.03535689  0.07528882  0.02703972  0.00123997
  0.04615135 -0.03957943 -0.09244518  0.0536674   0.00888844 -0.00864553
 -0.06121502 -0.01299958 -0.01968536  0.03419232  0.05326509  0.0030509
  0.03214207  0.04327729  0.05265088  0.09054108  0.11198883 -0.01156807
  0.10114269  0.02941818 -0.02399211 -0.05242996 -0.06253014 -0.00025215
  0.03833618 -0.01486816 -0.01893005  0.03220692 -0.04692974  0.01528053
  0.02531347  0.02246552 -0.04301932 -0.09034767  0.02570829 -0.07500491
  0.01776581  0.09853458 -0.09504414 -0.08185279  0.0098011  -0.03730826
 -0.1026289   0.06098423 -0.02648797  0.07591248  0.0409565  -0.00612602
  0.07162933 -0.02266569 -0.12598877 -0.09535122  0.02087936  0.06742859
  0.06781578  0.10706339 -0.02486682 -0.02205276  0.

In [9]:
#train test split with features as vectors and the sentiment score 
train_features, test_features, train_labels, test_labels = train_test_split(X_emreviews, train['sentiment'], test_size = 0.33, random_state = 42)

In [10]:
#set up SVC fit and predict
from sklearn.svm import LinearSVC
svc = LinearSVC()

In [11]:
svc.fit(train_features, train_labels)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [12]:
sen_pred_train = svc.predict(train_features)
sen_pred_test = svc.predict(test_features)


In [13]:
#performance scores below
from sklearn.metrics import accuracy_score
print('accuracy scores training set: ',accuracy_score(train_labels,sen_pred_train))
print('accuracy scores test set: ',accuracy_score(test_labels,sen_pred_test))

accuracy scores training set:  0.8656119402985074
accuracy scores test set:  0.8578181818181818


In [15]:
from sklearn.metrics import classification_report, confusion_matrix 

In [16]:
print(classification_report(test_labels, sen_pred_test)) 

              precision    recall  f1-score   support

           0       0.86      0.85      0.86      4105
           1       0.85      0.86      0.86      4145

   micro avg       0.86      0.86      0.86      8250
   macro avg       0.86      0.86      0.86      8250
weighted avg       0.86      0.86      0.86      8250



In [17]:
print(confusion_matrix(test_labels, sen_pred_test)) 

[[3495  610]
 [ 563 3582]]


In [18]:
#hyperparamter tuning using C and loss paramters
from sklearn.model_selection import GridSearchCV 

In [41]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
             'loss': ['squared_hinge', 'hinge']}  
  

In [42]:
grid = GridSearchCV(LinearSVC(), param_grid, refit = True, verbose = 3) 

In [43]:
print(LinearSVC())

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)


In [44]:
#same as above
grid.fit(train_features, train_labels)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8542263610315186, total=   0.2s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8507970625111947, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.6s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8452444922084901, total=   0.3s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8402578796561605, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8359305033136306, total=   0.1s
[CV] C=0.1, loss=hinge ...............................................
[CV] ....... C=0.1, loss=hinge, score=0.831810854379366, total=   0.2s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8604942693409742, total=   0.9s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8570660934981192, total=   0.9s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8486476804585349, total=   0.9s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8574498567335244, total=   0.4s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8559914024717894, total=   0.4s
[CV] C=1, loss=hinge .................................................
[CV] ........ C=1, loss=hinge, score=0.8477521046032599, total=   0.6s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.857987106017192, total=   7.0s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8542002507612395, total=   6.6s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8500806018269748, total=   6.7s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8576289398280802, total=   1.2s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8529464445638546, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8490059108006448, total=   1.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.853689111747851, total=  16.3s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8493641411427548, total=  14.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8493641411427548, total=  14.3s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8581661891117478, total=   5.4s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8513344080243597, total=   5.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8495432563138098, total=   5.5s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8492120343839542, total=  16.7s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7479849543256314, total=  15.8s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7476267239835214, total=  15.7s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7716690544412608, total=  16.0s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7295360917069675, total=  15.7s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.2min finished


[CV] ..... C=1000, loss=hinge, score=0.8310943936951459, total=  15.5s


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'loss': ['squared_hinge', 'hinge']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [45]:
print(grid.best_params_) 

{'C': 1, 'loss': 'squared_hinge'}


In [46]:
print(grid.best_estimator_)

LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)


In [47]:
grid.predict(test_features)

array([0, 1, 0, ..., 0, 1, 1], dtype=int64)

In [48]:
grid_pred_train = grid.predict(train_features)
grid_pred_test = grid.predict(test_features)

In [49]:
print('accuracy scores training set: ',accuracy_score(train_labels,grid_pred_train))
print('accuracy scores test set: ',accuracy_score(test_labels,grid_pred_test))

accuracy scores training set:  0.8656119402985074
accuracy scores test set:  0.8578181818181818


In [51]:
from sklearn.model_selection import cross_val_score

In [52]:
scores = cross_val_score(grid, X_emreviews, train['sentiment'], cv=5)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8552789442111578, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8514851485148515, total=   0.4s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.8s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8495349534953496, total=   0.4s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8428314337132573, total=   0.3s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8358835883588359, total=   0.3s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8339333933393339, total=   0.2s
[CV] C=1, loss=squared_hinge .........................................
[CV] . C=1, loss=squared_hinge, score=0.861127774445111, total=   1.3s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8573357335733574, total=   1.2s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8511851185118512, total=   1.2s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8597780443911218, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8571857185718572, total=   0.6s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8502850285028503, total=   0.6s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.860377924415117, total=   9.5s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8567356735673567, total=   8.4s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8493849384938494, total=  12.2s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8572285542891421, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8585358535853586, total=   1.4s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8516351635163516, total=   1.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8567786442711458, total=  17.6s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8556855685568557, total=  18.1s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8484848484848485, total=  17.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8579784043191362, total=   6.6s
[CV] C=100, loss=hinge ...............................................




[CV] ....... C=100, loss=hinge, score=0.858985898589859, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ....... C=100, loss=hinge, score=0.849084908490849, total=   6.4s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8531793641271745, total=  19.3s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8393339333933393, total=  19.2s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8166816681668166, total=  19.7s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.8545290941811637, total=  19.6s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.8292829282928292, total=  20.2s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.9min finished


[CV] ..... C=1000, loss=hinge, score=0.8148814881488149, total=  19.4s


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8527294541091782, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8513351335133513, total=   0.4s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.8s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8468346834683468, total=   0.4s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8411817636472706, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8382838283828383, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8348334833483348, total=   0.3s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8575284943011398, total=   1.2s
[CV] C=1, loss=squared_hinge .........................................
[CV] .. C=1, loss=squared_hinge, score=0.86003600360036, total=   1.0s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8535853585358536, total=   1.2s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8557288542291541, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8556855685568557, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8511851185118512, total=   0.6s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8582783443311338, total=   9.5s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8604860486048604, total=   8.1s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8511851185118512, total=   9.2s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8549790041991602, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8586858685868587, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8498349834983498, total=   1.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8569286142771446, total=  17.7s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8567356735673567, total=  16.9s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.851035103510351, total=  17.4s
[CV] C=100, loss=hinge ...............................................




[CV] ....... C=100, loss=hinge, score=0.853629274145171, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8571857185718572, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8486348634863486, total=   6.4s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7603479304139172, total=  19.3s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.818031803180318, total=  19.2s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8343834383438344, total=  19.2s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.8182363527294542, total=  16.3s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7416741674167416, total=  18.1s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.7min finished


[CV] ..... C=1000, loss=hinge, score=0.8229822982298229, total=  19.1s


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8576784643071386, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8507350735073508, total=   0.4s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.9s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8483348334833484, total=   0.3s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8465806838632274, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8393339333933393, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8342334233423342, total=   0.2s
[CV] C=1, loss=squared_hinge .........................................
[CV] . C=1, loss=squared_hinge, score=0.862627474505099, total=   1.2s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8579357935793579, total=   1.0s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8537353735373537, total=   1.1s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8609778044391122, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8556855685568557, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8498349834983498, total=   0.5s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8615776844631073, total=   9.2s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8568856885688569, total=   8.1s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8532853285328533, total=   9.1s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8609778044391122, total=   1.4s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8550855085508551, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8535853585358536, total=   1.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8606778644271146, total=  17.4s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8556855685568557, total=  17.3s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8537353735373537, total=  17.8s
[CV] C=100, loss=hinge ...............................................




[CV] ....... C=100, loss=hinge, score=0.863377324535093, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8538853885388539, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8511851185118512, total=   6.1s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8584283143371326, total=  19.2s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8127812781278128, total=  19.3s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8391839183918391, total=  19.3s
[CV] C=1000, loss=hinge ..............................................




[CV] ...... C=1000, loss=hinge, score=0.762747450509898, total=  19.4s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.8505850585058505, total=  18.9s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.8min finished


[CV] ..... C=1000, loss=hinge, score=0.8426342634263426, total=  19.1s


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8599280143971205, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8468346834683468, total=   0.3s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.8s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8432343234323433, total=   0.4s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8465806838632274, total=   0.4s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8351335133513351, total=   0.3s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8315331533153315, total=   0.2s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8645770845830834, total=   1.1s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8523852385238524, total=   1.0s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8534353435343535, total=   1.0s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8630773845230953, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8516351635163516, total=   0.5s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8484848484848485, total=   0.5s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8650269946010798, total=  11.9s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8522352235223523, total=   7.9s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8525352535253525, total=   7.7s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8629274145170965, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8511851185118512, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8528352835283528, total=   1.4s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8623275344931014, total=  17.3s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8502850285028503, total=  16.3s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8526852685268527, total=  15.4s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8615776844631073, total=   6.5s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8502850285028503, total=   6.7s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8489348934893489, total=   6.3s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8533293341331734, total=  19.8s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7496249624962497, total=  19.0s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7982298229822983, total=  19.0s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.6498200359928015, total=  19.1s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7758775877587759, total=  19.6s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.8min finished


[CV] ..... C=1000, loss=hinge, score=0.8271827182718272, total=  20.1s


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.1, loss=squared_hinge .......................................
[CV]  C=0.1, loss=squared_hinge, score=0.8569286142771446, total=   0.4s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8486348634863486, total=   0.4s
[CV] C=0.1, loss=squared_hinge .......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.0s remaining:    0.0s


[CV]  C=0.1, loss=squared_hinge, score=0.8511851185118512, total=   0.4s
[CV] C=0.1, loss=hinge ...............................................
[CV] ....... C=0.1, loss=hinge, score=0.845380923815237, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8345334533453346, total=   0.2s
[CV] C=0.1, loss=hinge ...............................................
[CV] ...... C=0.1, loss=hinge, score=0.8381338133813382, total=   0.2s
[CV] C=1, loss=squared_hinge .........................................
[CV] . C=1, loss=squared_hinge, score=0.864127174565087, total=   1.1s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8546354635463547, total=   1.0s
[CV] C=1, loss=squared_hinge .........................................
[CV]  C=1, loss=squared_hinge, score=0.8594359435943595, total=   1.0s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8605278944211158, total=   0.6s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8495349534953496, total=   0.6s
[CV] C=1, loss=hinge .................................................




[CV] ........ C=1, loss=hinge, score=0.8549354935493549, total=   0.5s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8642771445710857, total=   9.3s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.854035403540354, total=  12.9s
[CV] C=10, loss=squared_hinge ........................................
[CV]  C=10, loss=squared_hinge, score=0.8583858385838584, total=   7.7s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8614277144571085, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8553855385538554, total=   1.5s
[CV] C=10, loss=hinge ................................................




[CV] ....... C=10, loss=hinge, score=0.8597359735973598, total=   1.5s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8632273545290942, total=  18.3s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8532853285328533, total=  17.6s
[CV] C=100, loss=squared_hinge .......................................




[CV]  C=100, loss=squared_hinge, score=0.8583858385838584, total=  17.2s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8597780443911218, total=   6.6s
[CV] C=100, loss=hinge ...............................................




[CV] ...... C=100, loss=hinge, score=0.8546354635463547, total=   6.7s
[CV] C=100, loss=hinge ...............................................




[CV] ....... C=100, loss=hinge, score=0.858085808580858, total=   6.5s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.8498800239952009, total=  19.5s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.7961296129612961, total=  19.0s
[CV] C=1000, loss=squared_hinge ......................................




[CV]  C=1000, loss=squared_hinge, score=0.653015301530153, total=  19.4s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7435512897420515, total=  19.5s
[CV] C=1000, loss=hinge ..............................................




[CV] ..... C=1000, loss=hinge, score=0.7913291329132913, total=  19.1s
[CV] C=1000, loss=hinge ..............................................


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  3.9min finished


[CV] ...... C=1000, loss=hinge, score=0.845934593459346, total=  18.8s


In [53]:
#cross valdiate to make sure the model is stable
scores = cross_val_score(svc, X_emreviews, train['sentiment'], cv=5)

In [54]:
print(scores)

[0.8574 0.8642 0.858  0.8632 0.8532]
