In [1]:
# library
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.externals import joblib
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone
# confusion matrix
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
# precision/recall
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
# result reproducibility
np.random.seed(42)

In [3]:
# The text data is already cleaned
inputfile = './csvfiles/output_sentiment.csv'
review = pd.read_csv(inputfile, skip_blank_lines=False)
review = review[['text', 'ovsentiment']]
# exclude NaN in 'text' column (count: 11248)
review = review[~pd.isna(review['text'])]
X = review['text'].values
y = review['ovsentiment'].values

# len(review[review['ovsentiment'] == -1])
# total number of -1 (negative review) is 12566

# len(review[review['ovsentiment'] == 0])
# total number of 0 (neutral) is 135292

# len(review[review['ovsentiment'] == 1])
# total number of 1 (positive review) is 455689

# splitting the dataset into the training set and test set
# stratify=y --> stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
        random_state=42, shuffle=True, stratify=y)

In [4]:
# TOKENIZATION
# see psentiment.py, tokenization is done using
# snowball englishstemmer
# Here, we only need to split the text
##############################################
def tokenizer(text):
    return text.split()

In [5]:
# -1
len(review[review['ovsentiment'] == -1])/len(review)

0.020820250949801757

In [6]:
# 0
len(review[review['ovsentiment'] == 0])/len(review)

0.22416149860739926

In [7]:
# 1
len(review[review['ovsentiment'] == 1])/len(review)

0.755018250442799

# 1 - Logistic Regression

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [{'vect__ngram_range': [(1,1)],
    'vect__stop_words': [None],
    'vect__tokenizer': [str.split],
    'clf__penalty': ['l2'],
    'clf__C': [1.0, 10.0, 15.0, 20.0, 30.0, 40.0, 
        50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
    'clf__solver': ['newton-cg', 'saga', 'lbfgs']
    }, 
    {'vect__ngram_range': [(1,1)],
        'vect__stop_words': [None],
        'vect__tokenizer': [str.split],
        'vect__use_idf':[False],
        'vect__norm': [None],
        'clf__penalty': ['l2'],
        'clf__C': [1.0, 10.0, 15.0, 20.0, 30.0, 40.0, 
            50.0, 60.0, 70.0, 80.0, 90.0, 100.0],
        'clf__solver': ['newton-cg', 'saga', 'lbfgs']
        }
    ]

In [10]:
lr_tfidf = Pipeline([
    ('vect', tfidf),
    ('clf', LogisticRegression(
        random_state=42,
        multi_class='multinomial',
        class_weight={-1:3.,
                     0:1.5,
                     1:1.5}
    )
    )
])

In [11]:
gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [12]:
gs_lr_tfidf.fit(X_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[



[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.4min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.5min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.5min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__penalty=l2, 

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  5.0min


[CV] clf__C=10.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.1min
[CV] clf__C=15.



[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.5min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=10.0, clf



[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.1min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.7min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0, cl



[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.3min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.5min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.7min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=15.0



[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.4min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.3min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=20.0



[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.7min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.5min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=30.0



[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.3min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=40.0, cl



[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.7min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=50.0, clf__pe



[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=



[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.3min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.4min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=70.0, cl



[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.7min


[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 38.0min


[CV] clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=90.0, clf__pen



[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.3min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.4min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.5min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=90.0, clf



[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.4min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.0min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.1min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=100.0, clf__pe



[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.8min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.9min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 1.5min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, ve



[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.0min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.0min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.3min
[CV] clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.9min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.4min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, t



[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.9min
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.1min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.4m



[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.7min
[CV] clf__C=10.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.7min
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.8min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, tota



[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=10.9min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=10.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, tota



[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.5min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.3min
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5min
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6



[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.3min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, to



[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.5min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=15.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=1



[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.3min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.4min
[CV] clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5



[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.3min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, tot



[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=30.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=20.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=17



[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, to



[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=19.



[CV]  clf__C=30.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=21.3min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 



[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, to



[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.8min
[CV] clf__C=50.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=40.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=



[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=23.



[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.5min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=50.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=2



[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6min
[CV] clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=



[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=60.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=



[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7



[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.9min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=28.0min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=70.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=31.7min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=



[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6min
[CV] clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.6min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=



[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.9min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.7min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7



[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.9min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=80.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=43.8min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, t



[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.8min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=33.2min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=34.4min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV] clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.6min
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=34.9min
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=lbfgs, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.5min
[CV]  clf__C=100.0, clf__penalty=l2, clf__solver=saga, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.5min
[CV]  clf__C=90.0, clf__penalty=l2, clf__solver=newton-cg, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' 

[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed: 211.2min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('vect', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=False, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
 ...
          random_state=42, solver='liblinear', tol=0.0001, verbose=0,
          warm_start=False))]),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'vect__ngram_range': [(1, 1)], 'vect__stop_words': [None], 'vect__tokenizer': [<method 'split' of 'str' objects>], 'clf__penalty': ['l2'], 'clf__C': [1.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0], 'clf__solver': ['newton-cg', 'saga', 'lbfgs']}, {'vect__ngram_ra...0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0], 'clf__solver': ['newton-cg', 'saga', 'lbfgs']}],
       pre_dispatch='2*n_jobs', refit='f1_micro',
   

In [13]:
best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

In [14]:
best_parameters

{'clf__C': 15.0,
 'clf__penalty': 'l2',
 'clf__solver': 'lbfgs',
 'vect__ngram_range': (1, 1),
 'vect__norm': None,
 'vect__stop_words': None,
 'vect__tokenizer': <method 'split' of 'str' objects>,
 'vect__use_idf': False}

In [15]:
# PERFORMANCE MEASURE
#####################
# Stratified k-fold CV
skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

0.9850573165301494
0.9848914754369977
0.9851089387788916
0.9855746328935651
0.9852743201540916


In [16]:
# Dumb Classifier
from sklearn.base import BaseEstimator
class DumbClassifier(BaseEstimator):
    def fit(self, X, y=None):
        pass
    def predict(self, X):
        return np.zeros((len(X), 1), dtype=bool)
dumb = DumbClassifier()
cross_val_score(dumb, X_train, y_train, cv=5, scoring='accuracy')

array([0.22451537, 0.22286886, 0.22508725, 0.22405169, 0.22427952])

In [17]:
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   48.2s remaining:   32.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   48.2s finished


In [18]:
conf_mx

array([[  8705,   1173,    175],
       [  1325, 104500,   2408],
       [   298,   1627, 362626]])

In [20]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.9854899272425269, 0.9854899272425269)

In [21]:
f1_score(y_train, y_train_pred, average='micro')

0.9854899272425269

In [22]:
# TEST MODEL ON TEST DATA
#########################
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   20.6s remaining:   13.7s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   21.0s finished


In [23]:
conf_mx

array([[ 2010,   447,    56],
       [  373, 25922,   764],
       [   89,   714, 90335]])

In [24]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.9797614116477508, 0.9797614116477508)

In [25]:
f1_score(y_test, y_test_pred, average='micro')

0.9797614116477508

In [26]:
# save fitted model to file
joblib.dump(best_estimator, './training/sentiment_logreg_classweight.pkl')

['./training/sentiment_logreg_classweight.pkl']

# 2 - Decision Trees

In [27]:
from sklearn.tree import DecisionTreeClassifier

In [69]:
# The text data is already cleaned
inputfile = './csvfiles/output_sentiment.csv'
review = pd.read_csv(inputfile, skip_blank_lines=False)
review = review[['text', 'ovsentiment']]
# exclude NaN in 'text' column (count: 11248)
review = review[~pd.isna(review['text'])]
X = review['text'].values
y = review['ovsentiment'].values

# len(review[review['ovsentiment'] == -1])
# total number of -1 (negative review) is 12566

# len(review[review['ovsentiment'] == 0])
# total number of 0 (neutral) is 135292

# len(review[review['ovsentiment'] == 1])
# total number of 1 (positive review) is 455689

# splitting the dataset into the training set and test set
# stratify=y --> stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, 
        random_state=42, shuffle=True, stratify=y)

In [70]:
X_train.shape

(301773,)

In [71]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [{'vect__ngram_range': [(1,1)],
    'vect__stop_words': [None],
    'vect__tokenizer': [str.split],
    'clf__max_depth': [5, 6, 7],
    'clf__min_samples_leaf': [4, 5, 6],
    'clf__min_samples_split': [5, 6, 7],
    'clf__max_features': ['auto', 'log2', 'sqrt']
    }, 
    {'vect__ngram_range': [(1,1)],
        'vect__stop_words': [None],
        'vect__tokenizer': [str.split],
        'vect__use_idf':[False],
        'vect__norm': [None],
        'clf__max_depth': [5, 6, 7],
        'clf__min_samples_leaf': [4, 5, 6],
        'clf__min_samples_split': [5, 6, 7],
        'clf__max_features': ['auto', 'log2','sqrt']
        }
    ]

In [72]:
lr_tfidf = Pipeline([
    ('vect', tfidf),
    ('clf', DecisionTreeClassifier(
        random_state=42,
        class_weight={-1:5.,
                     0:1,
                     1:1}
    )
    )
])

In [73]:
gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [74]:
gs_lr_tfidf.fit(X_train, y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None

[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.4s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.4s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, v

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   15.9s


[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' obje

[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.1s
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.4s
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split

[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:  1.6min


[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split

[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.5s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.4s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=

[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.6s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.5s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.5s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.5s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed:  3.8min


[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split

[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.4s
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split

[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split

[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.3s
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect

[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features

[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.4s
[CV] clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV]  clf__

[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.0s
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV]  clf__max_depth=5, cl

[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features

[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.3s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV]  clf__max_depth=5, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__m

[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, cl

[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features

[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=5, cl

[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, cl

[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV]  clf__max_depth=6, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__m

[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.3s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, cl

[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, cl

[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.0s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 


[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed:  6.7min


[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, cl

[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__m

[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=6, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=6, clf__max_features

[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, cl

[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, cl

[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=auto, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, cl

[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features

[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV] clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features

[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.3s
[CV]  clf__max_depth=7, clf__max_features=log2, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__m

[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=6, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=4, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.1s
[CV] clf__max_depth=7, clf

[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=5, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.2s
[CV]  clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=5, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   3.3s
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__max_depth=7, clf__max_features=sqrt, clf__min_samples_leaf=6, clf__min_samples_split=7, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__max_depth=7, cl

[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:  8.6min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('vect', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=False, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
 ...        min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best'))]),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'vect__ngram_range': [(1, 1)], 'vect__stop_words': [None], 'vect__tokenizer': [<method 'split' of 'str' objects>], 'clf__max_depth': [5, 6, 7], 'clf__min_samples_leaf': [4, 5, 6], 'clf__min_samples_split': [5, 6, 7], 'clf__max_features': ['auto', 'log2', 'sqrt']}, {'vect__ngram_range': ...af': [4, 5, 6], 'clf__min_samples_split': [5, 6, 7], 'clf__max_features': ['auto', 'log2', 'sqrt']}],
       pre_dispatch='2*n_jobs', refit='f1_micro',
   

In [75]:
best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

In [76]:
best_parameters

{'clf__max_depth': 7,
 'clf__max_features': 'auto',
 'clf__min_samples_leaf': 4,
 'clf__min_samples_split': 5,
 'vect__ngram_range': (1, 1),
 'vect__stop_words': None,
 'vect__tokenizer': <method 'split' of 'str' objects>}

In [77]:
# PERFORMANCE MEASURE
#####################
# Stratified k-fold CV
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

0.7551858970110676
0.7549995857841106
0.7546350758015078
0.7588560824468966
0.7562010173479363


In [78]:
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:    2.2s remaining:    1.5s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    2.4s finished


In [79]:
conf_mx

array([[    98,      7,   6178],
       [   108,   1112,  66426],
       [   193,    277, 227374]])

In [80]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.7574700188552322, 0.7574700188552322)

In [81]:
f1_score(y_train, y_train_pred, average='micro')

0.7574700188552322

In [82]:
# TEST MODEL ON TEST DATA
#########################
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:    2.2s remaining:    1.5s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    2.4s finished


In [83]:
conf_mx

array([[   186,      0,   6097],
       [   252,    155,  67239],
       [   301,     28, 227516]])

In [84]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.7550584212026218, 0.7550584212026218)

In [85]:
f1_score(y_test, y_test_pred, average='micro')

0.7550584212026218

In [86]:
joblib.dump(best_estimator, './training/sentiment_destree_classweight.pkl')

['./training/sentiment_destree_classweight.pkl']

# 3 - SGDClassifier behaves like Linear SVC

In [90]:
# hence loss='hinge' and penalty='l2'

In [87]:
from sklearn.linear_model import SGDClassifier

In [88]:
# The text data is already cleaned
inputfile = './csvfiles/output_sentiment.csv'
review = pd.read_csv(inputfile, skip_blank_lines=False)
review = review[['text', 'ovsentiment']]
# exclude NaN in 'text' column (count: 11248)
review = review[~pd.isna(review['text'])]
X = review['text'].values
y = review['ovsentiment'].values

# len(review[review['ovsentiment'] == -1])
# total number of -1 (negative review) is 12566

# len(review[review['ovsentiment'] == 0])
# total number of 0 (neutral) is 135292

# len(review[review['ovsentiment'] == 1])
# total number of 1 (positive review) is 455689

# splitting the dataset into the training set and test set
# stratify=y --> stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
        random_state=42, shuffle=True, stratify=y)

In [89]:
X_train.shape

(482837,)

In [91]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'clf__loss': ['hinge'],
            'clf__penalty': ['l2'],
            'clf__max_iter': [200, 300, 400, 500, 600]
            }, 
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'vect__use_idf':[False],
            'vect__norm': [None],
            'clf__loss': ['hinge'],
            'clf__penalty': ['l2'],
            'clf__max_iter': [200, 300, 400, 500, 600]
            }
    ]

lr_tfidf = Pipeline(
        [   ('vect', tfidf),
            ('clf', SGDClassifier(random_state=42, 
                                  class_weight={-1:3,
                                               0:1.5,
                                               1:1.5}))
            ]
        )

gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_macro', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [92]:
gs_lr_tfidf.fit(X_train, y_train)

best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' o

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  6.8min


[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.3min
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.3min
[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l2, vect__n

[CV] clf__loss=hinge, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.3min
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.2min
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objec

[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 16.2min finished


In [93]:
best_parameters

{'clf__loss': 'hinge',
 'clf__max_iter': 400,
 'clf__penalty': 'l2',
 'vect__ngram_range': (1, 1),
 'vect__norm': None,
 'vect__stop_words': None,
 'vect__tokenizer': <method 'split' of 'str' objects>,
 'vect__use_idf': False}

In [94]:
# PERFORMANCE MEASURE
#####################
# Stratified k-fold CV
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

0.9753958309602461
0.9763482727197416
0.9759858338165852
0.9759024915601765
0.9760164032889422


In [95]:
# confusion matrix
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:  2.2min remaining:  1.5min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  2.2min finished


In [96]:
conf_mx

array([[  5238,   4105,    710],
       [   644, 102823,   4766],
       [   203,   1201, 363147]])

In [97]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.975915267471217, 0.975915267471217)

In [98]:
f1_score(y_train, y_train_pred, average='micro')

0.975915267471217

In [99]:
# TEST MODEL ON TEST DATA
#########################
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   23.4s remaining:   15.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   23.8s finished


In [100]:
conf_mx

array([[ 1305,  1039,   169],
       [  159, 25729,  1171],
       [   55,   302, 90781]])

In [101]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.9760169000082843, 0.9760169000082843)

In [102]:
f1_score(y_test, y_test_pred, average='micro')

0.9760169000082843

In [103]:
joblib.dump(best_estimator, './training/sentiment_svm_classweight.pkl')

['./training/sentiment_svm_classweight.pkl']

# 4 - SGD

In [8]:
from sklearn.linear_model import SGDClassifier

In [9]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'clf__loss': ['hinge', 'modified_huber', 'squared_hinge', 'perceptron', 'log'],
            'clf__penalty': ['l2', 'l1', 'elasticnet'],
            'clf__max_iter': [200, 300, 400, 500, 600]
            }, 
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'vect__use_idf':[False],
            'vect__norm': [None],
            'clf__loss': ['hinge', 'modified_huber', 'squared_hinge', 'perceptron', 'log'],
            'clf__penalty': ['l2', 'l1', 'elasticnet'],
            'clf__max_iter': [200, 300, 400, 500, 600]
            }
    ]

lr_tfidf = Pipeline(
        [   ('vect', tfidf),
            ('clf', SGDClassifier(random_state=42, 
                                  class_weight={-1:3,
                                               0:1.5,
                                               1:1.5}))
            ]
        )

gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_macro', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [10]:
gs_lr_tfidf.fit(X_train, y_train)

best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

Fitting 5 folds for each of 150 candidates, totalling 750 fits
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str'

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  4.9min


[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total

[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.2min
[CV] clf__loss=hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.2min
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.2min
[CV] clf__loss=hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=hinge, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 's

[CV] clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 6.1min
[CV] clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=hinge, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 6.1min
[CV] clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of '

[CV]  clf__loss=modified_huber, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.2min
[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1,

[CV] clf__loss=modified_huber, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.2min
[CV] clf__loss=modified_huber, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.2min
[CV] clf__loss=modified_huber, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__toke

[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.2min
[CV] clf__loss=modified_huber, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.2min


[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed: 48.8min


[CV] clf__loss=modified_huber, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.2min
[CV] clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.2min
[CV] clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokeni

[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 6.2min
[CV] clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.0min
[CV] clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.1min
[CV] clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), 

[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.1min
[CV]  clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.1min
[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 

[CV]  clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.0min
[CV] clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.0min
[CV] clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.1min
[CV] clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(

[CV] clf__loss=perceptron, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.0min
[CV] clf__loss=perceptron, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 6.0min
[CV] clf__loss=perceptron, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split'

[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.3min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.3min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<m

[CV] clf__loss=perceptron, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.0min
[CV] clf__loss=perceptron, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.0min
[CV] clf__loss=perceptron, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<m

[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=perceptron, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.9min
[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 2.6min
[CV]

[CV] clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 3.8min
[CV] clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 4.4min
[CV] clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, t

[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed: 118.1min


[CV] clf__loss=log, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.2min
[CV] clf__loss=log, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.1min
[CV] clf__loss=log, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 5.

[CV]  clf__loss=log, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 7.4min
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 7.5min
[CV] clf__loss=hinge, clf__max_iter=200, clf__penalty=l

[CV]  clf__loss=hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.1min
[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.1min
[CV] clf__loss=hinge, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total

[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV] clf__loss=hinge, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.4min
[CV] clf__loss=hinge, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' obje

[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.0min
[CV] clf__loss=hinge, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.0min
[CV] clf__loss=hinge, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_

[CV]  clf__loss=hinge, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.0min
[CV] clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.1min
[CV] clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 's

[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.6min
[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.6min
[CV] clf__loss=modified_huber, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words

[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.3min
[CV] clf__loss=modified_huber, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.3min
[CV] clf__loss=modified_huber, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=

[CV] clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.4min
[CV] clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=modified_huber, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.4min
[CV] clf__loss=modified_huber, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, v

[CV]  clf__loss=modified_huber, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.5min
[CV] clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 1.7min
[CV] clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=No

[CV]  clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.5min
[CV] clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.6min
[CV] clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<meth

[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.1min
[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.2min
[CV] clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<metho

[CV]  clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.5min
[CV] clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.4min
[CV] clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=Non

[CV]  clf__loss=squared_hinge, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.4min
[CV] clf__loss=perceptron, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__loss=perceptron, clf__max_iter=200, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.1min
[CV] clf__loss=perceptron, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__toke

[CV]  clf__loss=perceptron, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.1min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.1min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of '

[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed: 219.7min


[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.0min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.1min
[CV] clf__loss=perceptron, clf__max_iter=400, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'spli

[CV]  clf__loss=perceptron, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.1min
[CV] clf__loss=perceptron, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 4.1min
[CV] clf__loss=perceptron, clf__max_iter=500, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect

[CV] clf__loss=perceptron, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.1min
[CV] clf__loss=log, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=perceptron, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.0min
[CV] clf__loss=log, clf__max_iter=200, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' ob

[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.6min
[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=200, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 2.7min
[CV] clf__loss=log, clf__max_iter=300, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 


[CV]  clf__loss=log, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.9min
[CV] clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=300, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 3.9min
[CV] clf__loss=log, clf__max_iter=400, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=400, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str

[CV]  clf__loss=log, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.7min
[CV] clf__loss=log, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=500, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.7min
[CV] clf__loss=log, clf__max_iter=600, clf__penalty=l2, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__loss=log, clf__max_iter=500, clf__penalty=l1, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False,

[CV]  clf__loss=log, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.5min
[CV]  clf__loss=log, clf__max_iter=600, clf__penalty=elasticnet, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 5.9min


[Parallel(n_jobs=-1)]: Done 750 out of 750 | elapsed: 272.2min finished


In [11]:
best_parameters

{'clf__loss': 'modified_huber',
 'clf__max_iter': 600,
 'clf__penalty': 'l1',
 'vect__ngram_range': (1, 1),
 'vect__norm': None,
 'vect__stop_words': None,
 'vect__tokenizer': <method 'split' of 'str' objects>,
 'vect__use_idf': False}

In [12]:
skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

0.9841356957201587
0.9837213155496645
0.9839387788915582
0.9834413768821324
0.9846633390634385


In [13]:
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:  4.1min remaining:  2.7min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  4.1min finished


In [14]:
conf_mx

array([[  8736,   1094,    223],
       [  1772, 104005,   2456],
       [   448,   1542, 362561]])

In [15]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.9843943194079989, 0.9843943194079989)

In [16]:
f1_score(y_train, y_train_pred, average='micro')

0.9843943194079989

In [17]:
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:   44.6s remaining:   29.7s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   45.3s finished


In [18]:
conf_mx

array([[ 2104,   347,    62],
       [  555, 25777,   727],
       [  171,   612, 90355]])

In [19]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.9795045977963714, 0.9795045977963714)

In [20]:
f1_score(y_test, y_test_pred, average='micro')

0.9795045977963714

In [21]:
joblib.dump(best_estimator, './training/sentiment_sgd_classweight.pkl')

['./training/sentiment_sgd_classweight.pkl']

# 5 - Passive Aggressive Classifier

In [22]:
from sklearn.linear_model import PassiveAggressiveClassifier

In [23]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'clf__loss': ['hinge', 'squared_hinge'],
            'clf__C': [0.1, 0.5, 1.0, 1.5],
            'clf__shuffle': [True, False]
            }, 
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'vect__use_idf':[False],
            'vect__norm': [None],
            'clf__loss': ['hinge', 'squared_hinge'],
            'clf__C': [0.1, 0.5, 1.0, 1.5],
            'clf__shuffle': [True, False]
            }
    ]

lr_tfidf = Pipeline(
        [   ('vect', tfidf),
            ('clf', PassiveAggressiveClassifier(random_state=42, class_weight={-1:3,
                                                                              0:1.5,
                                                                              1:1.5}))
            ]
        )

gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [24]:
gs_lr_tfidf.fit(X_train, y_train)

best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, cl



[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.0s
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.0s
[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.1s
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.4s
[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]



[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.7s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.8s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.8s
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, t

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   31.2s


[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.6s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.5s
[CV]  clf__C=0.5, clf__lo



[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   9.2s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   9.2s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.2s
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.4s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.6s
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.2s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[



[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.9s




[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.8s




[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.4s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.6s
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.9s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.0s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>



[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.8s




[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.6s
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.8s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.8s
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.1s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.4s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.5s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.3s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss



[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   9.3s
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.1s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.6s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, t



[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 




[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.3s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.2s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.3s
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.5s
[CV] c



[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.0s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.1s




[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.3s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.9s
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.1s
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.5s




[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   7.7s
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   6.7s
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=   8.3s
[CV] clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__no



[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.2s
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.6s
[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.3s




[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.3s




[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.0s
[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.6s
[CV] clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=F



[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.5s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.8s




[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.5s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.1s




[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.2s
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.4s
[CV] clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.6s
[CV]  clf__C=0.1, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of



[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.7s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   9.5s
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.9s
[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vec



[CV] clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.1s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.2s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, tota



[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.5s
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.9s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.6s
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 's



[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.7s
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.8s
[CV] clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=0.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.5s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 's



[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.6s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.9s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False



[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.4s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.9s




[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.7s
[CV] clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.0s
[CV]  clf__C=1.0, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=



[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.4s
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.6s




[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.6s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.0s
[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.1s
[CV] clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objec



[CV]  clf__C=1.0, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.8s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   9.1s
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.8s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.1s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.9s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 




[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.3s
[CV] clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   7.0s
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.4s
[CV]  clf__C=1.5, clf__loss=hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__us



[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.1s




[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.5s
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   6.7s
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.2s
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=   8.1s
[CV]  clf__C=1.5, clf__loss=squared_hinge, clf__shuffle=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenize

[Parallel(n_jobs=-1)]: Done 160 out of 160 | elapsed:  3.4min finished


In [25]:
# PERFORMANCE MEASURE
#####################
# Stratified k-fold CV
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))



0.9816400708301836




0.9821369397730096




0.9817434346781543




0.9814738106579955




0.9811527866951101


In [26]:
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:    5.5s remaining:    3.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    5.8s finished


In [27]:
conf_mx

array([[  7513,   2297,    243],
       [  1374, 103494,   3365],
       [   273,   1111, 363167]])

In [28]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.9820581272769071, 0.9820581272769071)

In [29]:
f1_score(y_train, y_train_pred, average='micro')

0.9820581272769071

In [30]:
# TEST MODEL ON TEST DATA
#########################
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:    1.6s remaining:    1.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.6s finished


In [31]:
conf_mx

array([[ 1590,   823,   100],
       [  313, 25773,   973],
       [   89,   399, 90650]])

In [32]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.9776571949299975, 0.9776571949299975)

In [33]:
f1_score(y_test, y_test_pred, average='micro')

0.9776571949299975

In [34]:
joblib.dump(best_estimator, './training/sentiment_passiveagressive_classweight.pkl')

['./training/sentiment_passiveagressive_classweight.pkl']

# 6 - Perceptron

In [38]:
from sklearn.linear_model import Perceptron

In [39]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, preprocessor=None)
param_grid = [
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'clf__penalty': ['l2', 'l1', 'elasticnet'],
            'clf__alpha': [0.0001, 0.001, 0.01, 1],
            'clf__max_iter': [1000],
            'clf__warm_start': [0.0001]
            }, 
        {'vect__ngram_range': [(1,1)],
            'vect__stop_words': [None],
            'vect__tokenizer': [str.split],
            'vect__use_idf':[False],
            'vect__norm': [None],
            'clf__penalty': ['l2', 'l1', 'elasticnet'],
            'clf__alpha': [0.0001, 0.001, 0.01, 1],
            'clf__max_iter': [1000],
            'clf__warm_start': [0.0001]
            }
    ]

lr_tfidf = Pipeline(
        [   ('vect', tfidf),
            ('clf', Perceptron(random_state=42, class_weight={-1:3,
                                                             0:1.5,
                                                             1:1.5}))
            ]
        )

gs_lr_tfidf = GridSearchCV(estimator=lr_tfidf,
        param_grid=param_grid,
        scoring=['accuracy', 'f1_micro'],
        cv=5,
        verbose=2,
        refit='f1_micro',
        n_jobs=-1)

In [40]:
gs_lr_tfidf.fit(X_train, y_train)

best_parameters = gs_lr_tfidf.best_params_
best_estimator = gs_lr_tfidf.best_estimator_
result = gs_lr_tfidf.cv_results_

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV] clf__alp

[CV] clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=0.001, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 8.7min


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 18.9min


[CV] clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=10.6min
[CV] clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=10.6min
[CV] clf__alpha=0.01, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=

[CV]  clf__alpha=0.01, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 9.0min
[CV] clf__alpha=1, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=0.01, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 8.9min
[CV] clf__alpha=1, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects> 
[CV]  clf__alpha=0.01, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total= 9.2min
[CV] clf__alpha=1, clf__max_iter=1000,

[CV]  clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 7.2min
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=l1, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 7.2min
[CV] clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=0.0001, clf__max_iter=1000, clf__penalty=elas

[CV]  clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 7.1min
[CV] clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 7.1min
[CV] clf__alpha=0.001, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=0.001, clf__max_iter=1000, clf__p

[CV]  clf__alpha=0.01, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total= 6.8min
[CV] clf__alpha=1, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=1, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, total=237.8min
[CV] clf__alpha=1, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False 
[CV]  clf__alpha=1, clf__max_iter=1000, clf__penalty=l2, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__stop_words=

[CV]  clf__alpha=1, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=233.4min
[CV]  clf__alpha=1, clf__max_iter=1000, clf__penalty=elasticnet, clf__warm_start=0.0001, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<method 'split' of 'str' objects>, vect__use_idf=False, total=207.2min


[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 559.1min finished


In [41]:
best_parameters

{'clf__alpha': 0.0001,
 'clf__max_iter': 1000,
 'clf__penalty': 'l1',
 'clf__warm_start': 0.0001,
 'vect__ngram_range': (1, 1),
 'vect__norm': None,
 'vect__stop_words': None,
 'vect__tokenizer': <method 'split' of 'str' objects>,
 'vect__use_idf': False}

In [42]:
# PERFORMANCE MEASURE
#####################
# Stratified k-fold CV
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

skfolds = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for train_index, test_index in skfolds.split(X_train, y_train):
    clone_estimator = clone(best_estimator)
    X_train_folds = X_train[train_index]
    y_train_folds = (y_train[train_index])
    X_test_fold = X_train[test_index]
    y_test_fold = (y_train[test_index])

    clone_estimator.fit(X_train_folds, y_train_folds)
    y_pred = clone_estimator.predict(X_test_fold)
    n_correct = sum(y_pred == y_test_fold)
    print(n_correct / len(y_pred))

0.9729726931002702
0.9754680639549334
0.9202945074973076
0.9711078433403061
0.9666342190833213


In [43]:
y_train_pred = cross_val_predict(best_estimator, X_train, y_train, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_train, y_train_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:  6.3min remaining:  4.2min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  6.3min finished


In [44]:
conf_mx

array([[  6817,   2675,    561],
       [  2414, 101758,   4061],
       [  2273,   2174, 360104]])

In [45]:
(precision_score(y_train, y_train_pred, average='micro'),
        recall_score(y_train, y_train_pred, average='micro'))

(0.9706774750070934, 0.9706774750070934)

In [46]:
f1_score(y_train, y_train_pred, average='micro')

0.9706774750070934

In [47]:
y_test_pred = cross_val_predict(best_estimator, X_test, y_test, cv=5,
        verbose=2, n_jobs=-1)

conf_mx = confusion_matrix(y_test, y_test_pred)

[Parallel(n_jobs=-1)]: Done   3 out of   5 | elapsed:  1.1min remaining:   44.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  1.1min finished


In [48]:
conf_mx

array([[ 1752,   555,   206],
       [  573, 24891,  1595],
       [  173,  2798, 88167]])

In [49]:
(precision_score(y_test, y_test_pred, average='micro'),
        recall_score(y_test, y_test_pred, average='micro'))

(0.9511225250600613, 0.9511225250600613)

In [50]:
f1_score(y_test, y_test_pred, average='micro')

0.9511225250600613

In [51]:
joblib.dump(best_estimator, './training/sentiment_perceptron_classweight.pkl')

['./training/sentiment_perceptron_classweight.pkl']