In [1]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
# The context manager closes the file handle deterministically instead of
# leaving it open until the garbage collector gets to it.
with open('Clayton_SG_df_first.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

# Each `long_post` entry is handed to str.join, i.e. it is an iterable of string tokens.
# Collapse it into one space-separated string and drop any commas inside the tokens.
# This yields exactly what joining with ', ' and then deleting every comma produced,
# but in a single pass over the column.
df['long_post'] = df['long_post'].apply(
    lambda tokens: ' '.join(token.replace(',', '') for token in tokens)
)
df.sample(5)

Unnamed: 0,group,long_post,post_number
32394,Non Hodgkins Lymphoma,happens pot covers symptoms well dont even kno...,383011
12485,Smoking,sooo close cigarette brnno one cares brn refer...,151573
14120,Psoriasis,doctors want mstart using biologics im sure an...,103717
25327,Non Hodgkins Lymphoma,short note let everyone know lump abdomen hern...,60104
27650,Bipolar,told im addict guess true dont change things l...,397203


In [2]:
from sklearn.model_selection import train_test_split

# Features are the flattened post text; labels are the `group` column.
X = df['long_post']
y = df['group']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Show the shape of every split, one per line: train/test features, then train/test labels.
print(
    *(part.shape for part in (X_train, X_test, y_train, y_test)),
    sep='\n',
)

(27204,)
(6801,)
(27204,)
(6801,)


In [4]:
from sklearn.feature_extraction.text import CountVectorizer

# Learn the vocabulary from the training posts and encode them as token counts
# in one step; the trailing expression displays the resulting matrix shape.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(
    X_train
)
X_train_counts.shape

(27204, 45472)

In [5]:
# Re-weight the raw counts with TF-IDF (term frequency times inverse document frequency).
from sklearn.feature_extraction.text import TfidfTransformer

# Fit the IDF weights on the training counts, then apply them to the same matrix.
tfidf_transformer = TfidfTransformer().fit(X_train_counts)
X_train_tfidf = tfidf_transformer.transform(X_train_counts)
X_train_tfidf.shape

(27204, 45472)

In [6]:
# Fit a multinomial Naive Bayes classifier on the TF-IDF features.
from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB()
clf.fit(X_train_tfidf, y_train);

In [7]:
from sklearn.pipeline import Pipeline

# Chain counting, TF-IDF weighting and Naive Bayes so raw text can be passed
# straight to fit/predict, then train the whole chain on the training split.
text_clf = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
]).fit(X_train, y_train)

In [8]:
import numpy as np

# Accuracy on the held-out posts: the fraction of predictions equal to the true group.
predicted = text_clf.predict(X_test)
(predicted == y_test).mean()

0.06557859138362006

In [9]:
# Support vector machine classifier: hinge loss with an L2 penalty, trained via SGDClassifier.
from sklearn.linear_model import SGDClassifier

text_clf_svm = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf-svm', SGDClassifier(
        loss='hinge',
        penalty='l2',
        alpha=1e-3,
        max_iter=5,
        tol=None,
        random_state=42,
    )),
])

# Assign the fitted pipeline to `_` so the cell does not echo its repr.
_ = text_clf_svm.fit(X_train, y_train)

# Accuracy on the held-out posts.
predicted_svm = text_clf_svm.predict(X_test)
(predicted_svm == y_test).mean()

0.5544772827525364

In [10]:
# Tuning hyperparameters
from sklearn.model_selection import GridSearchCV
# Search grid for the Naive Bayes pipeline; every key is '<pipeline step>__<parameter>'.
parameters = dict([
    ('vect__ngram_range', [(1, 1), (1, 2)]),
    ('tfidf__use_idf', (True, False)),
    ('clf__alpha', (1e-2, 1e-3)),
])

In [11]:
# Grid search over the Naive Bayes pipeline, using every available core.
gs_clf = GridSearchCV(
    estimator=text_clf,
    param_grid=parameters,
    n_jobs=-1,
)

In [12]:
%%time
gs_clf = gs_clf.fit(X_train, y_train)
print(gs_clf.best_score_)
gs_clf.best_params_

0.4042420232318777
CPU times: user 13.9 s, sys: 2.82 s, total: 16.7 s
Wall time: 43.8 s


In [13]:
# Grid search with SVM classifier
# Search grid for the SVM pipeline; the classifier step there is named 'clf-svm'.
parameters_svm = dict([
    ('vect__ngram_range', [(1, 1), (1, 2)]),
    ('tfidf__use_idf', (True, False)),
    ('clf-svm__alpha', (1e-2, 1e-3)),
])
# Grid search over the SVM pipeline, using every available core.
gs_clf_svm = GridSearchCV(
    estimator=text_clf_svm,
    param_grid=parameters_svm,
    n_jobs=-1,
)

In [14]:
%%time
gs_clf_svm = gs_clf_svm.fit(X_train, y_train)
print(gs_clf_svm.best_score_)
gs_clf_svm.best_params_

0.5244081752683429
CPU times: user 2min 14s, sys: 13.2 s, total: 2min 27s
Wall time: 2min 47s


In [15]:
import nltk
nltk.download('stopwords')

from nltk.stem.snowball import SnowballStemmer

# Shared English Snowball stemmer, configured to ignore stop words.
stemmer = SnowballStemmer("english", ignore_stopwords=True)


class StemmedCountVectorizer(CountVectorizer):
    """CountVectorizer whose analyzer stems every token it emits."""

    def build_analyzer(self):
        """Return a callable that maps a document to its list of stemmed tokens."""
        base_analyzer = super().build_analyzer()

        def stemmed_analyzer(doc):
            # Run the stock analyzer first, then stem each token it produced.
            return [stemmer.stem(token) for token in base_analyzer(doc)]

        return stemmed_analyzer


stemmed_count_vect = StemmedCountVectorizer(stop_words='english')

# Naive Bayes pipeline on stemmed counts; fit_prior=False is passed to MultinomialNB.
text_mnb_stemmed = Pipeline(steps=[
    ('vect', stemmed_count_vect),
    ('tfidf', TfidfTransformer()),
    ('mnb', MultinomialNB(fit_prior=False)),
])

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ec2-user/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
%%time
# Fit the stemmed Naive Bayes pipeline on the training split; %%time reports how long it takes.
# The result is assigned back to the same name, so the cell has no trailing expression to display.
text_mnb_stemmed = text_mnb_stemmed.fit(X_train, y_train)

CPU times: user 16.2 s, sys: 210 ms, total: 16.4 s
Wall time: 16.4 s


In [17]:
# Accuracy of the stemmed pipeline: the fraction of held-out posts predicted correctly.
predicted_mnb_stemmed = text_mnb_stemmed.predict(X_test)
(predicted_mnb_stemmed == y_test).mean()

0.12924569916188797

In [18]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

  from numpy.core.umath_tests import inner1d


In [19]:
# Text vectorizers to compare; both drop English stop words and set no cap on
# the vocabulary size (max_features=None). `vect_names` is index-aligned with them.
vectorizers = [
    TfidfVectorizer(stop_words='english', max_features=None),
    CountVectorizer(stop_words='english', max_features=None),
]
vect_names = ["TfidfVectorizer", "CountVectorizer"]

# Classifiers to compare, all constructed with their default settings.
# `clf_names` and `clf_params` are index-aligned with this list.
classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
]
clf_names = ["Naive Bayes", "Linear SVC", "Logistic Regression", "Random Forest"]

# One search grid per classifier: every grid tries unigrams and unigrams+bigrams,
# plus the classifier-specific values listed below.
clf_params = [
    {'vect__ngram_range': [(1, 1), (1, 2)], **clf_grid}
    for clf_grid in (
        {'clf__alpha': (1e-2, 1e-3)},
        {'clf__C': np.logspace(-5, 1, 5)},
        {'clf__C': np.logspace(-5, 1, 5)},
        {'clf__max_depth': (1, 2)},
    )
]

In [20]:
%%time
models = []
for classifier, clf_name, params in zip(classifiers, 
                                        clf_names, 
                                        clf_params):
    for vectorizer, vect_name in zip(vectorizers, 
                                     vect_names):
        pipe = Pipeline([
            ('vect', vectorizer),
            ('clf', classifier),
        ])
        gs = GridSearchCV(pipe, 
                          param_grid=params, 
                          n_jobs=-1,
                          scoring='accuracy',
                          cv=5,
                          verbose=10)
        
        gs.fit(df.long_post, df.group)
        score = gs.best_score_
        print(f'''
Classifier: {clf_name}
Vectorizer: {vect_name}
Score: {gs.best_score_:.4f}
Params: {gs.best_params_}
------------------------------
            ''')
        models.append((clf_name, vect_name, gs.best_score_, gs.best_params_))



Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    7.3s remaining:   29.3s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.7409717186366933, total=   4.7s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.43321407274895646, total=   4.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4216921025337087, total=   4.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.42210464432686656, total=   4.7s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    7.9s remaining:   14.7s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4533508541392904, total=   4.8s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    8.2s remaining:    8.2s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.41159293327493063, total=  19.5s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.3875968992248062, total=  19.3s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.7505438723712835, total=  20.6s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   25.7s remaining:   13.9s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.7907179115300943, total=  20.7s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.38509408801303896, total=  23.6s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.37904174015285125, total=  23.9s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   28.9s remaining:    7.2s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.47883124627310675, total=  17.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.49481676157103227, total=  17.9s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.4655504519188028, total=  21.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.4692827748383304, total=  21.8s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   32.9s finished



Classifier: Naive Bayes
Vectorizer: TfidfVectorizer
Score: 0.5407
Params: {'clf__alpha': 0.001, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    7.1s remaining:   28.5s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.7112400290065265, total=   4.7s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.42940575266462255, total=   4.7s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    7.7s remaining:   14.4s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4009406231628454, total=   4.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.41308884913536076, total=   4.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.3965031856571344, total=   4.6s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    8.2s remaining:    8.2s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4640727489564699, total=  18.7s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4532627865961199, total=  19.0s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.7763596809282088, total=  20.1s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   24.5s remaining:   13.2s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4532523336790636, total=  22.5s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4835742444152431, total=  23.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.4472369194591417, total=  16.9s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.44806638020447476, total=  16.9s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   27.7s remaining:    6.9s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.4594514013118664, total=  17.0s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.7798404641044235, total=  24.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.4789020294933567, total=  21.1s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   31.6s finished



Classifier: Naive Bayes
Vectorizer: CountVectorizer
Score: 0.5270
Params: {'clf__alpha': 0.01, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   41.9s


[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.42248005801305294, total=  44.2s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.3800554825521974, total=  43.2s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.36728395061728397, total=  43.4s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.36879537709290267, total=  44.0s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.044960116026105876, total= 1.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.37298747763864043, total=  45.2s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.7min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04593273077492962, total= 1.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04526208205577457, total= 1.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.044960116026105876, total= 1.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.04570840681951793, total= 1.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.04555409548839247, total= 1.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.046511627906976744, total= 1.2min

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  2.0min remaining:  2.0min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5758504891224996, total=  31.9s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.7836113125453227, total=  33.6s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5564373897707231, total=  31.7s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.552822640391169, total=  32.5s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5575432319618366, total=  36.0s
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.34677302393038434, tot

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed:  2.5min remaining:  1.5min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2943495400788436, total= 1.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2907097347755223, total= 1.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2893559928443649, total= 1.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2839506172839506, total= 1.7min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.601401664476566, total= 1.8min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.8269760696156635, total= 2.0min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed:  3.8min remaining:  1.3min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.569077013521458, total= 1.9min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5744554748851681, total= 1.9min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5733452593917711, total= 1.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5232216343327455, total= 1.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8134880348078317, total= 2.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5609578040589868, total= 2.0min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:  4.1min remaining:   39.9s


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5249666617276634, total= 1.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5313059033989267, total= 1.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8279912980420595, total= 7.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5946853555263543, total= 7.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5659905937683716, total= 6.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5669350029815146, total= 6.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.570751222403319, total= 7.0min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 10.2min finished



Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.6298
Params: {'clf__C': 0.31622776601683794, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   52.8s


[CV]  clf__C=1e-05, vect__ngram_range=(1, 1), score=0.05828861061419201, total=  50.9s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.42741116751269037, total= 1.4min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.696881798404641, total= 1.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.060453400503778336, total= 1.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.06163886874546773, total= 1.9min
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05963029218843172, total= 1.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  2.1min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5327785078113593, total= 1.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.057672652942035336, total= 2.0min
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5158020274299344, total= 1.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.31819517930629043, total= 1.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.32804859979256185, total= 1.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.33902759526938236, total= 1.5min
[CV]

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  2.3min remaining:  2.3min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.8002900652646846, total= 2.8min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.8011602610587382, total= 1.9min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5406628704920426, total= 1.9min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5193798449612403, total= 1.9min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5091122868900647, total= 2.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5119276929915543, tot

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed:  4.1min remaining:  2.5min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.7904278462654097, total= 2.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5789166301649876, total= 3.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5518812463256908, total= 3.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5511927692991554, total= 3.1min
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5562015503875969, total= 3.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.48488830486202367, total= 2.2min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed:  6.1min remaining:  2.1min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.45960047704233753, total= 2.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.4421395762335161, total= 2.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.43797766019988243, total= 2.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5332438878950507, total= 5.2min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.531041635797896, total= 5.8min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.522633744855967, total= 6.2min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:  8.4min remaining:  1.4min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.8184191443074692, total= 6.5min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5584756898817346, total= 6.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.7897026831036983, total= 5.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4882464593371295, total= 5.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4434156378600823, total= 5.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4627310673822302, total= 5.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.45028893169358425, total= 5.5min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 10.3min finished



Classifier: Linear SVC
Vectorizer: CountVectorizer
Score: 0.6084
Params: {'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  6.8min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.04593273077492962, total= 6.6min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04526208205577457, total= 9.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04556143445032334, total= 9.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04606440071556351, total=10.0min
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.04593273077492962, total=10.0min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.044960116026105876, total=10.2min
[CV] clf__C=0.31622776601683794, v

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 16.3min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.04818221638195357, total= 9.3min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.04889683959451401, total= 9.3min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.048747962661134986, total= 9.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.04556143445032334, total=14.1min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.04526208205577457, total=14.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.04593273077492962, total=14.1min
[CV

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 19.3min remaining: 19.3min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.21029731689630166, total=13.3min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.19491896627244854, total=13.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.1853321575543798, total=13.3min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.044960116026105876, total=23.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.19173210846051267, total=14.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.1972271914132379

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 30.8min remaining: 18.9min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.04526208205577457, total=22.3min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.04593273077492962, total=22.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.04606440071556351, total=22.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.04570840681951793, total=22.6min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.08293181486348372, total=28.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.09064539521392313, total=28.8min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 45.8min remaining: 16.1min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8001450326323423, total=27.9min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.0805408583186361, total=28.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.08164172469995555, total=28.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.08348240906380441, total=28.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5625638779383851, total=25.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5358612580834803, total=23.7min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 51.8min remaining:  8.4min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5387464809601422, total=23.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.541293977340489, total=22.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8178390137781001, total=32.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5691341801722879, total=32.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5433397540376352, total=29.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5393885949441505, total=30.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5496422182468694, total=29.9min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 63.6min finished



Classifier: Logistic Regression
Vectorizer: TfidfVectorizer
Score: 0.6046
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 12.9min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.06161483428237699, total=12.7min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.053203997648442095, total=18.8min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.053785746036449845, total=18.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05577456563001898, total=19.0min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05754323196183661, total=19.0min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05786802030456853, total=19.3min
[CV] clf__C=0.31622776601683794, 

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 33.0min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.2457293035479632, total=20.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.24299896280930508, total=20.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.24806201550387597, total=20.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.06775426219870664, total=29.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.06857483601669648, total=29.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.06599503577164549, total=29.4min
[CV] 

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 38.6min remaining: 38.6min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.36185641769398114, total=49.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6841189267585207, total=45.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.270870076425632, total=47.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.27678174544376943, total=49.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2826690027741276, total=49.8min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.281902206320811, total=49.7min


[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 69.8min remaining: 42.8min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.49467075485472334, total=44.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.4691358024691358, total=43.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.4713290857904875, total=43.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.47883124627310675, total=44.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.7984046410442349, total=91.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5329245145276683, total=99.3min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 133.6min remaining: 46.9min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5014697236919459, total=97.4min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.7696881798404641, total=104.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5012594458438288, total=99.6min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5120751341681574, total=100.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5298583734851803, total=80.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.48927101704879483, total=79.1min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 144.4min remaining: 23.5min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.4907393687953771, total=77.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5014907573047108, total=76.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.49382716049382713, total=104.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5384727697474084, total=107.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4985923840568973, total=103.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5125223613595706, total=102.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8105873821609862, total=109.3min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 179.1min finished



Classifier: Logistic Regression
Vectorizer: CountVectorizer
Score: 0.5717
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    6.2s remaining:   24.8s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06178390137781001, total=   3.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.07128159905937684, total=   3.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.0700832238282961, total=   3.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06559332140727489, total=   3.2s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    6.9s remaining:   12.8s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06267595199288784, total=   3.2s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    7.2s remaining:    7.2s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.051832384289677326, total=   8.9s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05085243974132863, total=   8.9s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.04904591532498509, total=   8.8s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   13.2s remaining:    7.1s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.048440899202320524, total=  10.1s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05482293673136761, total=  10.0s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05221174764321972, total=  10.4s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   15.7s remaining:    3.9s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.04997060552616108, total=   6.5s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.057193658319751074, total=   6.7s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05232558139534884, total=   6.7s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.056212585778945834, total=   8.5s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   17.8s finished



Classifier: Random Forest
Vectorizer: TfidfVectorizer
Score: 0.0663
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    6.0s remaining:   24.2s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.058402686523580084, total=   3.1s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06250906453952139, total=   3.1s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06819517930629042, total=   3.1s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.0646021632834494, total=   3.1s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    6.7s remaining:   12.5s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.058884913536076326, total=   3.1s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    7.1s remaining:    7.1s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05110235070813257, total=   8.1s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.04614932392710171, total=   8.1s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.048598688133571856, total=   8.2s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.04902102973168963, total=   8.6s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   12.1s remaining:    6.5s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05008149355460068, total=   9.3s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05395213923132705, total=   9.7s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   14.4s remaining:    3.6s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05055849500293945, total=   6.3s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05689731812120314, total=   6.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.05358446488538473, total=   7.9s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.057692307692307696, total=   7.9s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   17.1s finished



Classifier: Random Forest
Vectorizer: CountVectorizer
Score: 0.0625
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
CPU times: user 2h 59min 19s, sys: 9min 56s, total: 3h 9min 15s
Wall time: 4h 51min 15s


In [21]:
# Rank the collected results by score (index 2 of each entry), ascending,
# so that the best-scoring entry sits in the last position of the list.
models = sorted(models, key=lambda entry: entry[2])

# Report the top entry. The labels below mirror how each result tuple is
# indexed here: 0 -> classifier, 1 -> vectorizer, 2 -> score, 3 -> params.
print('And the winner is...\n')
for position, label in enumerate(('Classifier:', 'Vectorizer:', 'Score:', 'Params:')):
    print(label, models[-1][position])

And the winner is...

Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.6297897368034112
Params: {'clf__C': 0.31622776601683794, 'vect__ngram_range': (1, 2)}
