In [1]:
import pickle
df = pickle.load(open('Clayton_SG_df_drop.pkl', 'br'))
df['long_post'] = df['long_post'].apply(', '.join)
df['long_post'] = df['long_post'].str.replace(',', '')
df.sample(5)

Unnamed: 0,long_post,post_number,group
21961,struggling anxiety years gets worse get older ...,60926,Anxiety
10345,ive sodas morning theres jugs tea work im hopi...,377042,Caffeine
26365,today well yesterday wed feb th pm est somewha...,68427,Video Game Addiction
28887,realise something im going get something thats...,547768,Widow
24333,looking anyone camptocormia knows someone deal...,532870,Parkinsons


In [2]:
from sklearn.model_selection import train_test_split
X = df.long_post
y = df.group
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(19543,)
(4886,)
(19543,)
(4886,)


In [4]:
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
X_train_counts.shape

(19543, 36145)

In [5]:
# TF-IDF: term-frequency times inverse document frequency
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_train_tfidf.shape

(19543, 36145)

In [6]:
# Classifier using Naive Bayes
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train_tfidf, y_train)

In [7]:
from sklearn.pipeline import Pipeline
text_clf = Pipeline([('vect', CountVectorizer()),
                      ('tfidf', TfidfTransformer()),
                      ('clf', MultinomialNB()),
])

text_clf = text_clf.fit(X_train, y_train)

In [8]:
import numpy as np
predicted = text_clf.predict(X_test)
np.mean(predicted == y_test)

0.36962750716332377

In [9]:
# support vector machine classifier
from sklearn.linear_model import SGDClassifier

text_clf_svm = Pipeline([('vect', CountVectorizer()),
                      ('tfidf', TfidfTransformer()),
                      ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                            alpha=1e-3, max_iter=5, tol=None, random_state=42)),
])

_ = text_clf_svm.fit(X_train, y_train)

predicted_svm = text_clf_svm.predict(X_test)
np.mean(predicted_svm == y_test)

0.6377404830126893

In [10]:
# Tuning hyperparameters
from sklearn.model_selection import GridSearchCV
parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
               'tfidf__use_idf': (True, False),
               'clf__alpha': (1e-2, 1e-3),
}

In [11]:
# Grid search with Naive Bayes
gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)

In [12]:
%%time
gs_clf = gs_clf.fit(X_train, y_train)
print(gs_clf.best_score_)
gs_clf.best_params_

0.47464565317504986
CPU times: user 2.67 s, sys: 308 ms, total: 2.98 s
Wall time: 22.5 s


In [13]:
# Grid search with SVM classifier
parameters_svm = {'vect__ngram_range': [(1, 1), (1, 2)],
               'tfidf__use_idf': (True, False),
               'clf-svm__alpha': (1e-2, 1e-3),
}
gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)

In [14]:
%%time
gs_clf_svm = gs_clf_svm.fit(X_train, y_train)
print(gs_clf_svm.best_score_)
gs_clf_svm.best_params_

0.5988333418615361
CPU times: user 1min 25s, sys: 8.96 s, total: 1min 34s
Wall time: 2min 4s


In [15]:
import nltk
nltk.download('stopwords')

from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords=True)

class StemmedCountVectorizer(CountVectorizer):
    def build_analyzer(self):
        analyzer = super(StemmedCountVectorizer, self).build_analyzer()
        return lambda doc: ([stemmer.stem(w) for w in analyzer(doc)])

stemmed_count_vect = StemmedCountVectorizer(stop_words='english')

text_mnb_stemmed = Pipeline([('vect', stemmed_count_vect),
                      ('tfidf', TfidfTransformer()),
                      ('mnb', MultinomialNB(fit_prior=False)),
])

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ec2-user/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
%%time
text_mnb_stemmed = text_mnb_stemmed.fit(X_train, y_train)

CPU times: user 11.4 s, sys: 146 ms, total: 11.5 s
Wall time: 11.5 s


In [17]:
predicted_mnb_stemmed = text_mnb_stemmed.predict(X_test)
np.mean(predicted_mnb_stemmed == y_test)

0.4950880065493246

In [18]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

  from numpy.core.umath_tests import inner1d


In [19]:
vectorizers = [
    TfidfVectorizer(stop_words='english',
                    max_features=None),
    CountVectorizer(stop_words='english',
                   max_features=None)
]

classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier()
]

clf_names = [
         "Naive Bayes",
         "Linear SVC",
         "Logistic Regression",
         "Random Forest",
        ]

vect_names = [
    "TfidfVectorizer",
    "CountVectorizer"
]

clf_params = [
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__alpha': (1e-2, 1e-3)},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__C': (np.logspace(-5, 1, 5))},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__C': (np.logspace(-5, 1, 5))},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__max_depth': (1, 2)},
             ]

In [20]:
%%time
models = []
for classifier, clf_name, params in zip(classifiers, 
                                        clf_names, 
                                        clf_params):
    for vectorizer, vect_name in zip(vectorizers, 
                                     vect_names):
        pipe = Pipeline([
            ('vect', vectorizer),
            ('clf', classifier),
        ])
        gs = GridSearchCV(pipe, 
                          param_grid=params, 
                          n_jobs=-1,
                          scoring='accuracy',
                          cv=5,
                          verbose=10)
        
        gs.fit(df.long_post, df.group)
        score = gs.best_score_
        print(f'''
Classifier: {clf_name}
Vectorizer: {vect_name}
Score: {gs.best_score_:.4f}
Params: {gs.best_params_}
------------------------------
            ''')
        models.append((clf_name, vect_name, gs.best_score_, gs.best_params_))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    5.2s remaining:   20.6s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.5169031719532554, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.7862595419847328, total=   3.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.52768201176232, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.5090871540685667, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.5016373311502251, total=   3.4s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    5.8s remaining:   10.8s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    5.9s remaining:    5.9s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5481646724802272, total=  13.9s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5346410684474123, total=  13.9s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5247646336471551, total=  14.0s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   17.4s remaining:    9.3s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.8071514664523906, total=  15.0s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5247831474597274, total=  16.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.529946303180504, total=  12.4s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   20.4s remaining:    5.1s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5477590752382884, total=  12.7s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.8133788670148654, total=  17.7s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5419449081803005, total=  15.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5325419566107246, total=  15.4s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   23.3s finished



Classifier: Naive Bayes
Vectorizer: TfidfVectorizer
Score: 0.5942
Params: {'clf__alpha': 0.001, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ............

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    5.1s remaining:   20.2s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.7444756930494174, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4629553827261564, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4838775096329345, total=   3.3s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    5.5s remaining:   10.2s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4603469640644362, total=   3.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.48080133555926546, total=   3.3s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    5.8s remaining:    5.8s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5270736158994119, total=  13.5s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5059892606361008, total=  13.5s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5083913221449038, total=  13.7s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   16.9s remaining:    9.1s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.5210767946577629, total=  16.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5020652622883106, total=  12.0s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5051166598444535, total=  12.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5194073455759599, total=  12.2s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   19.9s remaining:    5.0s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.7991161108879068, total=  17.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.7969063881076738, total=  17.6s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.5201784627864531, total=  15.1s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   22.5s finished



Classifier: Naive Bayes
Vectorizer: CountVectorizer
Score: 0.5734
Params: {'clf__alpha': 0.01, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1)

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   25.2s


[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.6502611490558458, total=  27.7s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5390912812116251, total=  27.9s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5421313506815365, total=  28.0s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5453254917866559, total=  29.8s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5482053422370617, total=  29.3s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.12253917235837686, total=  53.1s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2),

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.0min


[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008838891120932101, total=  57.6s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008880627839735646, total=  58.0s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.00880065493246009, total=  58.1s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.8304539975893933, total=  21.6s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.11586121437422553, total=  53.0s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6499695802068546, total=  23.1s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  c

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  1.4min remaining:  1.4min


[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.11600081119448388, total=  56.3s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.11297584936553418, total=  56.6s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6408597662771286, total=  22.0s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6363073110285006, total=  22.7s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6391731477691364, total=  23.7s
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.7886701486540779, total= 1.1min


[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed:  1.6min remaining:   59.2s


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5870200333889817, total= 1.1min
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5952139525451227, total= 1.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5845272206303725, total= 1.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5817843866171004, total= 1.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6560535388359359, total= 1.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.8487344314985938, total= 1.3min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed:  2.4min remaining:   51.5s


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6461318051575932, total= 1.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8408999598232222, total= 1.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6103923205342237, total= 1.2min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6482858323007021, total= 1.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6107245190339746, total= 1.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6119372160264354, total= 1.3min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:  2.7min remaining:   26.7s


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.62867572500507, total= 1.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6435726210350584, total= 1.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6611235043601703, total= 4.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8521494576134994, total= 4.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6471551371264839, total= 4.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6446160267111853, total= 4.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6433292028087567, total= 4.3min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  6.5min finished



Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.6905
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) ....

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   35.4s


[CV]  clf__C=1e-05, vect__ngram_range=(1, 1), score=0.21861691340498884, total=  34.0s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.5785456006428285, total=  55.6s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.7717959019686621, total=  34.8s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.615291016021091, total=  36.7s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.5954151177199505, total=  36.1s
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.6058013355592654, total=  40.2s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), 

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.4min


[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.23068979760429575, total= 1.3min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.24802271344554858, total= 1.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.28364805142627564, total= 1.4min
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.45367278797996663, total=  57.2s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.44816191656340354, total=  59.3s
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.450821334414926, total= 1.0min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  1.5min remaining:  1.5min


[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.8320610687022901, total= 1.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.8284451586982724, total=  58.5s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.6015007097951733, total= 1.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5820712239050347, total= 1.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5879801734820322, total= 1.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5895242070116862, total= 1.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) .................

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed:  2.6min remaining:  1.6min


[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.6424660312309877, total= 1.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.6307818256242325, total= 1.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8071514664523906, total= 1.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.6228831061544816, total= 1.7min
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.6304257095158597, total= 1.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5110601001669449, total= 1.0min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed:  3.6min remaining:  1.3min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.4877200163733115, total= 1.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5359967552220645, total= 1.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.4995869475423379, total= 1.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.8398955403776617, total= 3.0min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5951585976627712, total= 3.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6156966132630298, total= 3.3min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:  4.8min remaining:   47.1s


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5962412226352747, total= 3.4min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5896438804748261, total= 3.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8119726797910808, total= 2.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5382275400527277, total= 3.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4969300040933279, total= 3.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5192069392812887, total= 3.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5214941569282137, total= 3.3min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  6.3min finished



Classifier: Linear SVC
Vectorizer: CountVectorizer
Score: 0.6725
Params: {'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) ....

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  4.4min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.020875972165370446, total= 4.4min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008923139322652606, total= 7.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008838891120932101, total= 7.3min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.00880065493246009, total= 7.3min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008880627839735646, total= 7.3min
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.008973288814691152, total= 7.3min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=0.3162277660168379

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 10.4min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.11338518215309046, total= 6.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.11482858323007022, total= 6.3min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.11185308848080133, total= 6.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.009937132427499494, total= 8.1min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.00929368029739777, total= 8.0min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.009414654113794515, total= 8.2min
[C

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 11.9min remaining: 11.9min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5102450783447168, total= 9.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.45183532751977284, total= 9.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.44474007367990176, total= 9.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.44774886410574144, total= 9.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.4578464106844741, total= 9.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.035154680594616315

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 20.7min remaining: 12.7min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.03690934901642669, total=14.8min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.03526711185308848, total=15.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.03510945890128046, total=15.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.03192795742939009, total=15.4min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8378867014865408, total=17.6min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.39119853984992903, total=19.2min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 30.6min remaining: 10.8min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.48413017276014464, total=19.7min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.39116067740603055, total=19.5min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.3860008186655751, total=19.7min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.39398998330550916, total=19.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.641046440884202, total=15.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6281211625051166, total=15.2min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 33.7min remaining:  5.5min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6323038397328882, total=14.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.6266005782734407, total=14.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8465247087183608, total=20.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6465220036503752, total=20.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6332378223495702, total=18.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6350681536555143, total=18.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.6331385642737897, total=18.1min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 41.4min finished



Classifier: Logistic Regression
Vectorizer: TfidfVectorizer
Score: 0.6797
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  8.3min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.24212034383954154, total= 8.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.1959515859766277, total=10.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.1825691862866584, total=11.0min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.18460908718788374, total=11.4min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.19164469681606164, total=11.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.2189634391321816, total=12.7min
[CV] clf__C=0.31622776601683794, vect_

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 20.6min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.3344716753716352, total=20.3min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.3868194842406877, total=12.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.39425857083849647, total=12.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.27783411072804703, total=19.7min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.26913630781825626, total=19.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.2852671118530885, tot

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 26.1min remaining: 26.1min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.7736038569706709, total=26.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.5707111289674568, total=31.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.4314367580843226, total=31.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.44473737578584466, total=31.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.44114002478314746, total=31.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.44887312186978295, total=31.8min


[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 44.6min remaining: 27.3min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5895355911579802, total=27.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5671305771592304, total=26.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.570425444031392, total=26.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.5851419031719532, total=26.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.8246283648051427, total=48.6min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.6079902656661935, total=60.3min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 81.9min remaining: 28.8min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.8226195259140217, total=62.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5804096532143581, total=45.4min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5497339336880884, total=45.0min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5989148580968281, total=60.2min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.582480556692591, total=60.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5625774473358116, total=43.5min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 86.4min remaining: 14.1min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5970673275505989, total=60.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.5655258764607679, total=44.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5779760697627256, total=61.6min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5646911519198664, total=60.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5474826033565289, total=62.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5615448161916563, total=60.4min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.8300522298111691, total=64.3min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 107.6min finished



Classifier: Logistic Regression
Vectorizer: CountVectorizer
Score: 0.6427
Params: {'clf__C': 0.31622776601683794, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    4.4s remaining:   17.5s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.045600642828445156, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.04318460908718788, total=   2.3s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.04357703428335399, total=   2.3s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.05069965524234435, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.03526711185308848, total=   2.2s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    5.0s remaining:    9.2s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    5.1s remaining:    5.1s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.01569599339116068, total=   6.0s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.015468059461631177, total=   6.3s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.025173966434711422, total=   6.5s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.020685459338876496, total=   6.8s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   10.2s remaining:    5.5s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.015651085141903172, total=   6.8s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.03455202892728003, total=   7.0s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   10.8s remaining:    2.7s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.0332589738389779, total=   4.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.02540272614622057, total=   4.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.036021285304952924, total=   4.5s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.023580968280467445, total=   4.5s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   11.4s finished



Classifier: Random Forest
Vectorizer: TfidfVectorizer
Score: 0.0437
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ..........

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    4.2s remaining:   17.0s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.03816793893129771, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.04400327466230045, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.03672787979966611, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.050294058000405595, total=   2.2s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.0563816604708798, total=   2.2s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:    4.9s remaining:    9.0s
[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:    5.0s remaining:    5.0s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.019265868992090852, total=   5.7s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.018174308137133416, total=   5.6s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.01903397462136717, total=   5.7s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.019284853354760947, total=   5.9s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    8.3s remaining:    4.5s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.01669449081803005, total=   6.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.02249899558055444, total=   6.8s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   10.1s remaining:    2.5s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.030825390387345365, total=   4.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.03151862464183381, total=   4.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.028500619578686492, total=   5.4s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.02003338898163606, total=   5.4s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   11.9s finished



Classifier: Random Forest
Vectorizer: CountVectorizer
Score: 0.0451
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
CPU times: user 59min 8s, sys: 3min 8s, total: 1h 2min 16s
Wall time: 2h 55min 2s


In [21]:
models = sorted(models, key=lambda tup: tup[2])
print('And the winner is...')
print()
print('Classifier:', models[-1][0])
print('Vectorizer:', models[-1][1])
print('Score:', models[-1][2])
print('Params:', models[-1][3])

And the winner is...

Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.6904908101027467
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 2)}
