In [1]:
import pickle
df = pickle.load(open('Clayton_SG_df_all.pkl', 'br'))
df['long_post'] = df['long_post'].apply(', '.join)
df['long_post'] = df['long_post'].str.replace(',', '')
df.sample(5)

Unnamed: 0,post_number,long_post,group
5491,39356,husband last chemo treatment week finished che...,Hodgkins Lymphoma
16436,161085,dear husband,Incest Survivors
18733,159440,mothers day todayi hope dealing day better fee...,Infertility
5885,410406,become suddenly overwhelmed lonliness tonight ...,Panic Attack
4581,54470,well hello everyone hoping someone help husban...,Oxycodone


In [2]:
from sklearn.model_selection import train_test_split
X = df.long_post
y = df.group
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(40294,)
(10074,)
(40294,)
(10074,)


In [4]:
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
X_train_counts.shape

(40294, 46697)

In [5]:
# TF-IDF: term-frequency times inverse document frequency
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_train_tfidf.shape

(40294, 46697)

In [6]:
# Classifier using Naive Bayes
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train_tfidf, y_train)

In [7]:
from sklearn.pipeline import Pipeline
text_clf = Pipeline([('vect', CountVectorizer()),
                      ('tfidf', TfidfTransformer()),
                      ('clf', MultinomialNB()),
])

text_clf = text_clf.fit(X_train, y_train)

In [8]:
import numpy as np
predicted = text_clf.predict(X_test)
np.mean(predicted == y_test)

0.05787174905697836

In [9]:
# support vector machine classifier
from sklearn.linear_model import SGDClassifier

text_clf_svm = Pipeline([('vect', CountVectorizer()),
                      ('tfidf', TfidfTransformer()),
                      ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                            alpha=1e-3, max_iter=5, tol=None, random_state=42)),
])

_ = text_clf_svm.fit(X_train, y_train)

predicted_svm = text_clf_svm.predict(X_test)
np.mean(predicted_svm == y_test)

0.3461385745483423

In [10]:
# Tuning hyperparameters
from sklearn.model_selection import GridSearchCV
parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
               'tfidf__use_idf': (True, False),
               'clf__alpha': (1e-2, 1e-3),
}

In [11]:
# Grid search with Naive Bayes
gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)

In [12]:
%%time
gs_clf = gs_clf.fit(X_train, y_train)
print(gs_clf.best_score_)
gs_clf.best_params_



0.24911897552985557
CPU times: user 16.1 s, sys: 3.42 s, total: 19.6 s
Wall time: 54.1 s


In [13]:
# Grid search with SVM classifier
parameters_svm = {'vect__ngram_range': [(1, 1), (1, 2)],
               'tfidf__use_idf': (True, False),
               'clf-svm__alpha': (1e-2, 1e-3),
}
gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)

In [14]:
%%time
gs_clf_svm = gs_clf_svm.fit(X_train, y_train)
print(gs_clf_svm.best_score_)
gs_clf_svm.best_params_



0.33364768948230505
CPU times: user 3min 4s, sys: 16 s, total: 3min 20s
Wall time: 3min 31s


In [15]:
import nltk
nltk.download('stopwords')

from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords=True)

class StemmedCountVectorizer(CountVectorizer):
    def build_analyzer(self):
        analyzer = super(StemmedCountVectorizer, self).build_analyzer()
        return lambda doc: ([stemmer.stem(w) for w in analyzer(doc)])

stemmed_count_vect = StemmedCountVectorizer(stop_words='english')

text_mnb_stemmed = Pipeline([('vect', stemmed_count_vect),
                      ('tfidf', TfidfTransformer()),
                      ('mnb', MultinomialNB(fit_prior=False)),
])

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ec2-user/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
%%time
text_mnb_stemmed = text_mnb_stemmed.fit(X_train, y_train)

CPU times: user 24.3 s, sys: 197 ms, total: 24.5 s
Wall time: 24.5 s


In [17]:
predicted_mnb_stemmed = text_mnb_stemmed.predict(X_test)
np.mean(predicted_mnb_stemmed == y_test)

0.08755211435378202

In [18]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

  from numpy.core.umath_tests import inner1d


In [19]:
vectorizers = [
    TfidfVectorizer(stop_words='english',
                    max_features=None),
    CountVectorizer(stop_words='english',
                   max_features=None)
]

classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier()
]

clf_names = [
         "Naive Bayes",
         "Linear SVC",
         "Logistic Regression",
         "Random Forest",
        ]

vect_names = [
    "TfidfVectorizer",
    "CountVectorizer"
]

clf_params = [
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__alpha': (1e-2, 1e-3)},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__C': (np.logspace(-5, 1, 5))},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__C': (np.logspace(-5, 1, 5))},
              {'vect__ngram_range': [(1, 1), (1, 2)],
              'clf__max_depth': (1, 2)},
             ]

In [None]:
%%time
models = []
for classifier, clf_name, params in zip(classifiers, 
                                        clf_names, 
                                        clf_params):
    for vectorizer, vect_name in zip(vectorizers, 
                                     vect_names):
        pipe = Pipeline([
            ('vect', vectorizer),
            ('clf', classifier),
        ])
        gs = GridSearchCV(pipe, 
                          param_grid=params, 
                          n_jobs=-1,
                          scoring='accuracy',
                          cv=5,
                          verbose=10)
        
        gs.fit(df.long_post, df.group)
        score = gs.best_score_
        print(f'''
Classifier: {clf_name}
Vectorizer: {vect_name}
Score: {gs.best_score_:.4f}
Params: {gs.best_params_}
------------------------------
            ''')
        models.append((clf_name, vect_name, gs.best_score_, gs.best_params_))



Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:   10.4s remaining:   41.5s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 1), score=0.08267361459064035, total=   6.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.25573542556361106, total=   6.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.43161930980237934, total=   6.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.21637251969289062, total=   6.4s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:   11.4s remaining:   21.1s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.2790513833992095, total=   6.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.08898687243210743, total=   6.6s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:   12.0s remaining:   12.0s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.233300395256917, total=  22.7s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.21273214817757474, total=  23.1s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4164782223970111, total=  24.1s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   31.0s remaining:   16.7s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.459246878379707, total=  23.5s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.16581912453883738, total=  28.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.30454545454545456, total=  20.3s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   36.4s remaining:    9.1s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.05792163543441227, total=  30.0s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.23282480805663575, total=  21.4s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.2848346409772569, total=  25.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.08948792464174767, total=  26.6s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   42.9s finished



Classifier: Naive Bayes
Vectorizer: TfidfVectorizer
Score: 0.2750
Params: {'clf__alpha': 0.001, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 1) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.01, vect__ngram_range=(1, 2) .......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] clf__alpha=0.001, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:   10.2s remaining:   40.7s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.4288663848195851, total=   6.2s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.09960917927648061, total=   6.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.22135806162129823, total=   6.4s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:   11.2s remaining:   20.9s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.2754940711462451, total=   6.3s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 1), score=0.2562319992054822, total=   6.4s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:   11.7s remaining:   11.7s


[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.3040513833992095, total=  22.2s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.4581653721364664, total=  23.2s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.23451989231229434, total=  23.2s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   29.7s remaining:   16.0s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.46111493461803166, total=  22.9s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.2831462905948952, total=  26.7s
[CV]  clf__alpha=0.01, vect__ngram_range=(1, 2), score=0.09319571099308548, total=  28.6s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   35.1s remaining:    8.8s


[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.24000398843354273, total=  20.5s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.09940875839262452, total=  20.7s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.3011857707509881, total=  24.1s
[CV]  clf__alpha=0.001, vect__ngram_range=(1, 2), score=0.2829476611381468, total=  24.1s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   39.4s finished



Classifier: Naive Bayes
Vectorizer: CountVectorizer
Score: 0.2777
Params: {'clf__alpha': 0.001, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.2min


[CV]  clf__C=1e-05, vect__ngram_range=(1, 1), score=0.05849802371541502, total= 1.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05820469963622063, total= 1.8min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.3394946416281585, total= 1.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.3191699604743083, total= 1.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.2990366471347701, total= 1.1min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.15773123559474897, total= 1.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  2.8min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05849802371541502, total= 2.7min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05869500446916278, total= 1.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.0592243711794769, total= 1.9min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05849802371541502, total= 1.9min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05892910559377804, total= 2.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05869500446916278, total= 2.8min
[C

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  3.0min remaining:  3.0min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.49965588437715075, total=  55.3s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.4117588932806324, total=  56.6s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.30122644331438825, total=  55.0s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.11544242910111234, total=  54.1s
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2390128797561695, total= 2.5min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.37670076472340847, total=  60.0s
[CV] clf__C=10.0, vect__ngram_range=(1, 2) .............

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed:  3.9min remaining:  2.4min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.2191699604743083, total= 2.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.20627669083325056, total= 2.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.10842769816614892, total= 2.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.19224249675939775, total= 2.5min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.4116600790513834, total= 3.0min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.09519991983164645, total= 2.9min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed:  6.1min remaining:  2.1min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.5042768655982696, total= 3.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.2902582510718915, total= 3.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.374217896514053, total= 3.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.38023715415019765, total= 4.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.34501936637203295, total= 4.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.08748371580318669, total= 4.1min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed:  7.9min remaining:  1.3min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.490807196932455, total= 5.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.2694186858111477, total= 4.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.09469886762200622, total=13.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.5060466030872087, total=15.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.29035796191045965, total=15.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.3761048763531632, total=15.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4105731225296443, total=16.9min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 20.9min finished



Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.3418
Params: {'clf__C': 0.31622776601683794, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.4min


[CV]  clf__C=1e-05, vect__ngram_range=(1, 1), score=0.05952500250526105, total= 1.3min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.3222888604856946, total= 2.4min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05972542338911715, total= 2.8min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.060325057333732175, total= 2.8min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05918788713007571, total= 2.8min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.059683794466403164, total= 3.0min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=1e-05, vect__ngram_ra

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  3.7min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.2468964147383057, total= 2.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.2260444710340014, total= 2.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.12526305241006114, total= 3.1min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.391304347826087, total= 3.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.3611083523686563, total= 3.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.2959417688702762, total= 3.8min
[CV] clf__C=0.31622776601683

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed:  5.4min remaining:  5.4min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.08778434712897083, total= 5.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.2634360354970585, total= 5.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.4864811719594927, total= 6.6min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.3708498023715415, total= 6.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.3410467772370643, total= 6.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.09700370778635134, total= 8.9min


[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 12.0min remaining:  7.4min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.49287189066955067, total=11.3min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.39891304347826084, total=10.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.28437531159637053, total=10.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.36527957096037345, total=10.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.46848884082194475, total=10.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.07796372382002205, total= 7.4min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 17.3min remaining:  6.1min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.31897233201581027, total=10.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.2266427360654103, total= 9.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.29258118979044595, total=10.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.0864816113839062, total=16.8min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.4945433094091043, total=18.4min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.3791501976284585, total=19.2min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 23.9min remaining:  3.9min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.2706152158739655, total=20.1min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.34839606713675636, total=20.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.08036877442629523, total=19.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.324901185770751, total=23.9min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.24199820520490578, total=23.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4765509782715564, total=26.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.30092362697388025, total=24.5min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 37.4min finished



Classifier: Linear SVC
Vectorizer: CountVectorizer
Score: 0.3285
Params: {'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 10.7min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.05869500446916278, total=10.5min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05849802371541502, total=13.2min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05820469963622063, total=13.6min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05869500446916278, total=13.7min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05892910559377804, total=13.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.0592243711794769, total=13.9min
[CV] clf__C=0.31622776601683794, vec

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 24.1min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.05869500446916278, total=13.4min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.05892910559377804, total=13.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.0592243711794769, total=13.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05849802371541502, total=20.5min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05869500446916278, total=20.8min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.05892910559377804, total=21.0min
[CV] 

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 29.3min remaining: 29.3min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.16999311768754302, total=19.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.15849802371541502, total=19.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.1503624987585659, total=20.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.05820469963622063, total=30.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.14906770365938776, total=19.6min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.05849802371541502, total=29.9min

[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 44.8min remaining: 27.5min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.09890770618298426, total=20.2min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.05869500446916278, total=30.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.05892910559377804, total=31.0min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.0592243711794769, total=31.2min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.07134387351778657, total=38.2min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.07098613705633665, total=39.0min


[Parallel(n_jobs=-1)]: Done  37 out of  50 | elapsed: 64.4min remaining: 22.6min


[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.06654086801072599, total=38.3min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.06700568351779838, total=38.9min
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 2), score=0.055316163944282994, total=39.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.48746435945334776, total=41.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.3782608695652174, total=39.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.0967030764605672, total=35.0min


[Parallel(n_jobs=-1)]: Done  43 out of  50 | elapsed: 77.7min remaining: 12.6min


[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.3480981229516337, total=36.8min
[CV]  clf__C=10.0, vect__ngram_range=(1, 1), score=0.2710140592282381, total=35.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.34372827490316815, total=49.3min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.3766798418972332, total=49.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.4905122406842985, total=50.5min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.2669259148469439, total=49.1min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.08848582022246718, total=48.6min


[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 95.0min finished



Classifier: Logistic Regression
Vectorizer: TfidfVectorizer
Score: 0.3171
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 1) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=1e-05, vect__ngram_range=(1, 2) ..........................
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] clf__C=0.00031622776601683794, vect__ngram_range=(1, 1) .........
[CV] c

[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 18.1min


[CV] clf__C=0.01, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 1), score=0.06068129903664714, total=17.9min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05859683794466403, total=23.7min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.0592243711794769, total=24.7min
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05892910559377804, total=24.6min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05820469963622063, total=25.4min
[CV] clf__C=0.01, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=1e-05, vect__ngram_range=(1, 2), score=0.05869500446916278, total=25.7min
[CV] clf__C=0.31622776601683794, vec

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed: 46.0min


[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.18814229249011857, total=28.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 1) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.17390008938325555, total=28.9min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.01, vect__ngram_range=(1, 1), score=0.17090437730581315, total=28.9min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.061561264822134386, total=38.2min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.06102815913418178, total=39.6min
[CV] clf__C=0.31622776601683794, vect__ngram_range=(1, 2) ............
[CV]  clf__C=0.00031622776601683794, vect__ngram_range=(1, 2), score=0.061421876557981855, total=39.8min
[C

[Parallel(n_jobs=-1)]: Done  25 out of  50 | elapsed: 52.3min remaining: 52.3min


[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.24697669845639564, total=74.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.20820158102766798, total=70.5min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.10081170457961719, total=69.7min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.18621511570165855, total=73.9min
[CV] clf__C=10.0, vect__ngram_range=(1, 1) ...........................
[CV]  clf__C=0.01, vect__ngram_range=(1, 2), score=0.17758500348987935, total=74.3min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.42965293481466915, total=78.4min


[Parallel(n_jobs=-1)]: Done  31 out of  50 | elapsed: 105.3min remaining: 64.5min


[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.10311654474396233, total=70.8min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.3438735177865613, total=78.4min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.31174893236666995, total=80.1min
[CV] clf__C=10.0, vect__ngram_range=(1, 2) ...........................
[CV]  clf__C=0.31622776601683794, vect__ngram_range=(1, 1), score=0.25665569847442415, total=78.7min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.0817717206132879, total=268.2min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.33300395256917, total=309.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.2467843254561771, total=296.0min
[CV]  clf__C=10.0, vect__ngram_range=(1, 2), score=0.3082

[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed: 417.7min finished



Classifier: Logistic Regression
Vectorizer: CountVectorizer
Score: 0.2952
Params: {'clf__C': 10.0, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    9.3s remaining:   37.1s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.07393569953790188, total=   4.7s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.07391304347826087, total=   4.7s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.0666401827391002, total=   4.6s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06810250274204806, total=   4.7s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:   10.3s remaining:   19.1s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06583826034672813, total=   4.7s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:   10.8s remaining:   10.8s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05997443712515977, total=  12.1s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.06126482213438735, total=  12.1s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05929089283940808, total=  12.4s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.06331638249077674, total=  12.2s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   18.7s remaining:   10.1s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05992584427297325, total=  14.0s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.060662668370858326, total=  13.9s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   22.2s remaining:    5.5s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06195652173913044, total=   9.0s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.0632634819743768, total=   9.3s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06481204506929904, total=  12.0s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06493636636937569, total=  12.3s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   26.8s finished



Classifier: Random Forest
Vectorizer: TfidfVectorizer
Score: 0.0697
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
Fitting 5 folds for each of 4 candidates, totalling 20 fits




[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=1, vect__ngram_range=(1, 2) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] clf__max_depth=2, vect__ngram_range=(1, 1) ......................
[CV] c

[Parallel(n_jobs=-1)]: Done   4 out of  20 | elapsed:    8.8s remaining:   35.3s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06901976206862649, total=   4.6s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.0698972978362748, total=   4.5s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.07398947263879234, total=   4.5s


[Parallel(n_jobs=-1)]: Done   7 out of  20 | elapsed:   10.1s remaining:   18.8s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06689723320158103, total=   4.6s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 1), score=0.06363363062431106, total=   4.5s


[Parallel(n_jobs=-1)]: Done  10 out of  20 | elapsed:   10.5s remaining:   10.5s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.06125258086717137, total=  11.6s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.06086956521739131, total=  11.2s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.06192043075082262, total=  11.4s
[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.059787466481279174, total=  11.5s


[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:   17.2s remaining:    9.3s


[CV]  clf__max_depth=1, vect__ngram_range=(1, 2), score=0.05972542338911715, total=  12.8s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06331727460426703, total=  12.8s


[Parallel(n_jobs=-1)]: Done  16 out of  20 | elapsed:   20.3s remaining:    5.1s


[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.059782608695652176, total=   9.1s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06435594398649319, total=   8.9s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.062020141589390766, total=  10.8s
[CV]  clf__max_depth=2, vect__ngram_range=(1, 2), score=0.06162942178575007, total=  11.0s


[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:   24.5s finished



Classifier: Random Forest
Vectorizer: CountVectorizer
Score: 0.0687
Params: {'clf__max_depth': 2, 'vect__ngram_range': (1, 1)}
------------------------------
            
CPU times: user 2h 45min 36s, sys: 9min 18s, total: 2h 54min 54s
Wall time: 9h 59min 41s


In [22]:
models = sorted(models, key=lambda tup: tup[2])
print('And the winner is...')
print()
print('Classifier:', models[-1][0])
print('Vectorizer:', models[-1][1])
print('Score:', models[-1][2])
print('Params:', models[-1][3])

And the winner is...

Classifier: Linear SVC
Vectorizer: TfidfVectorizer
Score: 0.3417844663278272
Params: {'clf__C': 0.31622776601683794, 'vect__ngram_range': (1, 1)}
