In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
import pickle
from collections import Counter, defaultdict
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from nltk.stem import *

In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
from sklearn.utils.extmath import randomized_svd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [3]:
df = pd.read_csv("dataset.csv")
print('Number of data points : ', df.shape[0])
print('Number of features : ', df.shape[1])
df.head()

Number of data points :  2072
Number of features :  9


Unnamed: 0.1,Unnamed: 0,full_text,summary,keywords,publish_date,authors,url,leaf_label,root_label
0,590,Having made a massive impact in Saudi Arabia w...,Having made a massive impact in Saudi Arabia w...,"['singhs', 'rooting', 'cool', 'saudi', 'style'...",,[],https://www.msn.com/en-in/entertainment/other/...,cricket,sports
1,388,Cricket is all about the emotional rollercoast...,"No matter which team fans hope to win, every s...","['wants', 'fans', 'finals', 'cup', 'win', 'tou...",,[],https://www.prnewswire.com:443/news-releases/c...,cricket,sports
2,423,New Zealand announces back-to-back tours next ...,New Zealand announces back-to-back tours next ...,"['test', 'west', 'tour', 'zealand', 'world', '...",2021-12-20 00:00:00,[],https://www.aljazeera.com/news/2021/12/20/cric...,cricket,sports
3,563,It's not the first time cricket fans in the co...,Billed as one of the pre-tournament favourites...,"['qualify', 'afghanistan', 'cup', 'world', 'ze...",,[],https://www.msn.com/en-in/news/other/t20-world...,cricket,sports
4,634,An employee works on a computer terminal again...,"REUTERS/Sivaram VBENGALURU, Oct 11 (Reuters Br...","['success', 'tech', 'startup', 'talent', 'onli...",2021-10-11 00:00:00,['Una Galani'],https://www.reuters.com/breakingviews/india-in...,cricket,sports


In [4]:
train, test = train_test_split(df[["full_text","root_label"]], test_size=0.2)

In [5]:
print('Number of data points in train data:', train.shape[0])
print('Number of data points in test data:', test.shape[0])

Number of data points in train data: 1657
Number of data points in test data: 415


# Cleanup

In [6]:
import re
def clean(text):
    text = re.sub(r"http\S+", '', text, flags=re.MULTILINE)
    texter = re.sub(r"<br />", " ", text)
    texter = re.sub(r"&quot;", "\"",texter)
    texter = re.sub('&#39;', "\"", texter)
    texter = re.sub('\n', " ", texter)
    texter = re.sub(' u '," you ", texter)
    texter = re.sub('`',"", texter)
    texter = re.sub(' +', ' ', texter)
    texter = re.sub(r"(!)\1+", r"!", texter)
    texter = re.sub(r"(\?)\1+", r"?", texter)
    texter = re.sub('&amp;', 'and', texter)
    texter = re.sub('\r', ' ',texter)
    texter = re.sub(r"[0-9]","", texter)
    texter = re.sub('[^a-zA-Z0-9\n]', ' ', texter)
    texter = re.sub('\s+',' ', texter)
    texter = texter.lower()
    clean = re.compile('<.*?>')
    texter = texter.encode('ascii', 'ignore').decode('ascii')
    texter = re.sub(clean, '', texter)
    if texter == "":
        texter = ""
    return texter

In [7]:
train_clean = train.copy()

In [8]:
train_clean

Unnamed: 0,full_text,root_label
1626,Coal mining authorities say that one miner is ...,climate
23,© Nikhil Naz Virat Kohli to play ODIs in South...,sports
880,Play Magnus Group to Bring Chess NFTs to Marke...,sports
1040,"This holiday season, The Save Movement is urgi...",climate
950,"Queens, NY – James Todd Smith — better known a...",sports
...,...,...
303,"Seacrest out, Ted Lasso in. Ryan Seacrest and ...",sports
1012,Citrusw00d Productions unveils films minted as...,sports
267,FILE - Tennessee Titans running back Derrick H...,sports
1763,A swarm of earthquakes has been rattling the o...,climate


In [9]:
trail = train_clean.copy()

In [10]:
trail

Unnamed: 0,full_text,root_label
1626,Coal mining authorities say that one miner is ...,climate
23,© Nikhil Naz Virat Kohli to play ODIs in South...,sports
880,Play Magnus Group to Bring Chess NFTs to Marke...,sports
1040,"This holiday season, The Save Movement is urgi...",climate
950,"Queens, NY – James Todd Smith — better known a...",sports
...,...,...
303,"Seacrest out, Ted Lasso in. Ryan Seacrest and ...",sports
1012,Citrusw00d Productions unveils films minted as...,sports
267,FILE - Tennessee Titans running back Derrick H...,sports
1763,A swarm of earthquakes has been rattling the o...,climate


In [11]:
train_clean = train_clean['full_text'].apply(clean)

In [12]:
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk import pos_tag
from pickle import dump

counts = []

wnl = nltk.wordnet.WordNetLemmatizer()
analyzer = CountVectorizer().build_analyzer()

def penn2morphy(penntag):
    """ Converts Penn Treebank tags to WordNet. """
    morphy_tag = {'NN':'n', 'JJ':'a',
                  'VB':'v', 'RB':'r'}
    try:
        return morphy_tag[penntag[:2]]
    except:
        return 'n'

def lemmatize_sent(list_word):
    # Text input is string, returns array of lowercased strings(words).
    return [wnl.lemmatize(word.lower(), pos=penn2morphy(tag)) 
            for word, tag in pos_tag(list_word)]


def rmv_nums(doc):
    #gets rid of numbers including floats
    #does lemmatization with nltk.wordnet.WordNetLemmatizer and pos_tag
    return (word for word in lemmatize_sent(analyzer(doc)) 
            if not word.isdigit())

#CountVectorizer returns a callable that handles preprocessing and tokenization
#Use the “english” stopwords of the CountVectorizer
vectorizer=CountVectorizer(analyzer=rmv_nums,min_df=3,stop_words='english')

#do feature extraction (train):
X_train_counts=vectorizer.fit_transform(train["full_text"]) #get matrix of doc-term counts (training data)
print('Size of training data after lemmatization but before TF-IDF: ', X_train_counts.shape) 
X_test_counts=vectorizer.transform(test["full_text"]) 
print('Size of testing data after lemmatization but before TF-IDF:  ', X_test_counts.shape) 

from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
print('Shape of train TF-IDF matrix: ',X_train_tfidf.shape)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)
print('Shape of test TF-IDF matrix:  ',X_test_tfidf.shape)

Size of training data after lemmatization but before TF-IDF:  (1657, 10270)
Size of testing data after lemmatization but before TF-IDF:   (415, 10270)
Shape of train TF-IDF matrix:  (1657, 10270)
Shape of test TF-IDF matrix:   (415, 10270)


In [13]:
y_train_encoded = train["root_label"].copy()
y_test_encoded = test["root_label"].copy()

y_train_encoded[y_train_encoded == 'sports'] = 0
y_test_encoded[y_test_encoded == 'sports'] = 0


y_train_encoded[y_train_encoded== 'climate'] = 1
y_test_encoded[y_test_encoded == 'climate'] = 1

print("Training Set\n")
print("Original train_dataset:\n" + str(train["root_label"][0:20]))
print("\nBinarized train_dataset:\n" + str(y_train_encoded[0:20]))
print("\nTest Set\n")
print("Original test_dataset:\n" + str(test["root_label"][0:20]))
print("\nBinarized test_dataset:\n" + str(y_test_encoded[0:20]))

Training Set

Original train_dataset:
1626    climate
23       sports
880      sports
1040    climate
950      sports
147      sports
1338    climate
1487    climate
89       sports
552      sports
1816    climate
1523    climate
510      sports
1653    climate
901      sports
1740    climate
1945    climate
1700    climate
1710    climate
149      sports
Name: root_label, dtype: object

Binarized train_dataset:
1626    1
23      0
880     0
1040    1
950     0
147     0
1338    1
1487    1
89      0
552     0
1816    1
1523    1
510     0
1653    1
901     0
1740    1
1945    1
1700    1
1710    1
149     0
Name: root_label, dtype: object

Test Set

Original test_dataset:
61       sports
733      sports
1274    climate
767      sports
1738    climate
1909    climate
1154    climate
1022     sports
1500    climate
1296    climate
275      sports
1178    climate
746      sports
1076    climate
607      sports
1559    climate
611      sports
982      sports
743      sports
1684    climat

In [14]:
wnl = nltk.wordnet.WordNetLemmatizer()

def penn2morphy(penntag):
    """ Converts Penn Treebank tags to WordNet. """
    morphy_tag = {'NN':'n', 'JJ':'a',
                  'VB':'v', 'RB':'r'}
    try:
        return morphy_tag[penntag[:2]]
    except:
        return 'n'

def lemmatize_sent(list_word):
    return [wnl.lemmatize(word.lower(), pos=penn2morphy(tag)) 
            for word, tag in pos_tag(list_word)]


In [15]:
analyzer = CountVectorizer().build_analyzer()
stemmer = PorterStemmer()
def stemmed (doc):
    return (stemmer.stem(word) for word in analyzer(doc) if not word.isdigit())

def lemma(doc):
    return (word for word in lemmatize_sent(analyzer(doc)) if not word.isdigit())

def nolemma(doc):
    return (word for word in analyzer(doc) if not word.isdigit())

In [16]:
vectorizer_df3_lemma = CountVectorizer(min_df=3, 
                             analyzer=lemma, 
                             stop_words='english')
vectorizer_df5_lemma = CountVectorizer(min_df=5, 
                             analyzer=lemma, 
                             stop_words='english')
vectorizer_df3_nolemma = CountVectorizer(min_df=3, 
                             analyzer=nolemma, 
                             stop_words='english')
vectorizer_df5_nolemma = CountVectorizer(min_df=5, 
                             analyzer=nolemma, 
                             stop_words='english')
vectorizer_df3_stem = CountVectorizer(min_df=3, 
                             analyzer=stemmed, 
                             stop_words='english')
vectorizer_df5_stem = CountVectorizer(min_df=5, 
                             analyzer=stemmed, 
                             stop_words='english')


In [17]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import TruncatedSVD, NMF

# used to cache results
from tempfile import mkdtemp
from shutil import rmtree
import joblib
from joblib import Memory

# print(__doc__)
location = mkdtemp()
memory = Memory(location=location, verbose=10)

pipeline0 = Pipeline([
    ('vect', vectorizer_df3_lemma),
    ('tfidf', TfidfTransformer()),
    ('reduce_dim', TruncatedSVD(n_components=50, random_state=42)),
    ('clf', SVC(kernel='linear', C=1)),
],
memory=memory
)

param_grid0 = [
    {
        'vect': [vectorizer_df3_stem, 
                 vectorizer_df5_stem,
                 vectorizer_df3_lemma, 
                 vectorizer_df3_nolemma, 
                 vectorizer_df5_lemma, 
                 vectorizer_df5_nolemma],
        'reduce_dim': [TruncatedSVD(n_components=5, random_state=42),
                       NMF(n_components=5, init='random', random_state=42, max_iter=10000)],
        'clf': [SVC(kernel='linear', C=1.0,max_iter=10000), 
                LogisticRegression(penalty='l1', C=10,max_iter=10000, solver='liblinear'), 
                LogisticRegression(penalty='l2', C=100,max_iter=10000, solver = 'liblinear'),
                GaussianNB()]
    }
]
grid0 = GridSearchCV(pipeline0, cv=5, n_jobs=1, param_grid=param_grid0, scoring='accuracy')
grid0.fit(train['full_text'], y_train_encoded.astype(str).astype(int))
rmtree(location)

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=3,
                stop_words='english'), 
186     (CNN) Indian police have arrested three Kashmi...
212     India's Mayank Agarwal , left, talks with Axar...
495     Game recap\n\nOffensive MVP\n\nTanner Morgan, ...
190     Kashmiri students assaulted and bowler Mohamme...
890     Lincoln High School’s marching band was named ...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1325, dtype: object, 
186     0
212     0
495     0
190     0
890   

________________________________________________fit_transform_one - 8.2s, 0.1min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x8044 sparse matrix of type '<class 'numpy.int64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=5, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1


________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1326, dtype: object, 
1626    1
23      0
880     0
1040    1
950   

_______________________________________________fit_transform_one - 18.1s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1325x8984 sparse matrix of type '<class 'numpy.int64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212     0
495     0
190     0
890     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1325, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=5, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0


________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=3,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
852     The team behind the Stockfish chess engine has...
473     Sports Ramapo Icon Drew Gibbs Remembered By NJ...
942     Articles Sorry, there are no recent results fo...
493     LSU wide receiver Trey Palmer (33) heads upfie...
261     Texas finally halted its losing streak at six ...
Name: full_text, Length: 1326, dtype: object, 
1626    1
23      0
880     0
1040    1
950     

________________________________________________fit_transform_one - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x11269 sparse matrix of type '<class 'numpy.int64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=5, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1325, dtype: object, 
1626    1
23      0
880     0
1040    1
950     

_______________________________________________fit_transform_one - 19.0s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x6256 sparse matrix of type '<class 'numpy.int64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
852     0
473     0
942     0
493     0
261     0
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=5, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1


________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function nolemma at 0x7f8467a871f0>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1326, dtype: object, 
1626    1
23      0
880     0
1040    1
950   

________________________________________________fit_transform_one - 0.3s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b933c746499970a723b05358b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/c8133f6fbaafc962b213df4d983e8750
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=5, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23      0

________________________________________________fit_transform_one - 0.2s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/e17a321ee1988f17060fab9085657c1e
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/5413d243e28eb3e17c68abb2c82ee09b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=5, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212     0

________________________________________________fit_transform_one - 0.5s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/92da76a90960598bff46e14c10126f1c
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/5c5b2e45bc946220c27489515d5f544d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=5, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23      

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/853fd1756d56f24955fb21712de048ec
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/cf76d826d382c33eea6803df5b814985
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=5, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
852     0
473     0
942     0
493     0


[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/b44d5957c529afd8c8fa3cabc32fdb57
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/0ebae1f6f3561234ff7879d107e76888
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/a3505637a1e7df7701137cf65c520459
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/aa09476f7ae473de27402aaa84213eb0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9284f9f06f6dc2022e7b14ae007524c7
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8692ee0f715d96025d33273bc4e03307
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/b44d5957c529afd8c8fa3cabc32fdb57
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/0ebae1f6f3561234ff7879d107e76888
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/a3505637a1e7df7701137cf65c520459
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/aa09476f7ae473de27402aaa84213eb0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9284f9f06f6dc2022e7b14ae007524c7
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8692ee0f715d96025d33273bc4e03307
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/b44d5957c529afd8c8fa3cabc32fdb57
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/0ebae1f6f3561234ff7879d107e76888
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/a3505637a1e7df7701137cf65c520459
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/aa09476f7ae473de27402aaa84213eb0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9284f9f06f6dc2022e7b14ae007524c7
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/8692ee0f715d96025d33273bc4e03307
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmp887uc00a/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

In [18]:
gg0 = pd.DataFrame(grid0.cv_results_)

In [19]:
gg00 = gg0.sort_values(by='mean_test_score',ascending=False)
gg00

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf,param_reduce_dim,param_vect,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
25,0.075437,0.000911,2.002199,0.092823,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.918675,0.936747,0.915408,0.924471,0.909366,0.920933,0.009296,1
13,0.075936,0.000732,1.997829,0.095103,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.921687,0.933735,0.915408,0.924471,0.909366,0.920933,0.008262,1
17,0.093485,0.001213,0.083722,0.003663,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.912651,0.933735,0.912387,0.915408,0.918429,0.918522,0.007915,3
15,0.095489,0.001412,0.084397,0.003628,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.912651,0.933735,0.912387,0.915408,0.915408,0.917918,0.008014,4
29,0.091587,0.001193,0.083836,0.003853,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.912651,0.933735,0.912387,0.915408,0.912387,0.917313,0.00829,5
26,0.086722,0.001053,4.670132,0.223706,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.900602,0.927711,0.915408,0.912387,0.927492,0.91672,0.01017,6
14,0.085103,0.001052,4.638866,0.209409,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.900602,0.930723,0.915408,0.912387,0.924471,0.916718,0.010358,7
21,0.096783,0.001995,0.089148,0.00309,"LogisticRegression(C=10, max_iter=10000, penal...","NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.900602,0.933735,0.918429,0.912387,0.918429,0.916716,0.010714,8
24,0.078035,0.000794,2.001578,0.094888,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.89759,0.939759,0.915408,0.915408,0.915408,0.916715,0.013431,9
12,0.079151,0.000962,1.998965,0.096379,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=5, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.900602,0.939759,0.912387,0.915408,0.915408,0.916713,0.01275,10


In [29]:
print(grid0.best_params_)

{'clf': LogisticRegression(C=10, max_iter=10000, penalty='l1', solver='liblinear'), 'reduce_dim': TruncatedSVD(n_components=5, random_state=42), 'vect': CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=5,
                stop_words='english')}


In [24]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import TruncatedSVD, NMF

# used to cache results
from tempfile import mkdtemp
from shutil import rmtree
import joblib
from joblib import Memory

# print(__doc__)
location = mkdtemp()
memory = Memory(location=location, verbose=10)

pipeline1 = Pipeline([
    ('vect', vectorizer_df3_lemma),
    ('tfidf', TfidfTransformer()),
    ('reduce_dim', TruncatedSVD(n_components=50, random_state=42)),
    ('clf', SVC(kernel='linear', C=1)),
],
memory=memory
)

param_grid1 = [
    {
        'vect': [vectorizer_df3_stem, 
                 vectorizer_df5_stem,
                 vectorizer_df3_lemma, 
                 vectorizer_df3_nolemma, 
                 vectorizer_df5_lemma, 
                 vectorizer_df5_nolemma],
        'reduce_dim': [TruncatedSVD(n_components=50, random_state=42),
                       NMF(n_components=50, init='random', random_state=42, max_iter=10000)],
        'clf': [SVC(kernel='linear', C=1.0,max_iter=10000), 
                LogisticRegression(penalty='l1', C=10,max_iter=10000, solver='liblinear'), 
                LogisticRegression(penalty='l2', C=100,max_iter=10000, solver = 'liblinear'),
                GaussianNB()]
    }
]
grid1 = GridSearchCV(pipeline1, cv=5, n_jobs=1, param_grid=param_grid1, scoring='accuracy')
grid1.fit(train['full_text'], y_train_encoded.astype(str).astype(int))
rmtree(location)

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=3,
                stop_words='english'), 
186     (CNN) Indian police have arrested three Kashmi...
212     India's Mayank Agarwal , left, talks with Axar...
495     Game recap\n\nOffensive MVP\n\nTanner Morgan, ...
190     Kashmiri students assaulted and bowler Mohamme...
890     Lincoln High School’s marching band was named ...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1325, dtype: object, 
186     0
212     0
495     0
190     0
890   

________________________________________________fit_transform_one - 8.2s, 0.1min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x8044 sparse matrix of type '<class 'numpy.int64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=50, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1326, dtype: object, 
1626    1
23      0
880     0
1040    1
950   

_______________________________________________fit_transform_one - 18.2s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1325x8984 sparse matrix of type '<class 'numpy.int64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212     0
495     0
190     0
890     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1325, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=50, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0

________________________________________________fit_transform_one - 0.2s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=3,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
852     The team behind the Stockfish chess engine has...
473     Sports Ramapo Icon Drew Gibbs Remembered By NJ...
942     Articles Sorry, there are no recent results fo...
493     LSU wide receiver Trey Palmer (33) heads upfie...
261     Texas finally halted its losing streak at six ...
Name: full_tex

________________________________________________fit_transform_one - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x11269 sparse matrix of type '<class 'numpy.int64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=50, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626   

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1325, dtype: object, 
1626    1
23      0
880     0
1040    1
950     

_______________________________________________fit_transform_one - 18.7s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x6256 sparse matrix of type '<class 'numpy.int64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
852     0
473     0
942     0
493     0
261     0
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=50, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1

________________________________________________fit_transform_one - 0.2s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function nolemma at 0x7f8467a871f0>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_t

________________________________________________fit_transform_one - 4.3s, 0.1min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b933c746499970a723b05358b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/c8133f6fbaafc962b213df4d983e8750
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=50, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23      

________________________________________________fit_transform_one - 4.5s, 0.1min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/e17a321ee1988f17060fab9085657c1e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/5413d243e28eb3e17c68abb2c82ee09b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=50, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212     

________________________________________________fit_transform_one - 8.2s, 0.1min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/92da76a90960598bff46e14c10126f1c
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/5c5b2e45bc946220c27489515d5f544d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=50, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23     

________________________________________________fit_transform_one - 3.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/853fd1756d56f24955fb21712de048ec
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/cf76d826d382c33eea6803df5b814985
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=50, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23      

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/727bf84b2c3b12a18d06ffa1aade3b1b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/91545f3324650ca34e8537968adead6c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/14f4c2e2cee999dc59ff091dd8dedad0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/db0d28cd273f801d4606eabc70146ecd
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be3d7cfd98e0408045f76335b92cd8c1
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/94b6c2f3ea4bbe6d2a195a83e1a378ca
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/727bf84b2c3b12a18d06ffa1aade3b1b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/91545f3324650ca34e8537968adead6c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/14f4c2e2cee999dc59ff091dd8dedad0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/db0d28cd273f801d4606eabc70146ecd
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be3d7cfd98e0408045f76335b92cd8c1
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/94b6c2f3ea4bbe6d2a195a83e1a378ca
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/727bf84b2c3b12a18d06ffa1aade3b1b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/91545f3324650ca34e8537968adead6c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/14f4c2e2cee999dc59ff091dd8dedad0
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/db0d28cd273f801d4606eabc70146ecd
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/be3d7cfd98e0408045f76335b92cd8c1
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/94b6c2f3ea4bbe6d2a195a83e1a378ca
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpol1xiv2x/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

________________________________________________fit_transform_one - 4.0s, 0.1min


In [27]:
    
print(grid1.best_params_)


{'clf': GaussianNB(), 'reduce_dim': NMF(init='random', max_iter=10000, n_components=50, random_state=42), 'vect': CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=5,
                stop_words='english')}


In [25]:
gg1 = pd.DataFrame(grid1.cv_results_)

In [26]:
gg2 = gg1.sort_values(by='mean_test_score',ascending=False)
gg2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf,param_reduce_dim,param_vect,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
43,0.079497,0.003423,2.044578,0.100321,GaussianNB(),"NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': GaussianNB(), 'reduce_dim': NMF(init='...",0.960843,0.972892,0.969789,0.963746,0.94864,0.963182,0.008428,1
44,0.089244,0.00224,4.680717,0.213878,GaussianNB(),"NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': GaussianNB(), 'reduce_dim': NMF(init='...",0.954819,0.975904,0.963746,0.954683,0.963746,0.96258,0.007782,2
47,0.093858,0.000992,0.139942,0.006422,GaussianNB(),"NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': GaussianNB(), 'reduce_dim': NMF(init='...",0.951807,0.963855,0.969789,0.954683,0.966767,0.96138,0.006962,3
45,0.100483,0.005163,0.140445,0.005268,GaussianNB(),"NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': GaussianNB(), 'reduce_dim': NMF(init='...",0.948795,0.966867,0.966767,0.957704,0.960725,0.960172,0.006696,4
46,0.085876,0.001528,4.673411,0.211533,GaussianNB(),"NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': GaussianNB(), 'reduce_dim': NMF(init='...",0.951807,0.96988,0.966767,0.94864,0.963746,0.960168,0.008408,5
16,0.097311,0.001911,4.633917,0.207116,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=50, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.951807,0.966867,0.969789,0.951662,0.957704,0.959566,0.007536,6
15,0.112727,0.003694,0.089845,0.004033,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=50, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.960843,0.966867,0.966767,0.94864,0.951662,0.958956,0.007573,7
20,0.092085,0.002162,4.683023,0.215639,"LogisticRegression(C=10, max_iter=10000, penal...","NMF(init='random', max_iter=10000, n_component...",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.951807,0.963855,0.960725,0.951662,0.963746,0.958359,0.005525,8
13,0.090981,0.003367,2.005989,0.096134,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=50, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.951807,0.966867,0.966767,0.954683,0.951662,0.958357,0.006991,9
26,0.095378,0.002703,4.638506,0.208964,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=50, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.957831,0.963855,0.963746,0.94864,0.957704,0.958355,0.005557,10


In [None]:
gg2.shape

In [20]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import TruncatedSVD, NMF

# used to cache results
from tempfile import mkdtemp
from shutil import rmtree
import joblib
from joblib import Memory

# print(__doc__)
location = mkdtemp()
memory = Memory(location=location, verbose=10)

pipeline2 = Pipeline([
    ('vect', vectorizer_df3_lemma),
    ('tfidf', TfidfTransformer()),
    ('reduce_dim', TruncatedSVD(n_components=50, random_state=42)),
    ('clf', SVC(kernel='linear', C=1)),
],
memory=memory
)

param_grid2 = [
    {
        'vect': [vectorizer_df3_stem, 
                 vectorizer_df5_stem, 
                 vectorizer_df3_lemma, 
                 vectorizer_df3_nolemma, 
                 vectorizer_df5_lemma, 
                 vectorizer_df5_nolemma],
        
        'reduce_dim': [TruncatedSVD(n_components=500, random_state=42), 
                       NMF(n_components=500, init='random', random_state=42, max_iter=10000)],
        
        'clf': [SVC(kernel='linear', C=1.0,max_iter=10000), 
                LogisticRegression(penalty='l1', C=10,max_iter=10000, solver='liblinear'), 
                LogisticRegression(penalty='l2', C=100,max_iter=10000, solver = 'liblinear'),
                GaussianNB()]
    }
]
grid2 = GridSearchCV(pipeline2, cv=5, n_jobs=1, param_grid=param_grid2, scoring='accuracy')
grid2.fit(train['full_text'], y_train_encoded.astype(str).astype(int))
rmtree(location)

________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=3,
                stop_words='english'), 
186     (CNN) Indian police have arrested three Kashmi...
212     India's Mayank Agarwal , left, talks with Axar...
495     Game recap\n\nOffensive MVP\n\nTanner Morgan, ...
190     Kashmiri students assaulted and bowler Mohamme...
890     Lincoln High School’s marching band was named ...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_text, Length: 1325, dtype: object, 
186     0
212     0
495     0
190     0
890   

________________________________________________fit_transform_one - 8.1s, 0.1min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x8044 sparse matrix of type '<class 'numpy.int64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=500, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    

________________________________________________fit_transform_one - 2.5s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function stemmed at 0x7f8461748700>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_t

_______________________________________________fit_transform_one - 18.3s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1325x8984 sparse matrix of type '<class 'numpy.int64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212     0
495     0
190     0
890     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1325, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=500, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     

________________________________________________fit_transform_one - 2.3s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=3,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
852     The team behind the Stockfish chess engine has...
473     Sports Ramapo Icon Drew Gibbs Remembered By NJ...
942     Articles Sorry, there are no recent results fo...
493     LSU wide receiver Trey Palmer (33) heads upfie...
261     Texas finally halted its losing streak at six ...
Name: full_tex

________________________________________________fit_transform_one - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x11269 sparse matrix of type '<class 'numpy.int64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
303     0
1012    0
267     0
1763    1
1588    1
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=500, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626  

________________________________________________fit_transform_one - 2.4s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function lemma at 0x7f8467a87160>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_tex

_______________________________________________fit_transform_one - 18.7s, 0.3min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TfidfTransformer(), <1326x6256 sparse matrix of type '<class 'numpy.int64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23      0
880     0
1040    1
950     0
       ..
852     0
473     0
942     0
493     0
261     0
Name: root_label, Length: 1326, dtype: int64, 
None, message_clsname='Pipeline', message=None)
________________________________________________fit_transform_one - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(TruncatedSVD(n_components=500, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    

________________________________________________fit_transform_one - 2.1s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(CountVectorizer(analyzer=<function nolemma at 0x7f8467a871f0>, min_df=5,
                stop_words='english'), 
1626    Coal mining authorities say that one miner is ...
23      © Nikhil Naz Virat Kohli to play ODIs in South...
880     Play Magnus Group to Bring Chess NFTs to Marke...
1040    This holiday season, The Save Movement is urgi...
950     Queens, NY – James Todd Smith — better known a...
                              ...                        
303     Seacrest out, Ted Lasso in. Ryan Seacrest and ...
1012    Citrusw00d Productions unveils films minted as...
267     FILE - Tennessee Titans running back Derrick H...
1763    A swarm of earthquakes has been rattling the o...
1588    A very strong 5.5 earthquake was just recorded...
Name: full_t

____________________________________________fit_transform_one - 1468.9s, 24.5min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b933c746499970a723b05358b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/c8133f6fbaafc962b213df4d983e8750
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=500, random_state=42), <1326x8044 sparse matrix of type '<class 'numpy.float64'>'
	with 250212 stored elements in Compressed Sparse Row format>, 
1626    1
23     

____________________________________________fit_transform_one - 1371.2s, 22.9min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/e17a321ee1988f17060fab9085657c1e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/5413d243e28eb3e17c68abb2c82ee09b
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=500, random_state=42), <1325x8984 sparse matrix of type '<class 'numpy.float64'>'
	with 243688 stored elements in Compressed Sparse Row format>, 
186     0
212    

____________________________________________fit_transform_one - 1308.4s, 21.8min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/92da76a90960598bff46e14c10126f1c
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/5c5b2e45bc946220c27489515d5f544d
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=500, random_state=42), <1326x11269 sparse matrix of type '<class 'numpy.float64'>'
	with 260430 stored elements in Compressed Sparse Row format>, 
1626    1
23    

_____________________________________________fit_transform_one - 916.5s, 15.3min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/853fd1756d56f24955fb21712de048ec
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/cf76d826d382c33eea6803df5b814985
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
________________________________________________________________________________
[Memory] Calling sklearn.pipeline._fit_transform_one...
_fit_transform_one(NMF(init='random', max_iter=10000, n_components=500, random_state=42), <1326x6256 sparse matrix of type '<class 'numpy.float64'>'
	with 236969 stored elements in Compressed Sparse Row format>, 
1626    1
23     

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6464edc159e97adb03b9f42038caa8ec
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/500c928ed49659d72c269641748166c2
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/c010445463ac80620c925fe0ce65e6c9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6aa2074eb9ead9c6da56135b6faec73a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/57721b8deae79fdbb071dd9adac75aa4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/ec1bbb20dff562129138b6779c255e61
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6464edc159e97adb03b9f42038caa8ec
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/500c928ed49659d72c269641748166c2
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/c010445463ac80620c925fe0ce65e6c9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6aa2074eb9ead9c6da56135b6faec73a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/57721b8deae79fdbb071dd9adac75aa4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/ec1bbb20dff562129138b6779c255e61
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6464edc159e97adb03b9f42038caa8ec
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/500c928ed49659d72c269641748166c2
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/c010445463ac80620c925fe0ce65e6c9
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/59038c04361fc116933e9bbc404c780c
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/d550acc2e6d0cea56e41b1737b7bb05a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/6aa2074eb9ead9c6da56135b6faec73a
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/42149b1b93

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/be80216db77a3c6e91f072aeee51cd8e
___________________________________fit_transform_one cache loaded - 0.1s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/fca2d6e12f5132b8595fd2e76fd61d65
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/57721b8deae79fdbb071dd9adac75aa4
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/8a5de11a47

[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/9a99d4cecb18614fca7d7b2703002d87
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/78bc1ed517a1a5e4701434be6e4bb5ce
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.1s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/ec1bbb20dff562129138b6779c255e61
___________________________________fit_transform_one cache loaded - 0.0s, 0.0min
[Memory]0.0s, 0.0min    : Loading _fit_transform_one from /var/folders/nd/y54n6bcj22s8tqwfp8nvqjkr0000gn/T/tmpeke12o1q/joblib/sklearn/pipeline/_fit_transform_one/81d9fe590b

________________________________________________fit_transform_one - 2.7s, 0.0min


In [22]:
gg3 = pd.DataFrame(grid2.cv_results_)

gg4 = gg3.sort_values(by='mean_test_score',ascending=False)

In [23]:
gg4

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf,param_reduce_dim,param_vect,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
29,0.18416,0.002962,0.099025,0.003273,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.972892,0.975904,0.978852,0.957704,0.978852,0.972841,0.007884,1
13,0.14855,0.013074,2.024289,0.093728,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.957831,0.978916,0.978852,0.966767,0.969789,0.970431,0.007943,2
16,0.159274,0.008236,4.948843,0.189158,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.960843,0.981928,0.975831,0.957704,0.97281,0.969823,0.009155,3
27,0.195431,0.005214,0.102562,0.004646,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.966867,0.972892,0.975831,0.954683,0.97281,0.968616,0.007551,4
26,0.179416,0.002615,4.702643,0.207203,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.966867,0.963855,0.978852,0.960725,0.969789,0.968018,0.006202,5
24,0.174016,0.003785,2.022858,0.09531,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.963855,0.960843,0.978852,0.963746,0.969789,0.967417,0.006415,6
14,0.179823,0.010905,4.71609,0.232438,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function lemma at 0x...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.960843,0.96988,0.978852,0.954683,0.97281,0.967413,0.008617,7
25,0.168736,0.001078,2.026569,0.094424,"LogisticRegression(C=100, max_iter=10000, solv...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=100, max_iter=100...",0.966867,0.966867,0.975831,0.957704,0.966767,0.966807,0.005732,8
12,0.157422,0.011054,2.026038,0.099564,"LogisticRegression(C=10, max_iter=10000, penal...","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function stemmed at ...,"{'clf': LogisticRegression(C=10, max_iter=1000...",0.957831,0.975904,0.978852,0.954683,0.966767,0.966807,0.009543,9
5,2.86302,0.023643,0.109387,0.003465,"SVC(kernel='linear', max_iter=10000)","TruncatedSVD(n_components=500, random_state=42)",CountVectorizer(analyzer=<function nolemma at ...,"{'clf': SVC(kernel='linear', max_iter=10000), ...",0.960843,0.975904,0.969789,0.957704,0.969789,0.966806,0.006618,10


In [28]:
print(grid2.best_params_)

{'clf': LogisticRegression(C=100, max_iter=10000, solver='liblinear'), 'reduce_dim': TruncatedSVD(n_components=500, random_state=42), 'vect': CountVectorizer(analyzer=<function nolemma at 0x7f8467a871f0>, min_df=5,
                stop_words='english')}
