In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import logging

logging.getLogger('jieba').setLevel(logging.WARN)
logging.getLogger('fgclassifier').setLevel(logging.INFO)

In [2]:
import os

os.chdir('..')

In [5]:
from fgclassifier.utils import read_data, get_dataset

X_train, y_train = read_data(get_dataset('train_en'), flavor=None, sample_n=1000)

2018-12-03 15:16:52,269 [INFO] Take 1000 samples with random state 1


In [7]:
np.linspace(0.01, 0.1, 10)

array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ])

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.externals import joblib

from fgclassifier.baseline import Baseline, MultiOutputClassifier
from fgclassifier.features import Count, Tfidf, SVD, SparseToDense
from fgclassifier.classifiers import LinearDiscriminantAnalysis

pipeline = Pipeline([
    ('vect', Count()),
    ('tfidf', Tfidf()),
    ('svd', SVD(n_components=1000)),
], memory='data/pipeline_cache')

Xt = pipeline.fit_transform(X_train)

clf = MultiOutputClassifier(LinearDiscriminantAnalysis())
parameters = {
    'estimator': [
        LinearDiscriminantAnalysis(n_components=n)
        for n in [100, 200, 300, 400, 500, None]
    ]
}

with joblib.parallel_backend('threading', n_jobs=1):
    grid_search = GridSearchCV(clf, parameters, cv=4, verbose=True)
    grid_search.fit(Xt, y_train)

2018-12-03 15:11:30,164 [INFO] Fit & Transform CountVectorizer...
2018-12-03 15:11:30,535 [INFO] Vocab Size: 8832
If this happens often in your code, it can cause performance problems 
(results will be correct in all cases). 
The reason for this is probably some large input arguments for a wrapped
 function (e.g. large strings).
THIS IS A JOBLIB ISSUE. If you can, kindly provide the joblib's team with an
 example so that they can fix the problem.
  **fit_params_steps[name])
2018-12-03 15:11:34,229 [INFO] Fit & Transform TF-IDF...
2018-12-03 15:11:34,264 [INFO] Fit & Transform TruncatedSVD...


Fitting 4 folds for each of 4 candidates, totalling 16 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
2018-12-03 15:12:02,085 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:12:02,093 [INFO]   location_traffic_convenience            	0.2204
2018-12-03 15:12:02,100 [INFO]   location_distance_from_business_district	0.2271
2018-12-03 15:12:02,103 [INFO]   location_easy_to_find                   	0.2126
2018-12-03 15:12:02,114 [INFO]   service_wait_time                       	0.2329
2018-12-03 15:12:02,140 [INFO]   service_waiters_attitude                	0.2508
2018-12-03 15:12:02,142 [INFO]   service_parking_convenience             	0.2401
2018-12-03 15:12:02,184 [INFO]   service_serving_speed                   	0.2300
2018-12-03 15:12:02,196 [INFO]   price_level                             	0.1711
2018-12-03 15:12:02,200 [INFO]   price_cost_effective                    	0.2271
2018-12-03 15:12:02,205 [INFO]   price_discount                          	0.2010
2018-12-0

2018-12-03 15:12:23,014 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:12:23,036 [INFO]   location_traffic_convenience            	0.2222
2018-12-03 15:12:23,046 [INFO]   location_distance_from_business_district	0.2166
2018-12-03 15:12:23,082 [INFO]   location_easy_to_find                   	0.2086
2018-12-03 15:12:23,087 [INFO]   service_wait_time                       	0.2289
2018-12-03 15:12:23,090 [INFO]   service_waiters_attitude                	0.2666
2018-12-03 15:12:23,092 [INFO]   service_parking_convenience             	0.2433
2018-12-03 15:12:23,095 [INFO]   service_serving_speed                   	0.2323
2018-12-03 15:12:23,105 [INFO]   price_level                             	0.1770
2018-12-03 15:12:23,109 [INFO]   price_cost_effective                    	0.2185
2018-12-03 15:12:23,113 [INFO]   price_discount                          	0.1851
2018-12-03 15:12:23,140 [INFO]   environment_decoration                  	0.2138
2018-12-0

2018-12-03 15:12:40,988 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:12:40,991 [INFO]   location_traffic_convenience            	0.2133
2018-12-03 15:12:40,996 [INFO]   location_distance_from_business_district	0.2222
2018-12-03 15:12:41,002 [INFO]   location_easy_to_find                   	0.2197
2018-12-03 15:12:41,006 [INFO]   service_wait_time                       	0.2306
2018-12-03 15:12:41,008 [INFO]   service_waiters_attitude                	0.2622
2018-12-03 15:12:41,011 [INFO]   service_parking_convenience             	0.2407
2018-12-03 15:12:41,013 [INFO]   service_serving_speed                   	0.2265
2018-12-03 15:12:41,016 [INFO]   price_level                             	0.1684
2018-12-03 15:12:41,018 [INFO]   price_cost_effective                    	0.2191
2018-12-03 15:12:41,021 [INFO]   price_discount                          	0.1883
2018-12-03 15:12:41,024 [INFO]   environment_decoration                  	0.1770
2018-12-0

2018-12-03 15:12:58,148 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:12:58,151 [INFO]   location_traffic_convenience            	0.2153
2018-12-03 15:12:58,153 [INFO]   location_distance_from_business_district	0.2159
2018-12-03 15:12:58,156 [INFO]   location_easy_to_find                   	0.2140
2018-12-03 15:12:58,159 [INFO]   service_wait_time                       	0.2357
2018-12-03 15:12:58,161 [INFO]   service_waiters_attitude                	0.2592
2018-12-03 15:12:58,163 [INFO]   service_parking_convenience             	0.2428
2018-12-03 15:12:58,166 [INFO]   service_serving_speed                   	0.2271
2018-12-03 15:12:58,168 [INFO]   price_level                             	0.1667
2018-12-03 15:12:58,171 [INFO]   price_cost_effective                    	0.2066
2018-12-03 15:12:58,173 [INFO]   price_discount                          	0.1898
2018-12-03 15:12:58,177 [INFO]   environment_decoration                  	0.1803
2018-12-0

2018-12-03 15:13:16,414 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:13:16,419 [INFO]   location_traffic_convenience            	0.2204
2018-12-03 15:13:16,424 [INFO]   location_distance_from_business_district	0.2271
2018-12-03 15:13:16,431 [INFO]   location_easy_to_find                   	0.2126
2018-12-03 15:13:16,436 [INFO]   service_wait_time                       	0.2329
2018-12-03 15:13:16,440 [INFO]   service_waiters_attitude                	0.2508
2018-12-03 15:13:16,446 [INFO]   service_parking_convenience             	0.2401
2018-12-03 15:13:16,450 [INFO]   service_serving_speed                   	0.2300
2018-12-03 15:13:16,454 [INFO]   price_level                             	0.1711
2018-12-03 15:13:16,461 [INFO]   price_cost_effective                    	0.2271
2018-12-03 15:13:16,464 [INFO]   price_discount                          	0.2010
2018-12-03 15:13:16,467 [INFO]   environment_decoration                  	0.1631
2018-12-0

2018-12-03 15:13:36,757 [INFO]   service_waiters_attitude                	0.2666
2018-12-03 15:13:36,769 [INFO]   service_parking_convenience             	0.2433
2018-12-03 15:13:36,773 [INFO]   service_serving_speed                   	0.2323
2018-12-03 15:13:36,783 [INFO]   price_level                             	0.1770
2018-12-03 15:13:36,788 [INFO]   price_cost_effective                    	0.2185
2018-12-03 15:13:36,791 [INFO]   price_discount                          	0.1851
2018-12-03 15:13:36,796 [INFO]   environment_decoration                  	0.2138
2018-12-03 15:13:36,803 [INFO]   environment_noise                       	0.2038
2018-12-03 15:13:36,806 [INFO]   environment_space                       	0.1851
2018-12-03 15:13:36,813 [INFO]   environment_cleaness                    	0.1851
2018-12-03 15:13:36,816 [INFO]   dish_portion                            	0.1753
2018-12-03 15:13:36,820 [INFO]   dish_taste                              	0.1667
2018-12-03 15:13:36,825 [INF

2018-12-03 15:13:56,269 [INFO]   dish_portion                            	0.1753
2018-12-03 15:13:56,278 [INFO]   dish_taste                              	0.1667
2018-12-03 15:13:56,281 [INFO]   dish_look                               	0.2086
2018-12-03 15:13:56,287 [INFO]   dish_recommendation                     	0.2259
2018-12-03 15:13:56,291 [INFO]   others_overall_experience               	0.2002
2018-12-03 15:13:56,293 [INFO]   others_willing_to_consume_again         	0.1959
2018-12-03 15:13:56,423 [INFO] [Validate]: F1 Scores
  'precision', 'predicted', average, warn_for)
2018-12-03 15:13:56,439 [INFO]   location_traffic_convenience            	0.2193
2018-12-03 15:13:56,450 [INFO]   location_distance_from_business_district	0.2199
2018-12-03 15:13:56,457 [INFO]   location_easy_to_find                   	0.2118
2018-12-03 15:13:56,460 [INFO]   service_wait_time                       	0.2325
2018-12-03 15:13:56,463 [INFO]   service_waiters_attitude                	0.2418
2018-12-0

2018-12-03 15:14:16,519 [INFO]   location_traffic_convenience            	0.2641
2018-12-03 15:14:16,524 [INFO]   location_distance_from_business_district	0.2220
2018-12-03 15:14:16,531 [INFO]   location_easy_to_find                   	0.2137
2018-12-03 15:14:16,536 [INFO]   service_wait_time                       	0.2308
2018-12-03 15:14:16,542 [INFO]   service_waiters_attitude                	0.2458
2018-12-03 15:14:16,546 [INFO]   service_parking_convenience             	0.2414
2018-12-03 15:14:16,548 [INFO]   service_serving_speed                   	0.2296
2018-12-03 15:14:16,554 [INFO]   price_level                             	0.1722
2018-12-03 15:14:16,557 [INFO]   price_cost_effective                    	0.2216
2018-12-03 15:14:16,562 [INFO]   price_discount                          	0.1916
2018-12-03 15:14:16,568 [INFO]   environment_decoration                  	0.1840
2018-12-03 15:14:16,570 [INFO]   environment_noise                       	0.2059
2018-12-03 15:14:16,573 [INF

2018-12-03 15:14:41,317 [INFO]   environment_decoration                  	0.1728
2018-12-03 15:14:41,321 [INFO]   environment_noise                       	0.2073
2018-12-03 15:14:41,325 [INFO]   environment_space                       	0.1934
2018-12-03 15:14:41,329 [INFO]   environment_cleaness                    	0.1954
2018-12-03 15:14:41,336 [INFO]   dish_portion                            	0.1756
2018-12-03 15:14:41,342 [INFO]   dish_taste                              	0.1745
2018-12-03 15:14:41,351 [INFO]   dish_look                               	0.2086
2018-12-03 15:14:41,354 [INFO]   dish_recommendation                     	0.2232
2018-12-03 15:14:41,358 [INFO]   others_overall_experience               	0.2033
2018-12-03 15:14:41,362 [INFO]   others_willing_to_consume_again         	0.1964


KeyboardInterrupt: 

Process ForkPoolWorker-6:
Process ForkPoolWorker-7:
Process ForkPoolWorker-8:
Process ForkPoolWorker-5:
Traceback (most recent call last):
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jesse/anaconda3/envs/idp/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._