### Analisis de Sentimiento a tweets en Español con el clasificador Support Vector Machine

#### Tweets obtenidos de base de datos con tweets recolectados en español de usuarios con geolocalizacion en Guatemala

### tweets class
* 0 = negativo
* 1 = positivo
* 2 = neutral

### Imports:

In [1]:
import MySQLdb
import pandas as pd
from sklearn.model_selection import train_test_split
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn import metrics

import sys
import os
import time

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import classification_report

### Retrieves data from db:

In [28]:
#Retrieve tweets from db
conn = MySQLdb.connect("13.58.190.139","root","123","tesis" )
data = pd.read_sql("select * from tweets where class is not null", conn)
data_copy = data

### Split data:

In [29]:
#Split label from dataset
y = data_copy["class"]
X = data_copy["text"]

#Split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [30]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((5958,), (1987,), (5958,), (1987,))

In [20]:
#Create dummy indicators for column label
y = pd.get_dummies(y, columns=["class"], prefix="class")
#Split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y)


### Import stop words:

In [31]:
# Import spanish stopword
spanish_stopwords = stopwords.words('spanish')
# Spanish stemmer
stemmer = SnowballStemmer('spanish')
analyzer = CountVectorizer(stop_words = spanish_stopwords).build_analyzer()

In [32]:
# Applies stemmer function to text
def customized_analyzer(doc):
    stemmed_doc = []
    for text in doc:
        word_list = ''
        for word in analyzer(text):
            item = str(stemmer.stem(word))
            word_list = word_list + " " + item
        stemmed_doc.append(word_list)
    return stemmed_doc

In [65]:
# Import spanish stopword
spanish_stopwords = stopwords.words('spanish')

vectorizer = CountVectorizer(
                analyzer = 'word',
                lowercase = True,
                ngram_range = (1,3),
                stop_words = spanish_stopwords)

In [66]:
# Bag of Words from training set
X_train_counts = vectorizer.fit_transform((X_train))

In [67]:
# Train classifier with TF-IDF words weigth
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

In [68]:
X_new_counts = vectorizer.transform((X_test))
X_new_tfidf = tfidf_transformer.transform(X_new_counts)

### Build pipeline for classifier

In [71]:
text_clf = Pipeline([('vect', CountVectorizer()),
                      ('tfidf', TfidfTransformer()),
                      ('clf', SGDClassifier(loss='hinge', penalty='l2',
                                            alpha=1e-3, random_state=42)),
 ])
text_clf.fit(X_train, y_train)  
predicted = text_clf.predict(X_test)
np.mean(predicted == y_test) 

0.65676899849018622

In [72]:
# Print results
print(metrics.classification_report(y_test, predicted, target_names=("Negativo","Positivo","Neutral")))
#metrics.confusion_matrix(y_test, predicted)

             precision    recall  f1-score   support

   Negativo       0.17      0.01      0.01       180
   Positivo       0.44      0.34      0.38       501
    Neutral       0.71      0.87      0.78      1306

avg / total       0.59      0.66      0.61      1987



### Check accuracy for different SVM classifiers

In [86]:
# Create feature vectors
vectorizer = TfidfVectorizer(min_df=5,
                             max_df = 0.8,
                             sublinear_tf=True,stop_words = spanish_stopwords,
                             use_idf=True)

train_vectors = vectorizer.fit_transform(X_train)
test_vectors = vectorizer.transform(X_test)

# Perform classification with SVM, kernel=rbf
classifier_rbf = svm.SVC()
t0 = time.time()
classifier_rbf.fit(X_train_counts, y_train)
t1 = time.time()
prediction_rbf = classifier_rbf.predict(X_new_counts)
t2 = time.time()
time_rbf_train = t1-t0
time_rbf_predict = t2-t1

# Perform classification with SVM, kernel=linear
classifier_linear = svm.SVC(kernel='linear')
t0 = time.time()
classifier_linear.fit(X_train_counts, y_train)
t1 = time.time()
prediction_linear = classifier_linear.predict(X_new_counts)
t2 = time.time()
time_linear_train = t1-t0
time_linear_predict = t2-t1

# Perform classification with SVM, kernel=linear
classifier_liblinear = svm.LinearSVC()
t0 = time.time()
classifier_liblinear.fit(X_train_counts, y_train)
t1 = time.time()
prediction_liblinear = classifier_liblinear.predict(X_new_counts)
t2 = time.time()
time_liblinear_train = t1-t0
time_liblinear_predict = t2-t1

# Print results in a nice table
print("RESULTS FOR SVC(kernel=rbf)")
print("Training time: %fs; Prediction time: %fs" % (time_rbf_train, time_rbf_predict))
print(classification_report(y_test, prediction_rbf, target_names=("Negativo","Positivo","Neutral")))
print("RESULTS FOR SVC(kernel=linear)")
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
print(classification_report(y_test, prediction_linear, target_names=("Negativo","Positivo","Neutral")))
print("RESULTS FOR LinearSVC()")
print("Training time: %fs; Prediction time: %fs" % (time_liblinear_train, time_liblinear_predict))
print(classification_report(y_test, prediction_liblinear, target_names=("Negativo","Positivo","Neutral")))

RESULTS FOR SVC(kernel=rbf)
Training time: 1.030719s; Prediction time: 0.224569s
             precision    recall  f1-score   support

   Negativo       0.00      0.00      0.00       180
   Positivo       0.12      0.00      0.00       501
    Neutral       0.66      1.00      0.79      1306

avg / total       0.46      0.66      0.52      1987

RESULTS FOR SVC(kernel=linear)
Training time: 0.333240s; Prediction time: 0.059819s
             precision    recall  f1-score   support

   Negativo       0.00      0.00      0.00       180
   Positivo       0.00      0.00      0.00       501
    Neutral       0.66      1.00      0.79      1306

avg / total       0.43      0.66      0.52      1987

RESULTS FOR LinearSVC()
Training time: 0.834805s; Prediction time: 0.000213s
             precision    recall  f1-score   support

   Negativo       0.00      0.00      0.00       180
   Positivo       0.00      0.00      0.00       501
    Neutral       0.66      1.00      0.79      1306

avg / to

  'precision', 'predicted', average, warn_for)


In [76]:
# Test Linear model using TF-IDF::

# Perform classification with SVM, kernel=linear
classifier_linear = svm.SVC(kernel='linear')
t0 = time.time()
classifier_linear.fit(X_train_tfidf, y_train)
t1 = time.time()
prediction_linear = classifier_linear.predict(X_new_tfidf)
t2 = time.time()
time_linear_train = t1-t0
time_linear_predict = t2-t1

print("RESULTS FOR SVC(kernel=linear)")
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
print(classification_report(y_test, prediction_linear, target_names=("Negativo","Positivo","Neutral")))

RESULTS FOR SVC(kernel=linear)
Training time: 0.290675s; Prediction time: 0.059952s
             precision    recall  f1-score   support

   Negativo       0.00      0.00      0.00       180
   Positivo       0.00      0.00      0.00       501
    Neutral       0.66      1.00      0.79      1306

avg / total       0.43      0.66      0.52      1987



  'precision', 'predicted', average, warn_for)


In [77]:
# Score del classificador
classifier_linear.score(X_new_tfidf, y_test) #0.69112

0.65727226975339703

In [87]:
print("RESULTS FOR SVC(kernel=rbf)",classifier_rbf.score(X_new_counts, y_test))
print("RESULTS FOR SVC(kernel=linear)",classifier_linear.score(X_new_counts, y_test))
print("RESULTS FOR LinearSVC()",classifier_liblinear.score(X_new_counts, y_test))

RESULTS FOR SVC(kernel=rbf) 0.655762455964
RESULTS FOR SVC(kernel=linear) 0.657272269753
RESULTS FOR LinearSVC() 0.657272269753


### NOTAS:
* El SVM con kernel lineal es el que da mejor accuracy 
* Usar TF-IDF incrementa el accuracy

In [64]:
#Create dummy indicators for column label
#y = pd.get_dummies(y, columns=["class"], prefix="class")
#Split dataset into training and test set
#X_train, X_test, y_train, y_test = train_test_split(X, y)


'''y = label_binarize(y, classes=[0, 1, 2, 3])
clf_SVM = OneVsRestClassifier(LinearSVC())
params = {
      'estimator__C': [0.5, 1.0, 1.5],
      'estimator__tol': [1e-3, 1e-4, 1e-5],
      }
gs = GridSearchCV(clf_SVM, params, cv=5, scoring='roc_auc')
gs.fit(corpus1, y)'''


from sklearn.model_selection import GridSearchCV

pipeline = Pipeline([
    ('vect', vectorizer),
    ('cls', svm.LinearSVC()),
])

parameters = {
    'vect__max_df': (0.5, 1.9),
    'vect__min_df': (10, 20,50),
    'vect__max_features': (500, 1000),
    'vect__ngram_range': ((1, 1), (1, 2)),  # unigrams or bigrams
    'cls__C': (0.2, 0.5, 0.7),
    'cls__loss': ('hinge', 'squared_hinge'),
    'cls__max_iter': (500, 1000)
}


grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1 , scoring='roc_auc')
grid_search.fit(X_train, y_tparsed)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/sara/anaconda3/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/home/sara/anaconda3/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x7fd78d283150, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/sara/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/sara/a.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7fd78d283150, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/home/sara/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/home/sara/a.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()
     17 
     18 
     19 
     20 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 9, 15, 15, 50, 23, 549288, tzinfo=datetime.timezone.utc), 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'session': 'DDC86D33CA6341378925BC7D5C1BD619', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'DDC86D33CA6341378925BC7D5C1BD619']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 9, 15, 15, 50, 23, 549288, tzinfo=datetime.timezone.utc), 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'session': 'DDC86D33CA6341378925BC7D5C1BD619', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'DDC86D33CA6341378925BC7D5C1BD619'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 9, 15, 15, 50, 23, 549288, tzinfo=datetime.timezone.utc), 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'session': 'DDC86D33CA6341378925BC7D5C1BD619', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': 'ADB701D99DD643CBA4D8F1EF44F0C121', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="#Create dummy indicators for column label\n#y = p...ng='roc_auc')\ngrid_search.fit(X_train, y_tparsed)", store_history=True, silent=False, shell_futures=True)
   2712                 self.displayhook.exec_result = result
   2713 
   2714                 # Execute the user code
   2715                 interactivity = "none" if silent else self.ast_node_interactivity
   2716                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2717                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2718                 
   2719                 self.last_execution_succeeded = not has_raised
   2720 
   2721                 # Reset this so later displayed values do not modify the

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>, <_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-64-2c3657452399>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7fd74b9abba8, executi..._before_exec=None error_in_exec=None result=None>)
   2822                     return True
   2823 
   2824             for i, node in enumerate(to_run_interactive):
   2825                 mod = ast.Interactive([node])
   2826                 code = compiler(mod, cell_name, "single")
-> 2827                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7fd74b9211e0, file "<ipython-input-64-2c3657452399>", line 36>
        result = <ExecutionResult object at 7fd74b9abba8, executi..._before_exec=None error_in_exec=None result=None>
   2828                     return True
   2829 
   2830             # Flush softspace
   2831             if softspace(sys.stdout, 0):

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7fd74b9211e0, file "<ipython-input-64-2c3657452399>", line 36>, result=<ExecutionResult object at 7fd74b9abba8, executi..._before_exec=None error_in_exec=None result=None>)
   2876         outflag = 1  # happens in more places, so it's easier as default
   2877         try:
   2878             try:
   2879                 self.hooks.pre_run_code_hook()
   2880                 #rprint('Running code', repr(code_obj)) # dbg
-> 2881                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7fd74b9211e0, file "<ipython-input-64-2c3657452399>", line 36>
        self.user_global_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import MySQLdb\nimport pandas as pd\nfrom sklearn....from sklearn.metrics import classification_report', '#Retrieve tweets from db\nconn = MySQLdb.connect(...s is not null limit 3650", conn)\ndata_copy = data', '#Split label from dataset\ny = data_copy["class"]... y_test = train_test_split(X, y, random_state=42)', '# Import spanish stopword\nspanish_stopwords = st...(stop_words = spanish_stopwords).build_analyzer()', '# Applies stemmer function to text\ndef customize...mmed_doc.append(word_list)\n    return stemmed_doc', '# Import spanish stopword\nspanish_stopwords = st...,\n                stop_words = spanish_stopwords)', '# Bag of Words from training set\nX_train_counts = vectorizer.fit_transform((X_train))', '# Train classifier with TF-IDF words weigth\ntfid...= tfidf_transformer.fit_transform(X_train_counts)', 'X_new_counts = vectorizer.transform((X_test))\nX_...tfidf = tfidf_transformer.transform(X_new_counts)', "text_clf = Pipeline([('vect', CountVectorizer())...clf.predict(X_test)\nnp.mean(predicted == y_test) ", '# Print results\nprint(metrics.classification_rep...")))\n#metrics.confusion_matrix(y_test, predicted)', '# Create feature vectors\nvectorizer = TfidfVecto... target_names=("Negativo","Positivo","Neutral")))', '# Test Linear model using TF-IDF::\n\n# Perform cl... target_names=("Negativo","Positivo","Neutral")))', '# Score del classificador\nclassifier_linear.score(X_new_tfidf, y_test)', "pipeline = Pipeline([\n    ('vect', vectorizer),\n...search.fit(customized_analyzer(X_train), y_train)", "pipeline = Pipeline([\n    ('vect', vectorizer),\n...search.fit(customized_analyzer(X_train), y_train)", 'sklearn.model_selection.GridSearchCV\n\npipeline =...search.fit(customized_analyzer(X_train), y_train)', 'import sklearn.model_selection.GridSearchCV\n\npip...search.fit(customized_analyzer(X_train), y_train)', 'from sklearn.model_selection import GridSearchCV...search.fit(customized_analyzer(X_train), y_train)', ...], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'MySQLdb': <module 'MySQLdb' from '/home/sara/anaconda3/lib/python3.6/site-packages/MySQLdb/__init__.py'>, 'Out': {10: 0.6976998904709748, 14: 0.69112814895947428, 21: 'from sklearn.model_selection import GridSearchCV...search.fit(customized_analyzer(X_train), y_train)', 50: 0.6976998904709748, 54: 0.69112814895947428}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, ...}
        self.user_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import MySQLdb\nimport pandas as pd\nfrom sklearn....from sklearn.metrics import classification_report', '#Retrieve tweets from db\nconn = MySQLdb.connect(...s is not null limit 3650", conn)\ndata_copy = data', '#Split label from dataset\ny = data_copy["class"]... y_test = train_test_split(X, y, random_state=42)', '# Import spanish stopword\nspanish_stopwords = st...(stop_words = spanish_stopwords).build_analyzer()', '# Applies stemmer function to text\ndef customize...mmed_doc.append(word_list)\n    return stemmed_doc', '# Import spanish stopword\nspanish_stopwords = st...,\n                stop_words = spanish_stopwords)', '# Bag of Words from training set\nX_train_counts = vectorizer.fit_transform((X_train))', '# Train classifier with TF-IDF words weigth\ntfid...= tfidf_transformer.fit_transform(X_train_counts)', 'X_new_counts = vectorizer.transform((X_test))\nX_...tfidf = tfidf_transformer.transform(X_new_counts)', "text_clf = Pipeline([('vect', CountVectorizer())...clf.predict(X_test)\nnp.mean(predicted == y_test) ", '# Print results\nprint(metrics.classification_rep...")))\n#metrics.confusion_matrix(y_test, predicted)', '# Create feature vectors\nvectorizer = TfidfVecto... target_names=("Negativo","Positivo","Neutral")))', '# Test Linear model using TF-IDF::\n\n# Perform cl... target_names=("Negativo","Positivo","Neutral")))', '# Score del classificador\nclassifier_linear.score(X_new_tfidf, y_test)', "pipeline = Pipeline([\n    ('vect', vectorizer),\n...search.fit(customized_analyzer(X_train), y_train)", "pipeline = Pipeline([\n    ('vect', vectorizer),\n...search.fit(customized_analyzer(X_train), y_train)", 'sklearn.model_selection.GridSearchCV\n\npipeline =...search.fit(customized_analyzer(X_train), y_train)', 'import sklearn.model_selection.GridSearchCV\n\npip...search.fit(customized_analyzer(X_train), y_train)', 'from sklearn.model_selection import GridSearchCV...search.fit(customized_analyzer(X_train), y_train)', ...], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'MySQLdb': <module 'MySQLdb' from '/home/sara/anaconda3/lib/python3.6/site-packages/MySQLdb/__init__.py'>, 'Out': {10: 0.6976998904709748, 14: 0.69112814895947428, 21: 'from sklearn.model_selection import GridSearchCV...search.fit(customized_analyzer(X_train), y_train)', 50: 0.6976998904709748, 54: 0.69112814895947428}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, ...}
   2882             finally:
   2883                 # Reset our crash handler in place
   2884                 sys.excepthook = old_excepthook
   2885         except SystemExit as e:

...........................................................................
/home/sara/Escritorio/tesis/<ipython-input-64-2c3657452399> in <module>()
     31     'cls__max_iter': (500, 1000)
     32 }
     33 
     34 
     35 grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1 , scoring='roc_auc')
---> 36 grid_search.fit(X_train, y_tparsed)
     37 
     38 
     39 
     40 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=None, error_score='raise',
     ..._score=True,
       scoring='roc_auc', verbose=0), X=318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object, y=      class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns], groups=None)
    940 
    941         groups : array-like, with shape (n_samples,), optional
    942             Group labels for the samples used while splitting the dataset into
    943             train/test set.
    944         """
--> 945         return self._fit(X, y, groups, ParameterGrid(self.param_grid))
        self._fit = <bound method BaseSearchCV._fit of GridSearchCV(...score=True,
       scoring='roc_auc', verbose=0)>
        X = 318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object
        y =       class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns]
        groups = None
        self.param_grid = {'cls__C': (0.2, 0.5, 0.7), 'cls__loss': ('hinge', 'squared_hinge'), 'cls__max_iter': (500, 1000), 'vect__max_df': (0.5, 1.9), 'vect__max_features': (500, 1000), 'vect__min_df': (10, 20, 50), 'vect__ngram_range': ((1, 1), (1, 2))}
    946 
    947 
    948 class RandomizedSearchCV(BaseSearchCV):
    949     """Randomized search on hyper parameters.

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in _fit(self=GridSearchCV(cv=None, error_score='raise',
     ..._score=True,
       scoring='roc_auc', verbose=0), X=318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object, y=      class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns], groups=None, parameter_iterable=<sklearn.model_selection._search.ParameterGrid object>)
    559                                   fit_params=self.fit_params,
    560                                   return_train_score=self.return_train_score,
    561                                   return_n_test_samples=True,
    562                                   return_times=True, return_parameters=True,
    563                                   error_score=self.error_score)
--> 564           for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.model_selection._search.ParameterGrid object>
    565           for train, test in cv_iter)
    566 
    567         # if one choose to see train score, "out" will contain train score info
    568         if self.return_train_score:

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV._fit.<locals>.<genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Sep 15 09:50:23 2017
PID: 21105                    Python 3.6.1: /home/sara/anaconda3/bin/python
...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(steps=[('vect', TfidfVectorizer(analyze...l2', random_state=None, tol=0.0001, verbose=0))]), 318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object,       class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns], make_scorer(roc_auc_score, needs_threshold=True), array([ 913,  914,  915, ..., 2734, 2735, 2736]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 905, 906, 907, 908, 909,
       910, 911, 912]), 0, {'cls__C': 0.2, 'cls__loss': 'hinge', 'cls__max_iter': 500, 'vect__max_df': 0.5, 'vect__max_features': 500, 'vect__min_df': 10, 'vect__ngram_range': (1, 1)}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(steps=[('vect', TfidfVectorizer(analyze...l2', random_state=None, tol=0.0001, verbose=0))]), 318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object,       class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns], make_scorer(roc_auc_score, needs_threshold=True), array([ 913,  914,  915, ..., 2734, 2735, 2736]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 905, 906, 907, 908, 909,
       910, 911, 912]), 0, {'cls__C': 0.2, 'cls__loss': 'hinge', 'cls__max_iter': 500, 'vect__max_df': 0.5, 'vect__max_features': 500, 'vect__min_df': 10, 'vect__ngram_range': (1, 1)})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': True, 'return_times': True, 'return_train_score': True}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=Pipeline(steps=[('vect', TfidfVectorizer(analyze...l2', random_state=None, tol=0.0001, verbose=0))]), X=318                      AT USER si pero no en e...co lunes 
Name: text, Length: 2737, dtype: object, y=      class_0  class_1  class_2
318         0   ...     0        1        0

[2737 rows x 3 columns], scorer=make_scorer(roc_auc_score, needs_threshold=True), train=array([ 913,  914,  915, ..., 2734, 2735, 2736]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 905, 906, 907, 908, 909,
       910, 911, 912]), verbose=0, parameters={'cls__C': 0.2, 'cls__loss': 'hinge', 'cls__max_iter': 500, 'vect__max_df': 0.5, 'vect__max_features': 500, 'vect__min_df': 10, 'vect__ngram_range': (1, 1)}, fit_params={}, return_train_score=True, return_parameters=True, return_n_test_samples=True, return_times=True, error_score='raise')
    233 
    234     try:
    235         if y_train is None:
    236             estimator.fit(X_train, **fit_params)
    237         else:
--> 238             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method Pipeline.fit of Pipeline(steps=[('...2', random_state=None, tol=0.0001, verbose=0))])>
        X_train = 2202    hoy los hermanos de la iglesia se antici...co lunes 
Name: text, Length: 1824, dtype: object
        y_train =       class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns]
        fit_params = {}
    239 
    240     except Exception as e:
    241         # Note fit time as time until error
    242         fit_time = time.time() - start_time

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self=Pipeline(steps=[('vect', TfidfVectorizer(analyze...l2', random_state=None, tol=0.0001, verbose=0))]), X=2202    hoy los hermanos de la iglesia se antici...co lunes 
Name: text, Length: 1824, dtype: object, y=      class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns], **fit_params={})
    265         self : Pipeline
    266             This estimator
    267         """
    268         Xt, fit_params = self._fit(X, y, **fit_params)
    269         if self._final_estimator is not None:
--> 270             self._final_estimator.fit(Xt, y, **fit_params)
        self._final_estimator.fit = <bound method LinearSVC.fit of LinearSVC(C=0.2, ...='l2', random_state=None, tol=0.0001, verbose=0)>
        Xt = <1824x160 sparse matrix of type '<class 'numpy.f... stored elements in Compressed Sparse Row format>
        y =       class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns]
        fit_params = {}
    271         return self
    272 
    273     def fit_transform(self, X, y=None, **fit_params):
    274         """Fit the model and transform with the final estimator

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/svm/classes.py in fit(self=LinearSVC(C=0.2, class_weight=None, dual=True, f...y='l2', random_state=None, tol=0.0001, verbose=0), X=<1824x160 sparse matrix of type '<class 'numpy.f... stored elements in Compressed Sparse Row format>, y=      class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns], sample_weight=None)
    202         if self.C < 0:
    203             raise ValueError("Penalty term must be positive; got (C=%r)"
    204                              % self.C)
    205 
    206         X, y = check_X_y(X, y, accept_sparse='csr',
--> 207                          dtype=np.float64, order="C")
    208         check_classification_targets(y)
    209         self.classes_ = np.unique(y)
    210 
    211         self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in check_X_y(X=<1824x160 sparse matrix of type '<class 'numpy.f... stored elements in Compressed Sparse Row format>, y=      class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns], accept_sparse='csr', dtype=<class 'numpy.float64'>, order='C', copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, multi_output=False, ensure_min_samples=1, ensure_min_features=1, y_numeric=False, warn_on_dtype=False, estimator=None)
    521                     ensure_min_features, warn_on_dtype, estimator)
    522     if multi_output:
    523         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
    524                         dtype=None)
    525     else:
--> 526         y = column_or_1d(y, warn=True)
        y =       class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns]
    527         _assert_all_finite(y)
    528     if y_numeric and y.dtype.kind == 'O':
    529         y = y.astype(np.float64)
    530 

...........................................................................
/home/sara/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in column_or_1d(y=      class_0  class_1  class_2
2202        0   ...     0        1        0

[1824 rows x 3 columns], warn=True)
    557                           " expected. Please change the shape of y to "
    558                           "(n_samples, ), for example using ravel().",
    559                           _DataConversionWarning, stacklevel=2)
    560         return np.ravel(y)
    561 
--> 562     raise ValueError("bad input shape {0}".format(shape))
        shape = (1824, 3)
    563 
    564 
    565 def check_random_state(seed):
    566     """Turn seed into a np.random.RandomState instance

ValueError: bad input shape (1824, 3)
___________________________________________________________________________

In [63]:
y_tparsed = pd.get_dummies(y_train, columns=["class"], prefix="class")

In [66]:
y_tparsed.head()

Unnamed: 0,class_0,class_1,class_2
318,0,0,1
816,0,0,1
2636,0,0,1
59,0,1,0
3257,0,0,1
