In [30]:
import pandas as pd
from spacy.lang.id import Indonesian
from spacy.lang.id.stop_words import STOP_WORDS
import string

In [31]:
# ML Packages
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics import accuracy_score 
from sklearn.base import TransformerMixin 
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import VotingClassifier
from sklearn import model_selection

In [32]:
nlp = Indonesian()

In [33]:
df = pd.read_excel("News-Articles-Dataset.xls", sheet_name = "berita", na_values = ' ')
df.head(10)

Unnamed: 0,ID,Articles,Tagging
0,1,"Jakarta, Di jejaring sosial, banyak beredar in...",valid
1,2,Isu bahwa ikan lele mengandung sel kanker di j...,valid
2,3,Bagi penikmat kuliner dengan bahan dasar ikan ...,valid
3,4,Ikan lele merupakan salah satu makanan favorit...,valid
4,5,Ikan lele merupakan bahan makanan yang cukup p...,valid
5,6,"SURABAYA, KOMPAS.com - ""Dalam sesuap daging ik...",hoax
6,7,Bahaya Mengkonsumsi Ikan Lele Yang Mengandung ...,hoax
7,8,"Di jejaring sosial, banyak beredar informasi y...",hoax
8,9,"Jakarta, Sebuah artikel yang cukup viral di in...",valid
9,10,Pada dasarnya tidak ada makanan yang membawa s...,valid


In [34]:
#Replace tagging with numeric value
# 1 for valid article 
# 0 for hoax article
df.Tagging.replace(['valid', 'hoax'], [1, 0], inplace= True)
df.head(10)

Unnamed: 0,ID,Articles,Tagging
0,1,"Jakarta, Di jejaring sosial, banyak beredar in...",1
1,2,Isu bahwa ikan lele mengandung sel kanker di j...,1
2,3,Bagi penikmat kuliner dengan bahan dasar ikan ...,1
3,4,Ikan lele merupakan salah satu makanan favorit...,1
4,5,Ikan lele merupakan bahan makanan yang cukup p...,1
5,6,"SURABAYA, KOMPAS.com - ""Dalam sesuap daging ik...",0
6,7,Bahaya Mengkonsumsi Ikan Lele Yang Mengandung ...,0
7,8,"Di jejaring sosial, banyak beredar informasi y...",0
8,9,"Jakarta, Sebuah artikel yang cukup viral di in...",1
9,10,Pada dasarnya tidak ada makanan yang membawa s...,1


In [35]:
stopwords = list(STOP_WORDS)
stopwords

['sebutlah',
 'seluruhnya',
 'jangankan',
 'datang',
 'sela',
 'sebagainya',
 'waktunya',
 'seketika',
 'tiga',
 'mendapat',
 'ataupun',
 'biasa',
 'seorang',
 'khususnya',
 'sepertinya',
 'bermacam',
 'toh',
 'manalagi',
 'tuturnya',
 'bersiap',
 'dilihat',
 'mempersiapkan',
 'bakalan',
 'bawah',
 'dari',
 'mereka',
 'jadi',
 'meminta',
 'bermaksud',
 'janganlah',
 'berapa',
 'jumlah',
 'bermula',
 'sebutnya',
 'dimintai',
 'siapakah',
 'bertanya-tanya',
 'sendirian',
 'pada',
 'sejak',
 'siap',
 'sekaligus',
 'juga',
 'mengibaratkannya',
 'tandas',
 'malah',
 'selaku',
 'tersampaikan',
 'depan',
 'semaunya',
 'tidaklah',
 'mendatang',
 'segala',
 'tampak',
 'kepada',
 'cuma',
 'dimulailah',
 'semata',
 'sebelumnya',
 'ditunjuk',
 'masing',
 'menunjuknya',
 'bermacam-macam',
 'jawaban',
 'itulah',
 'perlunya',
 'kemudian',
 'kala',
 'setiap',
 'sejumlah',
 'selama',
 'sempat',
 'kamulah',
 'kalaupun',
 'kitalah',
 'berupa',
 'benarlah',
 'rupanya',
 'pertanyakan',
 'daripada',
 'sedik

In [36]:
sampleArticle = df['Articles'].values[0]
sampleArticle


'Jakarta, Di jejaring sosial, banyak beredar informasi yang menyebut lele sebagai ikan paling jorok. Dalam sesuap daging ikan lele, terkandung 3000 sel kanker. Benarkah?\nJulukan sebagai ikan paling jorok merujuk pada sifat lele yang doyan mengonsumsi segala jenis limbah di perairan. Bahkan sebuah artikel yang cukup viral di internet menyebutkan kotoran manusia juga dijadikan pakan pada sebuah budidaya lele di Kota Haikou, China.\nSementara itu di habitat aslinya, lele atau catfish juga dikenal sebagai spesies ikan yang sangat tangguh. Ikan ini dilengkapi alat pernapasan tambahan berupa labirin, sehingga mampu bertahan hidup dalam kondisi perairan berlumpur atau bahkan tercemar. Agaknya, fakta inilah yang memunculkan dugaan soal akumulasi racun karsinogen (penyebab kanker) di tubuh ikan lele.\nUntungnya, ikan lele yang beredar di pasaran bukan berasal dari alam liar. Lele banyak dibudidayakan di kolam-kolam, yang mestinya bisa dikendalikan agar bebas dari pencemaran. Pakan yang diberik

In [37]:
#Lemmatization
docx = nlp(sampleArticle)
for word in docx: 
    print(word.text,"Lemma =>",word.lemma_)


Jakarta Lemma => Jakarta
, Lemma => ,
Di Lemma => Di
jejaring Lemma => jejaring
sosial Lemma => sosial
, Lemma => ,
banyak Lemma => banyak
beredar Lemma => edar
informasi Lemma => informasi
yang Lemma => yang
menyebut Lemma => sebut
lele Lemma => lele
sebagai Lemma => bagai
ikan Lemma => ikan
paling Lemma => paling
jorok Lemma => jorok
. Lemma => .
Dalam Lemma => Dalam
sesuap Lemma => suap
daging Lemma => daging
ikan Lemma => ikan
lele Lemma => lele
, Lemma => ,
terkandung Lemma => kandung
3000 Lemma => 3000
sel Lemma => sel
kanker Lemma => kanker
. Lemma => .
Benarkah Lemma => Benarkah
? Lemma => ?

 Lemma => 

Julukan Lemma => Julukan
sebagai Lemma => bagai
ikan Lemma => ikan
paling Lemma => paling
jorok Lemma => jorok
merujuk Lemma => rujuk
pada Lemma => pada
sifat Lemma => sifat
lele Lemma => lele
yang Lemma => yang
doyan Lemma => doyan
mengonsumsi Lemma => konsumsi
segala Lemma => segala
jenis Lemma => jenis
limbah Lemma => limbah
di Lemma => di
perairan Lemma => air
. Lemma => .


In [38]:
# Filtering out Stopwords and Punctuations
for word in docx:
    if word.is_stop == False and not word.is_punct:
        if word.is_stop != True and not word.is_punct:
            print(word)

Jakarta
Di
jejaring
sosial
beredar
informasi
menyebut
lele
ikan
jorok
Dalam
sesuap
daging
ikan
lele
terkandung
3000
sel
kanker
Benarkah


Julukan
ikan
jorok
merujuk
sifat
lele
doyan
mengonsumsi
jenis
limbah
perairan
Bahkan
artikel
viral
internet
kotoran
manusia
dijadikan
pakan
budidaya
lele
Kota
Haikou
China


Sementara
habitat
aslinya
lele
catfish
dikenal
spesies
ikan
tangguh
Ikan
dilengkapi
alat
pernapasan
tambahan
labirin
bertahan
hidup
kondisi
perairan
berlumpur
tercemar
Agaknya
fakta
memunculkan
dugaan
akumulasi
racun
karsinogen
penyebab
kanker
tubuh
ikan
lele


Untungnya
ikan
lele
beredar
pasaran
berasal
alam
liar
Lele
dibudidayakan
kolam
kolam
mestinya
dikendalikan
bebas
pencemaran
Pakan
dipilih
mengandalkan
limbah


Yang
popularitas
ikan
bersungut
pudar
meningkat
Data
Kementerian
Kelautan
Perikanan
KKP
menyebut
produksi
lele
2013
mencapai
543,461
ton
meningkat
441,217
ton
2012
337,577
ton
2011


Konsumsi
ikan
lele
Badan
Pusat
Statistik
BPS
tercatat
29,98
kg
kapita
22,58
kg
kapi

In [39]:
# Stop words and Punctuation In List Comprehension
[ word for word in docx if word.is_stop == False and not word.is_punct ]

[Jakarta,
 Di,
 jejaring,
 sosial,
 beredar,
 informasi,
 menyebut,
 lele,
 ikan,
 jorok,
 Dalam,
 sesuap,
 daging,
 ikan,
 lele,
 terkandung,
 3000,
 sel,
 kanker,
 Benarkah,
 ,
 Julukan,
 ikan,
 jorok,
 merujuk,
 sifat,
 lele,
 doyan,
 mengonsumsi,
 jenis,
 limbah,
 perairan,
 Bahkan,
 artikel,
 viral,
 internet,
 kotoran,
 manusia,
 dijadikan,
 pakan,
 budidaya,
 lele,
 Kota,
 Haikou,
 China,
 ,
 Sementara,
 habitat,
 aslinya,
 lele,
 catfish,
 dikenal,
 spesies,
 ikan,
 tangguh,
 Ikan,
 dilengkapi,
 alat,
 pernapasan,
 tambahan,
 labirin,
 bertahan,
 hidup,
 kondisi,
 perairan,
 berlumpur,
 tercemar,
 Agaknya,
 fakta,
 memunculkan,
 dugaan,
 akumulasi,
 racun,
 karsinogen,
 penyebab,
 kanker,
 tubuh,
 ikan,
 lele,
 ,
 Untungnya,
 ikan,
 lele,
 beredar,
 pasaran,
 berasal,
 alam,
 liar,
 Lele,
 dibudidayakan,
 kolam,
 kolam,
 mestinya,
 dikendalikan,
 bebas,
 pencemaran,
 Pakan,
 dipilih,
 mengandalkan,
 limbah,
 ,
 Yang,
 popularitas,
 ikan,
 bersungut,
 pudar,
 meningkat,
 Data,
 

In [11]:
# Use the punctuations of string module
punctuations = string.punctuation
# Creating a Spacy Parser
parser = Indonesian()


In [12]:
def tokenizer(sentence):
    mytokens = parser(sentence)
    mytokens = [ word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in mytokens ]
    mytokens = [ word for word in mytokens if word not in stopwords and word not in punctuations ]
    return mytokens

In [40]:
tokenizer(sampleArticle)

['jakarta',
 'jejaring',
 'sosial',
 'edar',
 'informasi',
 'lele',
 'ikan',
 'jorok',
 'suap',
 'daging',
 'ikan',
 'lele',
 'kandung',
 '3000',
 'sel',
 'kanker',
 'julukan',
 'ikan',
 'jorok',
 'rujuk',
 'sifat',
 'lele',
 'doyan',
 'konsumsi',
 'jenis',
 'limbah',
 'air',
 'artikel',
 'viral',
 'internet',
 'kotor',
 'manusia',
 'dijadikan',
 'pakan',
 'budidaya',
 'lele',
 'kota',
 'haikou',
 'china',
 'habitat',
 'aslinya',
 'lele',
 'catfish',
 'dikenal',
 'spesies',
 'ikan',
 'tangguh',
 'ikan',
 'dilengkapi',
 'alat',
 'napas',
 'rupa',
 'labirin',
 'tahan',
 'hidup',
 'kondisi',
 'air',
 'lumpur',
 'cemar',
 'fakta',
 'muncul',
 'duga',
 'akumulasi',
 'racun',
 'karsinogen',
 'kanker',
 'tubuh',
 'ikan',
 'lele',
 'untungnya',
 'ikan',
 'lele',
 'edar',
 'pasar',
 'alam',
 'liar',
 'lele',
 'dibudidayakan',
 'kolam',
 'kolam',
 'mestinya',
 'dikendalikan',
 'bebas',
 'cemar',
 'pakan',
 'dipilih',
 'andal',
 'limbah',
 'popularitas',
 'ikan',
 'sungut',
 'pudar',
 'tingkat',


In [41]:
# Custom transformer
class predictors(TransformerMixin):
    def transform(self, X, **transform_params):
        return [clean_text(text) for text in X]
    def fit(self, X, y=None, **fit_params):
        return self
    def get_params(self, deep=True):
        return {}
class DenseTransformer(TransformerMixin):

    def transform(self, X, y=None, **fit_params):
        return X.todense()

    def fit_transform(self, X, y=None, **fit_params):
        self.fit(X, y, **fit_params)
        return self.transform(X)

    def fit(self, X, y=None, **fit_params):
        return self
    
    def get_params(self, deep=True):
        return {}    

# Basic function to clean the text 
def clean_text(text):     
    return text.strip().lower()

In [42]:
# Vectorization
vectorizer = CountVectorizer(tokenizer = tokenizer, ngram_range=(1,1)) 
classifier = LinearSVC()
nbClassifier = GaussianNB()
mNbClassifier = MultinomialNB()
svmClassifier = svm.SVC()
knnClassifier = KNeighborsClassifier(n_neighbors=3)

votingClassifier = VotingClassifier(estimators=[('KNN', knnClassifier),
                                                ('NaiveBayes', mNbClassifier), 
                                                ('SVM', svmClassifier)])

In [43]:
# Using Tfidf
tfvectorizer = TfidfVectorizer(tokenizer = tokenizer, max_df= 0.5, min_df=2, max_features = 600)

In [44]:
# Splitting Data Set
from sklearn.model_selection import train_test_split
# Features and Labels
X = df['Articles']
ylabels = df['Tagging']

In [45]:
X_train, X_test, y_train, y_test = train_test_split(X, ylabels, test_size=0.2, random_state=42)

In [57]:
# Create the  pipeline to clean, tokenize, vectorize, and classify 
pipe = Pipeline([("cleaner", predictors()),
                 ('vectorizer', tfvectorizer),
                 ('to_dense', DenseTransformer()),
                 ('clf', MultinomialNB())])

In [58]:
# Fit our data
pipe.fit(X_train,y_train)

Pipeline(memory=None,
     steps=[('cleaner', <__main__.predictors object at 0x1a218a4668>), ('vectorizer', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=0.5, max_features=100, min_df=2,
        ngram...ormer object at 0x1a218a4f60>), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])

In [59]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Predicting with a test dataset
sample_prediction = pipe.predict(X_test)
report = classification_report(y_test, sample_prediction)
print(report)

confusion = confusion_matrix(y_test, sample_prediction)
print(confusion)
#[row, column]
TP = confusion[1, 1]
TN = confusion[0, 0]
FP = confusion[0, 1]
FN = confusion[1, 0]

print(FP)

             precision    recall  f1-score   support

          0       0.73      0.42      0.53        19
          1       0.72      0.90      0.80        31

avg / total       0.72      0.72      0.70        50

[[ 8 11]
 [ 3 28]]
11


In [60]:
from sklearn.model_selection import GridSearchCV
parameter = {
    "clf__alpha": [0.01, 0.1, 0.2, 0.3, 0.4],
    "clf__fitprior": [True, False],    
}

model = GridSearchCV(pipe, param_grid=parameter, cv=3, n_jobs=-1, verbose=1)
model.fit(X_train, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x10d4ff660, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/s...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/rohma.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x10d4ff660, file "/Use...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/s...ges/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/Users/rohma.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Uni...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/asyncio/base_events.py in run_forever(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_UnixS...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/asyncio/base_events.py in _run_once(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(15, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/asyncio/events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(15, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (15, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=15, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 20, 7, 38, 42, 973091, tzinfo=tzutc()), 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'session': 'a13b8d89c8e04665afc2f8ef4fa9fcbb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'a13b8d89c8e04665afc2f8ef4fa9fcbb']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 20, 7, 38, 42, 973091, tzinfo=tzutc()), 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'session': 'a13b8d89c8e04665afc2f8ef4fa9fcbb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'a13b8d89c8e04665afc2f8ef4fa9fcbb'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 20, 7, 38, 42, 973091, tzinfo=tzutc()), 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'session': 'a13b8d89c8e04665afc2f8ef4fa9fcbb', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'aa395274957b4f199d4a0f9c4e833228', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import GridSearchCV...n_jobs=-1, verbose=1)\nmodel.fit(X_train, y_train)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-60-a93d08aaf0bd>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1a21f13eb8, execution...rue silent=False shell_futures=True> result=None>)
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])
   2908                 code = compiler(mod, cell_name, "single")
-> 2909                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x1a23f97db0, file "<ipython-input-60-a93d08aaf0bd>", line 8>
        result = <ExecutionResult object at 1a21f13eb8, execution...rue silent=False shell_futures=True> result=None>
   2910                     return True
   2911 
   2912             # Flush softspace
   2913             if softspace(sys.stdout, 0):

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x1a23f97db0, file "<ipython-input-60-a93d08aaf0bd>", line 8>, result=<ExecutionResult object at 1a21f13eb8, execution...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x1a23f97db0, file "<ipython-input-60-a93d08aaf0bd>", line 8>
        self.user_global_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'DenseTransformer': <class '__main__.DenseTransformer'>, 'FN': 3, 'FP': 11, 'GaussianNB': <class 'sklearn.naive_bayes.GaussianNB'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nfrom spacy.lang.id import In...ang.id.stop_words import STOP_WORDS\nimport string', '# ML Packages\nfrom sklearn.feature_extraction.te...ingClassifier\nfrom sklearn import model_selection', 'nlp = Indonesian()', 'df = pd.read_excel("News-Articles-Dataset.xls", sheet_name = "berita", na_values = \' \')\ndf.head(10)', "#Replace tagging with numeric value\n# 1 for vali...lid', 'hoax'], [1, 0], inplace= True)\ndf.head(10)", 'stopwords = list(STOP_WORDS)\nstopwords', "sampleArticle = df['Articles'].values[0]\nsampleArticle", '#Lemmatization\ndocx = nlp(sampleArticle)\nfor wor...ocx: \n    print(word.text,"Lemma =>",word.lemma_)', '# Filtering out Stopwords and Punctuations\nfor w...ue and not word.is_punct:\n            print(word)', '# Stop words and Punctuation In List Comprehensi... if word.is_stop == False and not word.is_punct ]', '# Use the punctuations of string module\npunctuat...n\n# Creating a Spacy Parser\nparser = Indonesian()', 'def tokenizer(sentence):\n    mytokens = parser(s...nd word not in punctuations ]\n    return mytokens', 'tokenizer(sampleArticle)', '# Custom transformer\nclass predictors(Transforme..._text(text):     \n    return text.strip().lower()', "# Vectorization\nvectorizer = CountVectorizer(tok...                         ('SVM', svmClassifier)])", '# Using Tfidf\ntfvectorizer = TfidfVectorizer(tok...nizer, max_df= 0.5, min_df=2, max_features = 600)', "# Splitting Data Set\nfrom sklearn.model_selectio...Labels\nX = df['Articles']\nylabels = df['Tagging']", 'X_train, X_test, y_train, y_test = train_test_split(X, ylabels, test_size=0.2, random_state=42)', "# Create the  pipeline to clean, tokenize, vecto...Transformer()),\n                 ('clf', SVC())])", ...], 'Indonesian': <class 'spacy.lang.id.Indonesian'>, 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, ...}
        self.user_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'DenseTransformer': <class '__main__.DenseTransformer'>, 'FN': 3, 'FP': 11, 'GaussianNB': <class 'sklearn.naive_bayes.GaussianNB'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\nfrom spacy.lang.id import In...ang.id.stop_words import STOP_WORDS\nimport string', '# ML Packages\nfrom sklearn.feature_extraction.te...ingClassifier\nfrom sklearn import model_selection', 'nlp = Indonesian()', 'df = pd.read_excel("News-Articles-Dataset.xls", sheet_name = "berita", na_values = \' \')\ndf.head(10)', "#Replace tagging with numeric value\n# 1 for vali...lid', 'hoax'], [1, 0], inplace= True)\ndf.head(10)", 'stopwords = list(STOP_WORDS)\nstopwords', "sampleArticle = df['Articles'].values[0]\nsampleArticle", '#Lemmatization\ndocx = nlp(sampleArticle)\nfor wor...ocx: \n    print(word.text,"Lemma =>",word.lemma_)', '# Filtering out Stopwords and Punctuations\nfor w...ue and not word.is_punct:\n            print(word)', '# Stop words and Punctuation In List Comprehensi... if word.is_stop == False and not word.is_punct ]', '# Use the punctuations of string module\npunctuat...n\n# Creating a Spacy Parser\nparser = Indonesian()', 'def tokenizer(sentence):\n    mytokens = parser(s...nd word not in punctuations ]\n    return mytokens', 'tokenizer(sampleArticle)', '# Custom transformer\nclass predictors(Transforme..._text(text):     \n    return text.strip().lower()', "# Vectorization\nvectorizer = CountVectorizer(tok...                         ('SVM', svmClassifier)])", '# Using Tfidf\ntfvectorizer = TfidfVectorizer(tok...nizer, max_df= 0.5, min_df=2, max_features = 600)', "# Splitting Data Set\nfrom sklearn.model_selectio...Labels\nX = df['Articles']\nylabels = df['Tagging']", 'X_train, X_test, y_train, y_test = train_test_split(X, ylabels, test_size=0.2, random_state=42)', "# Create the  pipeline to clean, tokenize, vecto...Transformer()),\n                 ('clf', SVC())])", ...], 'Indonesian': <class 'spacy.lang.id.Indonesian'>, 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
/Users/rohmadi.rohmadi/workspace/mine/thesis/<ipython-input-60-a93d08aaf0bd> in <module>()
      3     "clf__alpha": [0.01, 0.1, 0.2, 0.3, 0.4],
      4     "clf__fitprior": [True, False],    
      5 }
      6 
      7 model = GridSearchCV(pipe, param_grid=parameter, cv=3, n_jobs=-1, verbose=1)
----> 8 model.fit(X_train, y_train)

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=3, error_score='raise',
       e...ain_score='warn',
       scoring=None, verbose=1), X=132    Sebelumnya sikat gigi bermerek Oral B men...ole...
Name: Articles, Length: 200, dtype: object, y=132    0
225    1
238    1
119    0
136    0
66 ...102    1
Name: Tagging, Length: 200, dtype: int64, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X = 132    Sebelumnya sikat gigi bermerek Oral B men...ole...
Name: Articles, Length: 200, dtype: object
        y = 132    0
225    1
238    1
119    0
136    0
66 ...102    1
Name: Tagging, Length: 200, dtype: int64
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Thu Dec 20 14:38:43 2018
PID: 19211        Python 3.6.5: /Users/rohmadi.rohmadi/anaconda3/bin/python
...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), 132    Sebelumnya sikat gigi bermerek Oral B men...ole...
Name: Articles, Length: 200, dtype: object, 132    0
225    1
238    1
119    0
136    0
66 ...102    1
Name: Tagging, Length: 200, dtype: int64, {'score': <function _passthrough_scorer>}, array([ 63,  64,  68,  71,  72,  73,  74,  75,  ..., 192, 193, 194, 195, 196, 197,
       198, 199]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 1... 56, 57, 58, 59, 60, 61, 62, 65, 66, 67, 69, 70]), 1, {'clf__alpha': 0.01, 'clf__fitprior': True}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), 132    Sebelumnya sikat gigi bermerek Oral B men...ole...
Name: Articles, Length: 200, dtype: object, 132    0
225    1
238    1
119    0
136    0
66 ...102    1
Name: Tagging, Length: 200, dtype: int64, {'score': <function _passthrough_scorer>}, array([ 63,  64,  68,  71,  72,  73,  74,  75,  ..., 192, 193, 194, 195, 196, 197,
       198, 199]), array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 1... 56, 57, 58, 59, 60, 61, 62, 65, 66, 67, 69, 70]), 1, {'clf__alpha': 0.01, 'clf__fitprior': True})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), X=132    Sebelumnya sikat gigi bermerek Oral B men...ole...
Name: Articles, Length: 200, dtype: object, y=132    0
225    1
238    1
119    0
136    0
66 ...102    1
Name: Tagging, Length: 200, dtype: int64, scorer={'score': <function _passthrough_scorer>}, train=array([ 63,  64,  68,  71,  72,  73,  74,  75,  ..., 192, 193, 194, 195, 196, 197,
       198, 199]), test=array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 1... 56, 57, 58, 59, 60, 61, 62, 65, 66, 67, 69, 70]), verbose=1, parameters={'clf__alpha': 0.01, 'clf__fitprior': True}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    439                       for k, v in fit_params.items()])
    440 
    441     test_scores = {}
    442     train_scores = {}
    443     if parameters is not None:
--> 444         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(me...alpha=0.01, class_prior=None, fit_prior=True))])>
        parameters = {'clf__alpha': 0.01, 'clf__fitprior': True}
    445 
    446     start_time = time.time()
    447 
    448     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/pipeline.py in set_params(self=Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), **kwargs={'clf__alpha': 0.01, 'clf__fitprior': True})
    137 
    138         Returns
    139         -------
    140         self
    141         """
--> 142         self._set_params('steps', **kwargs)
        self._set_params = <bound method _BaseComposition._set_params of Pi...alpha=0.01, class_prior=None, fit_prior=True))])>
        kwargs = {'clf__alpha': 0.01, 'clf__fitprior': True}
    143         return self
    144 
    145     def _validate_steps(self):
    146         names, estimators = zip(*self.steps)

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in _set_params(self=Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), attr='steps', **params={'clf__alpha': 0.01, 'clf__fitprior': True})
     44         names, _ = zip(*getattr(self, attr))
     45         for name in list(six.iterkeys(params)):
     46             if '__' not in name and name in names:
     47                 self._replace_estimator(attr, name, params.pop(name))
     48         # 3. Step parameters and other initilisation arguments
---> 49         super(_BaseComposition, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(me...alpha=0.01, class_prior=None, fit_prior=True))])>
        params = {'clf__alpha': 0.01, 'clf__fitprior': True}
     50         return self
     51 
     52     def _replace_estimator(self, attr, name, new_val):
     53         # assumes `name` is a valid estimator name

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/base.py in set_params(self=Pipeline(memory=None,
     steps=[('cleaner', <_...(alpha=0.01, class_prior=None, fit_prior=True))]), **params={'clf__alpha': 0.01, 'clf__fitprior': True})
    277                 nested_params[key][sub_key] = value
    278             else:
    279                 setattr(self, key, value)
    280 
    281         for key, sub_params in nested_params.items():
--> 282             valid_params[key].set_params(**sub_params)
        valid_params = {'cleaner': <__main__.predictors object>, 'clf': MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True), 'clf__alpha': 1.0, 'clf__class_prior': None, 'clf__fit_prior': True, 'memory': None, 'steps': [('cleaner', <__main__.predictors object>), ('vectorizer', TfidfVectorizer(analyzer='word', binary=False, d...227101e0>, use_idf=True,
        vocabulary=None)), ('to_dense', <__main__.DenseTransformer object>), ('clf', MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True))], 'to_dense': <__main__.DenseTransformer object>, 'vectorizer': TfidfVectorizer(analyzer='word', binary=False, d...227101e0>, use_idf=True,
        vocabulary=None), 'vectorizer__analyzer': 'word', ...}
        key.set_params = undefined
        sub_params = {'alpha': 0.01, 'fitprior': True}
    283 
    284         return self
    285 
    286     def __repr__(self):

...........................................................................
/Users/rohmadi.rohmadi/anaconda3/lib/python3.6/site-packages/sklearn/base.py in set_params(self=MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True), **params={'alpha': 0.01, 'fitprior': True})
    269             key, delim, sub_key = key.partition('__')
    270             if key not in valid_params:
    271                 raise ValueError('Invalid parameter %s for estimator %s. '
    272                                  'Check the list of available parameters '
    273                                  'with `estimator.get_params().keys()`.' %
--> 274                                  (key, self))
        key = 'fitprior'
        self = MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True)
    275 
    276             if delim:
    277                 nested_params[key][sub_key] = value
    278             else:

ValueError: Invalid parameter fitprior for estimator MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True). Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

In [None]:
model.best_estimator_

In [None]:
# Prediction Results
# 1 = Valid article
# 0 = Hoax article
for (sample,pred) in zip(X_test,sample_prediction):
    print(sample,"Prediction=>",pred)

In [29]:
# Accuracy
print("Accuracy: ",pipe.score(X_test,y_test))
print("Accuracy: ",pipe.score(X_test,sample_prediction))

  if diff:


Accuracy:  0.82
Accuracy:  0.76


  if diff:


In [53]:
tfvectorizer = TfidfVectorizer(tokenizer = tokenizer, max_df= 0.5, min_df=2, max_features = 100)

mNbClassifier = MultinomialNB(alpha=0.1, fit_prior=True, class_prior=None)
svmClassifier = svm.SVC()
knnClassifier = KNeighborsClassifier(n_neighbors=3)

# votingClassifier = VotingClassifier(estimators=[('KNN', knnClassifier),
#                                                 ('NaiveBayes', mNbClassifier), 
#                                                 ('SVM', svmClassifier)])

votingClassifier = VotingClassifier(estimators=[('KNN', knnClassifier),
                                                ('NaiveBayes', mNbClassifier), 
                                                ('SVM', svmClassifier)], voting='hard')



# Create the  pipeline to clean, tokenize, vectorize, and classify 
pipe = Pipeline([("cleaner", predictors()),
                 ('vectorizer', tfvectorizer),
                 ('to_dense', DenseTransformer()),
                 ('classifier', votingClassifier)])
# Fit our data
pipe.fit(X_train,y_train)
print("Accuracy: ",pipe.score(X_test,y_test))

Accuracy:  0.7


  if diff:


In [None]:
x= tfvectorizer.fit_transform(df['Articles'])
df1 = pd.DataFrame(x.toarray(), columns=tfvectorizer.get_feature_names())

In [None]:
df1.head()

In [None]:
feature_names = tfvectorizer.get_feature_names()
corpus_index = [n for n in corpus]
import pandas as pd
df3 = pd.DataFrame(tfs.T.todense(), index=feature_names, columns=corpus_index)
print(df3)

In [None]:
from sklearn.metrics import confusion_matrix
# from collections import defaultdict

# refsets = defaultdict(set)
# testsets = defaultdict(set)
# labels = []
# tests = []
# for i, (feats, label) in enumerate(X_test,sample_prediction):
#     refsets[label].add(i)
#     observed = classifier.classify(feats)
#     testsets[observed].add(i)
#     labels.append(label)
#     tests.append(observed)

# print(metrics.confusion_matrix(labels, tests))

# confusion_matrix(X_test,y_test)

# from sklearn.ensemble import VotingClassifier


In [None]:
from sklearn import datasets
iris = datasets.load_iris()