In [1]:
import sys
# Make the project root importable before any `src.*` import below.
sys.path.append("../")

from sklearn.base import clone
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

from src.eval_method import CrossValidation
from src.experiment import Experiment
from src.metrics import Accuracy,Precision,Recall,F1Score
from src.preprocess import BasicTextPreprocess,BasicTextFeatureExtraction
from src.utils.data_util import biocaser2text

In [2]:
# Load the BioCaster corpus into a dataframe. The columns used below are
# `docs` (the document text) and `labels` (the class of each document).
data_file = "../data/biocaster/BioCaster.3.xml"
data_df = biocaser2text(data_file)

# Encode the string labels as integers; an unexpected label raises KeyError.
map_dic = dict(negative=0, positive=1)
data_df["labels"] = data_df["labels"].apply(map_dic.__getitem__)

data_df.head(n=3)

parse biocaser data from ../data/biocaster/BioCaster.3.xml, docs number:1003, lablels number:1003


Unnamed: 0,docs,labels
0,\nBird Flu Outbreak Drill Spooks Manitoba Town...,0
1,\nTyphoid outbreak in Agusan del Sur town unde...,1
2,\n Typhoid Outbreak In Central Nepal November...,1


In [3]:
# --- Pre-processing, feature extraction and evaluation protocol -------------
bp = BasicTextPreprocess()

# Feature sets to evaluate. A nested list requests a combination of the listed
# representations (shown as e.g. "tfidf+bigram" in the experiment log).
bfe = BasicTextFeatureExtraction(["tfidf","bow","bigram",["tfidf","bigram"],["bow","bigram"],["tfidf","trigram"],["bow","trigram"],["tfidf","bigram","trigram"]])

cv = CrossValidation(n_splits=10, n_repeats=10, random_state=12345)

# --- Metrics ----------------------------------------------------------------
acc = Accuracy()
pre = Precision()
rec = Recall()
f1 = F1Score()

# --- Base classifiers -------------------------------------------------------
# Each model carries a `.name` attribute, which appears in the `model` column
# of the experiment output.
svm = LinearSVC(random_state=0, tol=1e-5,max_iter=5000)
svm.name = "svm"
dtree = DecisionTreeClassifier(random_state=0,criterion='entropy')
dtree.name="dtree"
bnb = BernoulliNB()
bnb.name = "BernoulliNB"
gnb = GaussianNB()
gnb.name = "GaussianNB"
mnb = MultinomialNB()
mnb.name = "MultinomialNB"

# --- chi2 feature-selection variants ----------------------------------------
# Number of features kept by the chi2 selector.
CHI2_K = 9000

# Unfitted selector template; kept under its original name so any other cell
# that refers to `ch2` keeps working.
ch2 = SelectKBest(chi2, k=CHI2_K)


def _with_chi2(classifier, selector=ch2):
    """Return a `selector -> classifier` pipeline named "<classifier.name>+chi2".

    Both steps are cloned, so every pipeline owns its own unfitted selector and
    classifier. A Pipeline fits its steps in place, so sharing one selector
    (or classifier) instance between several models would let fitting one
    model overwrite the fitted state of the others.

    Parameters
    ----------
    classifier : estimator with a ``name`` attribute
        Base model to wrap; it is not modified.
    selector : transformer, optional
        Unfitted feature selector used as a template (default: ``ch2``).

    Returns
    -------
    Pipeline
        Pipeline with steps ``'sel'`` and ``'cls'`` and a ``name`` attribute.
    """
    pipeline = Pipeline([('sel', clone(selector)), ('cls', clone(classifier))])
    pipeline.name = classifier.name + "+chi2"
    return pipeline


svm_ch2 = _with_chi2(svm)      # "svm+chi2"
dtree_ch2 = _with_chi2(dtree)  # "dtree+chi2"
bnb_ch2 = _with_chi2(bnb)      # "BernoulliNB+chi2"
gnb_ch2 = _with_chi2(gnb)      # "GaussianNB+chi2"
mnb_ch2 = _with_chi2(mnb)      # "MultinomialNB+chi2"

In [4]:
# Evaluate every model on every feature set with the configured
# cross-validation and metrics. Per the run log, results are written to the
# CSV file, which is created on first use and appended to afterwards.
experiment = Experiment(
    data_df=data_df,
    preprocessor=bp,
    extractor=bfe,
    eval_method=cv,
    models=[
        svm, dtree, bnb, gnb, mnb,
        svm_ch2, dtree_ch2, bnb_ch2, gnb_ch2, mnb_ch2,
    ],
    metrics=[acc, pre, rec, f1],
    result_file="../biocaser_result.csv",
    verbose=True,
)

# NOTE(review): `exp` holds whatever `run()` returns, not the Experiment
# object itself; confirm that this is what later cells expect.
exp = experiment.run()

2it [00:00, 10.08it/s]

Found feature_columns: ['tfidf', 'bow', 'bigram', 'tfidf+bigram', 'bow+bigram', 'tfidf+trigram', 'bow+trigram', 'tfidf+bigram+trigram']
Experiment with svm model on tfidf feature.


100it [00:10,  9.50it/s]
1it [00:00,  6.00it/s]

total num of splits: 100
Create new result_file: ../biocaser_result.csv
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model feature
0  0.917743   0.880836  0.902338  0.890168   0.097547  0.005657   svm   tfidf
Experiment with svm model on bow feature.


100it [00:16,  6.11it/s]
1it [00:00,  5.62it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model feature
0  0.906368   0.853702  0.902955  0.876369    0.15407  0.007595   svm     bow
Experiment with svm model on bigram feature.


100it [00:16,  5.91it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model feature
0  0.851225    0.84217  0.740953  0.785788   0.161262  0.006162   svm  bigram
Experiment with svm model on tfidf+bigram feature.


100it [00:39,  2.53it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model  \
0  0.861924   0.847669  0.767729  0.803507   0.385064  0.008059   svm   

        feature  
0  tfidf+bigram  
Experiment with svm model on bow+bigram feature.


100it [00:35,  2.85it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s) model  \
0  0.904998   0.852435  0.90332  0.875837   0.337442  0.011544   svm   

      feature  
0  bow+bigram  
Experiment with svm model on tfidf+trigram feature.


100it [00:23,  4.25it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model  \
0  0.853949   0.851059  0.739083   0.78829   0.226791  0.006356   svm   

         feature  
0  tfidf+trigram  
Experiment with svm model on bow+trigram feature.


100it [00:23,  4.22it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model  \
0  0.902895   0.847637  0.901686   0.87245   0.227034  0.007774   svm   

       feature  
0  bow+trigram  
Experiment with svm model on tfidf+bigram+trigram feature.


100it [00:57,  1.74it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s) model  \
0  0.855538   0.847202  0.749401  0.792507    0.56265  0.008682   svm   

                feature  
0  tfidf+bigram+trigram  
Experiment with dtree model on tfidf feature.


100it [01:05,  1.53it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model feature
0   0.81135   0.751344  0.739193  0.742918   0.644681  0.005985  dtree   tfidf
Experiment with dtree model on bow feature.


100it [00:58,  1.71it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision  recall  f1_score  Train (s)  Test (s)  model feature
0   0.82025   0.751421  0.7717   0.75886   0.577845  0.006143  dtree     bow
Experiment with dtree model on bigram feature.


100it [04:45,  2.86s/it]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)  model feature
0  0.775482   0.701936  0.69304  0.694391   2.849804  0.005469  dtree  bigram
Experiment with dtree model on tfidf+bigram feature.


100it [02:20,  1.40s/it]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model  \
0  0.814432   0.751816  0.750941   0.74826   1.393553  0.009513  dtree   

        feature  
0  tfidf+bigram  
Experiment with dtree model on bow+bigram feature.


100it [02:14,  1.35s/it]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model  \
0  0.819013   0.754109  0.765272  0.756732   1.335315   0.00977  dtree   

      feature  
0  bow+bigram  
Experiment with dtree model on tfidf+trigram feature.


100it [01:27,  1.14it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model  \
0  0.811924    0.74927  0.745274  0.743971   0.871351   0.00718  dtree   

         feature  
0  tfidf+trigram  
Experiment with dtree model on bow+trigram feature.


100it [01:21,  1.23it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model  \
0  0.827645   0.761004  0.787612  0.771601   0.802053  0.007266  dtree   

       feature  
0  bow+trigram  
Experiment with dtree model on tfidf+bigram+trigram feature.


100it [02:44,  1.64s/it]
1it [00:00,  7.25it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  model  \
0  0.810863   0.751062  0.736113  0.740748   1.632152  0.010597  dtree   

                feature  
0  tfidf+bigram+trigram  
Experiment with BernoulliNB model on tfidf feature.


100it [00:14,  7.05it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.876374   0.762563  0.971843  0.853011   0.126264  0.013752  BernoulliNB   

  feature  
0   tfidf  
Experiment with BernoulliNB model on bow feature.


100it [00:18,  5.30it/s]
1it [00:00,  5.20it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.876254   0.761656  0.970822  0.851734   0.175709  0.011475  BernoulliNB   

  feature  
0     bow  
Experiment with BernoulliNB model on bigram feature.


100it [00:19,  5.19it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.835007   0.816217  0.723588   0.76377   0.181347   0.00981  BernoulliNB   

  feature  
0  bigram  
Experiment with BernoulliNB model on tfidf+bigram feature.


100it [00:26,  3.79it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.896316   0.808509  0.949253  0.871911   0.228354  0.033382  BernoulliNB   

        feature  
0  tfidf+bigram  
Experiment with BernoulliNB model on bow+bigram feature.


100it [00:43,  2.30it/s]
1it [00:00,  5.55it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.895436   0.806416  0.949345  0.870503   0.402428  0.031113  BernoulliNB   

      feature  
0  bow+bigram  
Experiment with BernoulliNB model on tfidf+trigram feature.


100it [00:17,  5.62it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.900917    0.80937  0.960418  0.876381   0.159121  0.017012  BernoulliNB   

         feature  
0  tfidf+trigram  
Experiment with BernoulliNB model on bow+trigram feature.


100it [00:28,  3.50it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)        model  \
0  0.901984   0.813525  0.959572  0.879208   0.269832  0.014116  BernoulliNB   

       feature  
0  bow+trigram  
Experiment with BernoulliNB model on tfidf+bigram+trigram feature.


100it [00:30,  3.29it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)        model  \
0  0.895012   0.826608  0.91038  0.865204   0.264559  0.038064  BernoulliNB   

                feature  
0  tfidf+bigram+trigram  
Experiment with GaussianNB model on tfidf feature.


100it [00:21,  4.67it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.761016   0.884504  0.412512  0.558834   0.195946  0.016793  GaussianNB   

  feature  
0   tfidf  
Experiment with GaussianNB model on bow feature.


100it [00:22,  4.47it/s]
1it [00:00,  5.30it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.771096   0.885779  0.443021   0.58706   0.205123  0.017306  GaussianNB   

  feature  
0     bow  
Experiment with GaussianNB model on bigram feature.


100it [00:18,  5.28it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.833597   0.863956  0.658361  0.744534   0.172831  0.015171  GaussianNB   

  feature  
0  bigram  
Experiment with GaussianNB model on tfidf+bigram feature.


100it [00:41,  2.42it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.830919    0.89412  0.620091  0.729312   0.373223  0.037947  GaussianNB   

        feature  
0  tfidf+bigram  
Experiment with GaussianNB model on bow+bigram feature.


100it [00:41,  2.39it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.828007   0.905382  0.602167  0.719318   0.379049  0.037886  GaussianNB   

      feature  
0  bow+bigram  
Experiment with GaussianNB model on tfidf+trigram feature.


100it [00:27,  3.63it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.819142   0.765415  0.743253  0.751216   0.252567  0.021517  GaussianNB   

         feature  
0  tfidf+trigram  
Experiment with GaussianNB model on bow+trigram feature.


100it [00:28,  3.50it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.826431   0.868623  0.633819  0.729377   0.262489  0.022042  GaussianNB   

       feature  
0  bow+trigram  
Experiment with GaussianNB model on tfidf+bigram+trigram feature.


100it [00:47,  2.08it/s]
2it [00:00, 16.17it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.848566   0.881167  0.688573  0.769958   0.434174  0.044261  GaussianNB   

                feature  
0  tfidf+bigram+trigram  
Experiment with MultinomialNB model on tfidf feature.


100it [00:06, 15.16it/s]
1it [00:00,  7.46it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.84992   0.925726  0.656536    0.7641   0.058717  0.006007   

           model feature  
0  MultinomialNB   tfidf  
Experiment with MultinomialNB model on bow feature.


100it [00:12,  7.89it/s]
1it [00:00,  6.84it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.87351    0.75859  0.971198  0.850191   0.118917  0.006276   

           model feature  
0  MultinomialNB     bow  
Experiment with MultinomialNB model on bigram feature.


100it [00:13,  7.29it/s]
1it [00:00,  9.17it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)          model  \
0   0.85455   0.733893  0.95967  0.830527   0.130271  0.005339  MultinomialNB   

  feature  
0  bigram  
Experiment with MultinomialNB model on tfidf+bigram feature.


100it [00:11,  9.01it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.873068   0.771141  0.941116  0.845914   0.100778  0.008513   

           model       feature  
0  MultinomialNB  tfidf+bigram  
Experiment with MultinomialNB model on bow+bigram feature.


100it [00:29,  3.39it/s]
2it [00:00, 12.82it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)          model  \
0  0.881734   0.770558  0.97207  0.858066   0.284007  0.009606  MultinomialNB   

      feature  
0  bow+bigram  
Experiment with MultinomialNB model on tfidf+trigram feature.


100it [00:08, 11.89it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.867795   0.794305  0.874418  0.830126   0.075956  0.006896   

           model        feature  
0  MultinomialNB  tfidf+trigram  
Experiment with MultinomialNB model on bow+trigram feature.


100it [00:20,  4.87it/s]
1it [00:00,  8.16it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.887582   0.782657  0.966419  0.863392   0.196327  0.007382   

           model      feature  
0  MultinomialNB  bow+trigram  
Experiment with MultinomialNB model on tfidf+bigram+trigram feature.


100it [00:12,  7.97it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.87407    0.77082  0.943999  0.846847   0.114371   0.00936   

           model               feature  
0  MultinomialNB  tfidf+bigram+trigram  
Experiment with svm+chi2 model on tfidf feature.


100it [00:25,  3.86it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.919254   0.889766  0.894683  0.890985   0.247534  0.009372  svm+chi2   

  feature  
0   tfidf  
Experiment with svm+chi2 model on bow feature.


100it [00:38,  2.58it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)     model  \
0  0.903111   0.862814  0.87962   0.86972   0.375646  0.009689  svm+chi2   

  feature  
0     bow  
Experiment with svm+chi2 model on bigram feature.


100it [00:37,  2.68it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.859413   0.818355  0.802532  0.808131   0.361321    0.0092  svm+chi2   

  feature  
0  bigram  
Experiment with svm+chi2 model on tfidf+bigram feature.


100it [00:46,  2.16it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.860101   0.819676  0.801516  0.808144   0.449234  0.012732  svm+chi2   

        feature  
0  tfidf+bigram  
Experiment with svm+chi2 model on bow+bigram feature.


100it [01:07,  1.49it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.897106   0.861325  0.863972  0.860891   0.655295  0.013105  svm+chi2   

      feature  
0  bow+bigram  
Experiment with svm+chi2 model on tfidf+trigram feature.


100it [00:37,  2.65it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.843082   0.844813  0.714716  0.770406    0.36551  0.010789  svm+chi2   

         feature  
0  tfidf+trigram  
Experiment with svm+chi2 model on bow+trigram feature.


100it [00:50,  1.97it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.899096   0.866671  0.862894  0.863371   0.494779  0.010873  svm+chi2   

       feature  
0  bow+trigram  
Experiment with svm+chi2 model on tfidf+bigram+trigram feature.


100it [00:52,  1.90it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)     model  \
0  0.842093   0.821049  0.736794  0.774131   0.509811  0.014067  svm+chi2   

                feature  
0  tfidf+bigram+trigram  
Experiment with dtree+chi2 model on tfidf feature.


100it [00:38,  2.61it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0   0.81525   0.755123  0.748454  0.749519   0.373895  0.008535  dtree+chi2   

  feature  
0   tfidf  
Experiment with dtree+chi2 model on bow feature.


100it [00:42,  2.35it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)       model  \
0  0.831787   0.769167  0.78969  0.776271   0.417603   0.00739  dtree+chi2   

  feature  
0     bow  
Experiment with dtree+chi2 model on bigram feature.


100it [01:23,  1.20it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0   0.77999   0.713231  0.683454   0.69513      0.822  0.006933  dtree+chi2   

  feature  
0  bigram  
Experiment with dtree+chi2 model on tfidf+bigram feature.


100it [00:52,  1.90it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.822032   0.771371  0.742718  0.753499   0.514113  0.011974  dtree+chi2   

        feature  
0  tfidf+bigram  
Experiment with dtree+chi2 model on bow+bigram feature.


100it [01:12,  1.38it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.828309   0.767319  0.774717  0.768322   0.714056  0.010942  dtree+chi2   

      feature  
0  bow+bigram  
Experiment with dtree+chi2 model on tfidf+trigram feature.


100it [00:43,  2.30it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.820532   0.765862  0.748881  0.754059    0.42399  0.009937  dtree+chi2   

         feature  
0  tfidf+trigram  
Experiment with dtree+chi2 model on bow+trigram feature.


100it [00:54,  1.84it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)       model  \
0  0.827114   0.763846  0.77662  0.767544   0.532748  0.008744  dtree+chi2   

       feature  
0  bow+trigram  
Experiment with dtree+chi2 model on tfidf+bigram+trigram feature.


100it [00:58,  1.72it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)       model  \
0  0.825995   0.775963  0.748711  0.759903   0.566316  0.013345  dtree+chi2   

                feature  
0  tfidf+bigram+trigram  
Experiment with BernoulliNB+chi2 model on tfidf feature.


100it [00:28,  3.46it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.900902   0.810976  0.957493  0.876819   0.269866  0.017574   

              model feature  
0  BernoulliNB+chi2   tfidf  
Experiment with BernoulliNB+chi2 model on bow feature.


100it [00:36,  2.71it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.894307   0.801539  0.953363  0.869504   0.353058  0.014223   

              model feature  
0  BernoulliNB+chi2     bow  
Experiment with BernoulliNB+chi2 model on bigram feature.


100it [00:35,  2.78it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.868393   0.793349  0.878288  0.831443   0.344701  0.013725   

              model feature  
0  BernoulliNB+chi2  bigram  
Experiment with BernoulliNB+chi2 model on tfidf+bigram feature.


100it [00:46,  2.15it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.872785   0.797351  0.883239    0.8363   0.442311  0.021612   

              model       feature  
0  BernoulliNB+chi2  tfidf+bigram  
Experiment with BernoulliNB+chi2 model on bow+bigram feature.


100it [01:06,  1.52it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.901885   0.824376  0.938681  0.875947   0.640889   0.01753   

              model     feature  
0  BernoulliNB+chi2  bow+bigram  
Experiment with BernoulliNB+chi2 model on tfidf+trigram feature.


100it [00:35,  2.79it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.899008    0.84524  0.894346  0.867611   0.336632  0.019657   

              model        feature  
0  BernoulliNB+chi2  tfidf+trigram  
Experiment with BernoulliNB+chi2 model on bow+trigram feature.


100it [00:48,  2.05it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.906096   0.846245  0.917305  0.878534    0.47153  0.015125   

              model      feature  
0  BernoulliNB+chi2  bow+trigram  
Experiment with BernoulliNB+chi2 model on tfidf+bigram+trigram feature.


100it [00:52,  1.91it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.845745   0.818713  0.757272  0.783845   0.499472  0.022875   

              model               feature  
0  BernoulliNB+chi2  tfidf+bigram+trigram  
Experiment with GaussianNB+chi2 model on tfidf feature.


100it [00:30,  3.31it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)  \
0  0.796393   0.830587  0.57347  0.674345   0.285693  0.015423   

             model feature  
0  GaussianNB+chi2   tfidf  
Experiment with GaussianNB+chi2 model on bow feature.


100it [00:38,  2.57it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.803406   0.843121  0.581857   0.68466   0.372887   0.01451   

             model feature  
0  GaussianNB+chi2     bow  
Experiment with GaussianNB+chi2 model on bigram feature.


100it [00:37,  2.65it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.793099   0.655562  0.942659  0.771032   0.362336  0.013845   

             model feature  
0  GaussianNB+chi2  bigram  
Experiment with GaussianNB+chi2 model on tfidf+bigram feature.


100it [00:47,  2.09it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision   recall  f1_score  Train (s)  Test (s)  \
0   0.80169   0.664085  0.94933  0.779314   0.456153  0.019779   

             model       feature  
0  GaussianNB+chi2  tfidf+bigram  
Experiment with GaussianNB+chi2 model on bow+bigram feature.


100it [01:07,  1.48it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.860615    0.74658  0.954034  0.835684   0.657628  0.018901   

             model     feature  
0  GaussianNB+chi2  bow+bigram  
Experiment with GaussianNB+chi2 model on tfidf+trigram feature.


100it [00:36,  2.71it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.82025   0.758583  0.764772  0.758582   0.349588   0.01745   

             model        feature  
0  GaussianNB+chi2  tfidf+trigram  
Experiment with GaussianNB+chi2 model on bow+trigram feature.


100it [00:50,  1.97it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.855336   0.762162  0.891623  0.819989   0.490541  0.016301   

             model      feature  
0  GaussianNB+chi2  bow+trigram  
Experiment with GaussianNB+chi2 model on tfidf+bigram+trigram feature.


100it [00:53,  1.86it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.718427   0.571408  0.989197  0.722544   0.515164  0.021308   

             model               feature  
0  GaussianNB+chi2  tfidf+bigram+trigram  
Experiment with MultinomialNB+chi2 model on tfidf feature.


100it [00:21,  4.56it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.875062   0.896598  0.756328  0.818303   0.207066  0.010263   

                model feature  
0  MultinomialNB+chi2   tfidf  
Experiment with MultinomialNB+chi2 model on bow feature.


100it [00:30,  3.30it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.894022   0.802547  0.948391  0.868097   0.292614  0.009152   

                model feature  
0  MultinomialNB+chi2     bow  
Experiment with MultinomialNB+chi2 model on bigram feature.


100it [00:29,  3.39it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.813812    0.67318  0.977131  0.794535   0.284983  0.008744   

                model feature  
0  MultinomialNB+chi2  bigram  
Experiment with MultinomialNB+chi2 model on tfidf+bigram feature.


100it [00:39,  2.51it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.818056   0.677429  0.977858  0.798695   0.381844  0.014681   

                model       feature  
0  MultinomialNB+chi2  tfidf+bigram  
Experiment with MultinomialNB+chi2 model on bow+bigram feature.


100it [00:59,  1.68it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.89589   0.799584  0.963138  0.872194    0.58022  0.012495   

                model     feature  
0  MultinomialNB+chi2  bow+bigram  
Experiment with MultinomialNB+chi2 model on tfidf+trigram feature.


100it [00:28,  3.49it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.860893   0.777802  0.879179  0.823384    0.27236  0.012205   

                model        feature  
0  MultinomialNB+chi2  tfidf+trigram  
Experiment with MultinomialNB+chi2 model on bow+trigram feature.


100it [00:42,  2.34it/s]
0it [00:00, ?it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0   0.90212   0.824694  0.939053  0.876759   0.415399  0.010624   

                model      feature  
0  MultinomialNB+chi2  bow+trigram  
Experiment with MultinomialNB+chi2 model on tfidf+bigram+trigram feature.


100it [00:45,  2.19it/s]

total num of splits: 100
../biocaser_result.csv  already exists, appending result to it
saving result ...
   accuracy  precision    recall  f1_score  Train (s)  Test (s)  \
0  0.835984    0.70418  0.968495  0.813918    0.43963  0.015777   

                model               feature  
0  MultinomialNB+chi2  tfidf+bigram+trigram  





In [8]:
# Show the text of the second document (positional row 1).
# NOTE(review): the displayed text looks already tokenised/stemmed, unlike the
# raw text shown by `head(3)` earlier; presumably an earlier step rewrote
# `docs` in place. Confirm before relying on `data_df` holding raw text.
data_df["docs"].iloc[1]

'number told magandang libertad far declar bunawan confirm del municip abl sinc well fatal clarifi hospit dr doctor water ruth sur deep main town checkup said particularli fever pilipina place 30 servic agusan manag offic health improso outbreak document umaga 500 case control peopl gone sought wa close advis use went admit typhoid barangay resid sourc villag monday high deni ha'