In [1]:
from spam_detection import *

preprocessings = [
    TfidfVectorizer(stop_words='english', max_df=0.7), 
    CountVectorizer(ngram_range=(1,1), # to use bigrams ngram_range=(2,2)
                    stop_words='english')             
                 ]

classifiers = [MultinomialNB(),
               PassiveAggressiveClassifier(),
               LogisticRegression()
              ]
parameters = [
    # Naive Bayes
    {
        'alpha': [0.1, 0.2, 0.5, 1.]
    },
    # Passive Aggressive
    {
        'max_iter': [10, 20, 50, 100],
         'C': [0.1, 0.5, 1.],
         'loss': ['hinge', 'squared_hinge']
    },
    # Logistic Regression
    {
        'penalty': ['l1', 'l2'],
        'C': [0.1, 0.5, 1., 2.],
        'max_iter': [20, 50, 100, 200]
    }
]

detector = SpamDetection('news.csv', 'text', 'label')
for prep in preprocessings:
    for clf, params in zip(classifiers, parameters):
        print(prep.__class__.__name__, clf.__class__.__name__)
        detector.set_methods(prep, clf)
        detector.set_parameters(params)
        detector.run()

TfidfVectorizer MultinomialNB
Running  function
Start timing at:  1624631023.4990184
Running  function
Start timing at:  1624631023.4990184
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631023.5000172
Time elapsed:  4.3902809619903564
Running  function
Start timing at:  1624631027.8902981




Time elapsed:  0.7240297794342041
Running  function
Start timing at:  1624631028.614328
accuracy_score:  0.9005524861878453
confusion matrix for  MultinomialNB :  [[525  96]
 [ 30 616]]
Time elapsed:  0.008976221084594727
Time elapsed:  5.124285697937012
TfidfVectorizer PassiveAggressiveClassifier
Running  function
Start timing at:  1624631028.6233041
Running  function
Start timing at:  1624631028.6243012
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631028.6243012
Time elapsed:  3.862215757369995
Running  function
Start timing at:  1624631032.486517




Time elapsed:  14.497930526733398
Running  function
Start timing at:  1624631046.9844475
accuracy_score:  0.9313338595106551
confusion matrix for  PassiveAggressiveClassifier :  [[580  41]
 [ 46 600]]
Time elapsed:  0.012964010238647461
Time elapsed:  18.37311029434204
TfidfVectorizer LogisticRegression
Running  function
Start timing at:  1624631046.9974115
Running  function
Start timing at:  1624631046.9974115
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631046.9974115
Time elapsed:  4.271599769592285
Running  function
Start timing at:  1624631051.2690113




Time elapsed:  16.92227268218994
Running  function
Start timing at:  1624631068.1922827
accuracy_score:  0.9187056037884768
confusion matrix for  LogisticRegression :  [[578  43]
 [ 60 586]]
Time elapsed:  0.007977008819580078
Time elapsed:  21.202848196029663
CountVectorizer MultinomialNB
Running  function
Start timing at:  1624631068.2002597
Running  function
Start timing at:  1624631068.2002597
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631068.201257
Time elapsed:  3.8576762676239014
Running  function
Start timing at:  1624631072.059931




Time elapsed:  0.6911492347717285
Running  function
Start timing at:  1624631072.7510803
accuracy_score:  0.8958168902920284
confusion matrix for  MultinomialNB :  [[529  92]
 [ 40 606]]
Time elapsed:  0.008975744247436523
Time elapsed:  4.559796333312988
CountVectorizer PassiveAggressiveClassifier
Running  function
Start timing at:  1624631072.7610526
Running  function
Start timing at:  1624631072.7610526
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631072.762052
Time elapsed:  3.5601816177368164
Running  function
Start timing at:  1624631076.3232365




Time elapsed:  17.217880964279175
Running  function
Start timing at:  1624631093.5421145
accuracy_score:  0.8997632202052092
confusion matrix for  PassiveAggressiveClassifier :  [[555  66]
 [ 61 585]]
Time elapsed:  0.014959573745727539
Time elapsed:  20.796021461486816
CountVectorizer LogisticRegression
Running  function
Start timing at:  1624631093.5580726
Running  function
Start timing at:  1624631093.5590684
----- Running data preprocessing. Currently nothing to do...
Time elapsed:  0.0
Running  function
Start timing at:  1624631093.5590684
Time elapsed:  4.859024524688721
Running  function
Start timing at:  1624631098.4191043




Time elapsed:  66.96713495254517
Running  function
Start timing at:  1624631165.3862393
accuracy_score:  0.9226519337016574
confusion matrix for  LogisticRegression :  [[580  41]
 [ 57 589]]
Time elapsed:  0.008978128433227539
Time elapsed:  71.83614897727966
