In [1]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import normalize
from py.utils import load_data
import pickle

# Location handed to load_data() when each dataset is read.
directory = '../data/'

# Dataset identifiers; the experiment loop loads and evaluates each one in turn.
heads = [
    'l30_r15',
    'l10_r10',
    'l5_r5',
]

# Value passed to cross_val_score as its `cv` argument.
n_cv = 5

In [2]:
import pickle
# Cross-validation scores collected so far, keyed by (experiment key, head).
performances = {}

# Fixed seed for the MLP runs (weight initialisation and batch sampling are
# random), so that Restart & Run All reproduces the same scores and models.
mlp_seed = 0

# One row per experiment:
#   (estimator class, constructor kwargs, console label,
#    key stored in `performances`, prefix of the pickled model's file name)
# Labels, keys and prefixes are kept exactly as they were in the original
# copy-pasted stanzas so printed output and files on disk keep their names.
# NOTE(review): the file-name prefixes are inconsistent -- only the BernoulliNB
# prefix ends with a space before `head` is appended, and the SVC C=10 prefix
# differs from its `performances` key. Left untouched because other code may
# load these files by name; confirm before normalising them.
experiments = [
    (BernoulliNB, {},
     '\nBernoulli Naive Bayes: ',
     'BernoulliNB norm',
     'BernoulliNB norm '),
    (MLPClassifier, {'hidden_layer_sizes': (5,), 'random_state': mlp_seed},
     'Multilayer Perceptron Classifier (h=[5]): ',
     'MLPClassifier (5,) norm',
     'MLPClassifier (5,) norm'),
    (MLPClassifier, {'hidden_layer_sizes': (20,), 'random_state': mlp_seed},
     'Multilayer Perceptron Classifier (h=[20])',
     'MLPClassifier (20,) norm',
     'MLPClassifier (20,) norm'),
    (MLPClassifier, {'hidden_layer_sizes': (50, 10), 'random_state': mlp_seed},
     'Multilayer Perceptron Classifier (h=[50, 10]): ',
     'MLPClassifier (50,10) norm',
     'MLPClassifier (50,10) norm'),
    (SVC, {'C': 10.0, 'kernel': 'rbf', 'shrinking': True},
     'Support Vector Machine (rbf, C=10.0): ',
     'SVC (C=10) norm',
     'Support Vector Machine (rbf, C=10.0) norm'),
    # NOTE(review): the recorded run shows identical fold scores for C=1.0 and
    # C=0.1, which presumably means these two models predict a single class --
    # confirm before relying on them.
    (SVC, {'C': 1.0, 'kernel': 'rbf', 'shrinking': True},
     'Support Vector Machine (rbf, C=1.0): ',
     'SVC (C=1.0) norm',
     'SVC (C=1.0) norm'),
    (SVC, {'C': 0.1, 'kernel': 'rbf', 'shrinking': True},
     'Support Vector Machine (rbf, C=0.1): ',
     'SVC (C=0.1) norm',
     'SVC (C=0.1) norm'),
]

for head in heads:
    print('\n\nhead = %s' % head)
    x, y, x_words, vocabs = load_data(head, directory)
    # normalize() rescales each sample (row) on its own by default, so doing
    # it once before the cross-validation split does not mix information
    # between folds.
    x = normalize(x)

    for estimator_cls, params, label, performance_key, model_prefix in experiments:
        # Fresh, unfitted estimator for every (experiment, head) pair.
        classifier = estimator_cls(**params)

        scores = cross_val_score(classifier, x, y, cv=n_cv)
        print(label, end='')
        print(' > %s' % ['%.5f' % s for s in scores])

        # Checkpoint the accumulated scores after every experiment so a crash
        # or interrupt in a later (slow) fit does not lose earlier results.
        performances[(performance_key, head)] = scores
        with open('performance_other_classifier norm.pkl', 'wb') as f:
            pickle.dump(performances, f)

        # cross_val_score fits clones only; refit on the full data set to
        # obtain the model that is persisted.
        classifier.fit(x, y)
        model_name = model_prefix + head
        with open('../models/%s.pkl' % model_name, 'wb') as f:
            pickle.dump(classifier, f)

    print('-' * 80)



head = l30_r15
x shape = (15166, 2617)
y shape = (15166,)
# features = 2617
# L words = 15166

Bernoulli Naive Bayes:  > ['0.99011', '0.98681', '0.98648', '0.98615', '0.98582']




Multilayer Perceptron Classifier (h=[5]):  > ['0.99308', '0.98879', '0.98978', '0.99143', '0.99110']
Multilayer Perceptron Classifier (h=[20]) > ['0.99176', '0.98978', '0.98879', '0.99110', '0.99011']
Multilayer Perceptron Classifier (h=[50, 10]):  > ['0.99308', '0.98879', '0.98912', '0.99143', '0.98912']
Support Vector Machine (rbf, C=10.0):  > ['0.98583', '0.97857', '0.98351', '0.98220', '0.98318']
Support Vector Machine (rbf, C=1.0):  > ['0.83454', '0.83482', '0.83482', '0.83482', '0.83482']
Support Vector Machine (rbf, C=0.1):  > ['0.83454', '0.83482', '0.83482', '0.83482', '0.83482']
--------------------------------------------------------------------------------


head = l10_r10
x shape = (31797, 3297)
y shape = (31797,)
# features = 3297
# L words = 31797

Bernoulli Naive Bayes:  > ['0.98145', '0.98302', '0.98207', '0.98081', '0.98066']




Multilayer Perceptron Classifier (h=[5]):  > ['0.98726', '0.98884', '0.98789', '0.98695', '0.98679']
Multilayer Perceptron Classifier (h=[20]) > ['0.98695', '0.98805', '0.98695', '0.98569', '0.98710']
Multilayer Perceptron Classifier (h=[50, 10]):  > ['0.98726', '0.98711', '0.98758', '0.98600', '0.98789']
Support Vector Machine (rbf, C=10.0):  > ['0.97626', '0.97736', '0.97908', '0.97688', '0.97327']
Support Vector Machine (rbf, C=1.0):  > ['0.87013', '0.87013', '0.87011', '0.87011', '0.87011']
Support Vector Machine (rbf, C=0.1):  > ['0.87013', '0.87013', '0.87011', '0.87011', '0.87011']
--------------------------------------------------------------------------------


head = l5_r5
x shape = (50764, 4995)
y shape = (50764,)
# features = 4995
# L words = 50764

Bernoulli Naive Bayes:  > ['0.97853', '0.97538', '0.97725', '0.97626', '0.97419']




Multilayer Perceptron Classifier (h=[5]):  > ['0.98670', '0.98355', '0.98434', '0.98424', '0.98463']
Multilayer Perceptron Classifier (h=[20]) > ['0.98532', '0.98247', '0.98464', '0.98483', '0.98375']
Multilayer Perceptron Classifier (h=[50, 10]):  > ['0.98641', '0.98385', '0.98532', '0.98483', '0.98414']
Support Vector Machine (rbf, C=10.0):  > ['0.96819', '0.96760', '0.96829', '0.96819', '0.96277']
Support Vector Machine (rbf, C=1.0):  > ['0.89097', '0.89097', '0.89097', '0.89097', '0.89106']
Support Vector Machine (rbf, C=0.1):  > ['0.89097', '0.89097', '0.89097', '0.89097', '0.89106']
--------------------------------------------------------------------------------
