# Neural Network Multiclass Example

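This notebook trains a neural network classifier (scikit-learn's `MLPClassifier`) on four categories of the 20 Newsgroups text data set and evaluates it on the held-out test split.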

In [1]:
from sklearn.datasets import fetch_20newsgroups

# The four newsgroups used throughout this notebook.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

# Training split, shuffled with a fixed seed so the order is reproducible.
twenty_train = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,
    random_state=42,
)

# Display the class names in the order used by the integer targets.
twenty_train.target_names

['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']

In [2]:
# text preprocessing
# The NLTK corpus reader is imported under an alias so that the `stopwords`
# set defined below does not shadow it.
from nltk.corpus import stopwords as nltk_stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# `stopwords` remains a set of English stop words, as before.
stopwords = set(nltk_stopwords.words('english'))

# scikit-learn documents `stop_words` as 'english', a list, or None, so the
# set is converted to a (sorted, hence deterministic) list before being passed.
vectorizer = TfidfVectorizer(stop_words=sorted(stopwords), binary=True)

# NOTE(review): `vectorizer` is not referenced by the pipeline built in the
# next cell, which constructs its own default TfidfVectorizer() (no stop-word
# removal, binary=False) -- confirm whether that is intentional.

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss


# Two-stage pipeline: raw text -> TF-IDF features -> multilayer perceptron.
pipe1 = Pipeline(
    steps=[
        # Default TF-IDF settings.
        ('tfidf', TfidfVectorizer()),
        # Two hidden layers (15 and 7 units) trained with the L-BFGS solver;
        # random_state is fixed so weight initialisation is repeatable.
        (
            'neuralnet',
            MLPClassifier(
                solver='lbfgs',
                alpha=1e-5,
                hidden_layer_sizes=(15, 7),
                random_state=1,
            ),
        ),
    ]
)

# Fit on the raw training documents; the fitted pipeline is the cell's output.
pipe1.fit(twenty_train.data, twenty_train.target)

Pipeline(memory=None,
         steps=[('tfidf',
                 TfidfVectorizer(analyzer='word', binary=False,
                                 decode_error='strict',
                                 dtype=<class 'numpy.float64'>,
                                 encoding='utf-8', input='content',
                                 lowercase=True, max_df=1.0, max_features=None,
                                 min_df=1, ngram_range=(1, 1), norm='l2',
                                 preprocessor=None, smooth_idf=True,
                                 stop_words=None, strip_accents=None,
                                 sublinear_tf=False,
                                 token_pattern='...
                               batch_size='auto', beta_1=0.9, beta_2=0.999,
                               early_stopping=False, epsilon=1e-08,
                               hidden_layer_sizes=(15, 7),
                               learning_rate='constant',
                               learning_ra

In [4]:
# evaluate on test data
from sklearn import metrics
import numpy as np

# Held-out test split for the same four categories.
twenty_test = fetch_20newsgroups(
    subset='test',
    categories=categories,
    shuffle=True,
    random_state=42,
)
pred = pipe1.predict(twenty_test.data)
y_true = twenty_test.target

# Per-class precision, recall and F1.
report = metrics.classification_report(
    y_true,
    pred,
    target_names=twenty_test.target_names,
)
print(report)

# Rows are true classes, columns are predicted classes.
print("Confusion matrix:\n", metrics.confusion_matrix(y_true, pred))

# Fraction of test documents whose predicted label matches the true label.
print("\nOverall accuracy: ", np.mean(pred == y_true))

                        precision    recall  f1-score   support

           alt.atheism       0.96      0.73      0.83       319
         comp.graphics       0.93      0.89      0.91       389
               sci.med       0.93      0.84      0.88       396
soc.religion.christian       0.72      0.96      0.82       398

              accuracy                           0.86      1502
             macro avg       0.88      0.85      0.86      1502
          weighted avg       0.88      0.86      0.86      1502

Confusion matrix:
 [[234   2   9  74]
 [  0 345  13  31]
 [  5  16 332  43]
 [  6   9   2 381]]

Overall accuracy:  0.8601864181091877


The neural net classifier took longer to train and performed worse than logistic regression on this data. 

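Accuracy for the hidden-layer configurations (`hidden_layer_sizes`) that were tried:

- A network of (200, 67, 21, 7) got 88.5% accuracy.
- A network of (100, 67, 15, 7) got 87% accuracy.
- A network of (15, 7), the configuration run above, got 86% accuracy.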