# 9.3 Neural Network Classifiers
author: Rachel Nelson

class: DSC550-T302 Data Mining (2215-1)

1. Neural Network Classifier with Scikit
Using the multi-label classifier dataset from earlier exercises (categorized-comments.jsonl in the reddit folder), fit a neural network classifier using scikit-learn. Use the code found in chapter 12 of the Applied Text Analysis with Python book as a guideline. Report the accuracy, precision, recall, F1-score, and confusion matrix.
2. Neural Network Classifier with Keras
Using the multi-label classifier dataset from earlier exercises (categorized-comments.jsonl in the reddit folder), fit a neural network classifier using Keras. Use the code found in chapter 12 of the Applied Text Analysis with Python book as a guideline. Report the accuracy, precision, recall, F1-score, and confusion matrix.
3. Classifying Images
In chapter 20 of the Machine Learning with Python Cookbook, implement the code found in section 20.15 to classify MNIST images using a convolutional neural network. Report the accuracy of your results.

1. Neural Network Classifier with Scikit
Report the accuracy, precision, recall, F1-score, and confusion matrix.

In [42]:
import jsonlines

1. Neural Network Classifier with Scikit (page 277-279)

In [43]:
# Read the JSON Lines file into a DataFrame, one row per record
import pandas as pd  # `pd` is used below but is not imported anywhere else in this notebook

with jsonlines.open('categorized-comments.jsonl') as f:
    # type=dict with skip_invalid=True skips lines that fail to parse
    # or that are not JSON objects, instead of raising
    newlist = list(f.iter(type=dict, skip_invalid=True))
comments_df = pd.DataFrame(newlist)
comments_df.head(5)

Unnamed: 0,cat,txt
0,sports,Barely better than Gabbert? He was significant...
1,sports,Fuck the ducks and the Angels! But welcome to ...
2,sports,Should have drafted more WRs.\n\n- Matt Millen...
3,sports,[Done](https://i.imgur.com/2YZ90pm.jpg)
4,sports,No!! NOO!!!!!


In [44]:
import numpy as np

def documents(corpus):
    """Return everything yielded by corpus.reviews() as a list."""
    reviews = corpus.reviews()
    return list(reviews)

def continuous(corpus):
    """Return everything yielded by corpus.scores() as a list."""
    scores = corpus.scores()
    return list(scores)

def make_categorical(corpus):
    """
    Bin the corpus scores into integer category labels.

    np.digitize is called with its default right=False, so each bin
    includes its lower edge and excludes its upper edge:

    1 (terrible) : 0.0 <= y < 3.0
    2 (okay)     : 3.0 <= y < 5.0
    3 (great)    : 5.0 <= y < 7.0
    4 (amazing)  : 7.0 <= y < 10.1

    Scores below 0.0 are labelled 0; scores of 10.1 or more are labelled 5.
    """
    bin_edges = [0.0, 3.0, 5.0, 7.0, 10.1]
    scores = continuous(corpus)
    return np.digitize(scores, bin_edges)

In [45]:
import joblib
from sklearn.model_selection import cross_val_score

def train_model(path, model, continuous=True, saveto=None, cv=12):
    """
    Trains model from corpus at specified path; constructing cross-validation
    scores using the cv parameter, then fitting the model on the full data.
    Returns the scores.

    Parameters
    ----------
    path : passed to PickledReviewsReader to open the corpus
    model : estimator or pipeline accepted by cross_val_score, with fit(X, y)
    continuous : if truthy, train against the raw scores and evaluate with
        R^2; otherwise train against the binned categories and evaluate
        with macro-averaged F1
    saveto : optional path; when given, the fitted model is written there
        with joblib
    cv : number of cross-validation folds passed to cross_val_score
    """
    # Load the corpus data and labels
    corpus = PickledReviewsReader(path)
    X = documents(corpus)
    if continuous:
        # The `continuous` flag shadows the module-level continuous() helper
        # inside this function, so calling continuous(corpus) here would call
        # a bool. Read the scores directly instead.
        y = list(corpus.scores())
        # 'r2' is the scikit-learn scorer name ('r2_score' is the function name)
        scoring = 'r2'
    else:
        y = make_categorical(corpus)
        # There is no 'f1_score' scorer, and plain 'f1' is binary-only; the
        # categories are multi-class, so an averaged variant is required
        scoring = 'f1_macro'

    # Compute cross-validation scores
    scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)

    # Fit the model on entire dataset
    model.fit(X, y)

    # Write to disk if specified -- after fitting, so the saved model is trained
    if saveto:
        joblib.dump(model, saveto)

    # Return scores
    return scores


I wanted to show you the error I kept getting here. I tried both `transformer` and `transformers`,
and could not get either package to work for the next part.

In [46]:
pip install transformer

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement transformer (from versions: none)
ERROR: No matching distribution found for transformer


In [47]:
# Build and cross-validate two scikit-learn neural network pipelines on the
# review corpus: an MLP regressor on the raw scores and an MLP classifier on
# the binned categories.
if __name__ == '__main__':
    # NOTE(review): TextNormalizer and PickledReviewsReader look like helpers
    # from local modules rather than installable packages -- confirm. The run
    # recorded below this cell failed with ImportError on the first import.
    from transformers import TextNormalizer
    from reader import PickledReviewsReader

    from sklearn.pipeline import Pipeline
    from sklearn.neural_network import MLPRegressor, MLPClassifier
    from sklearn.feature_extraction.text import TfidfVectorizer

    # Path to the preprocessed, part-of-speech tagged review corpus
    cpath = '../review_corpus_proc'
    # Normalize text -> TF-IDF features -> MLP with hidden layers of 500 and 150 units
    regressor = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', TfidfVectorizer()),
        ('ann', MLPRegressor(hidden_layer_sizes=[500,150], verbose=True))
        ])
    regression_scores = train_model(cpath, regressor, continuous=True)
    # Same preprocessing and layer sizes, with a classifier as the final step
    classifier = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', TfidfVectorizer()),
        ('ann', MLPClassifier(hidden_layer_sizes=[500,150], verbose=True))
        ])
    # NOTE(review): the variable name is spelled `classifer_scores` (missing "i")
    classifer_scores = train_model(cpath, classifier, continuous=False)

ImportError: cannot import name 'TextNormalizer' from 'transformers' (unknown location)

2. Neural Network Classifier with Keras (page 281)

In [None]:
from keras.layers import Dense
from keras.models import Sequential

N_FEATURES = 5000
N_CLASSES = 4


def build_network():
    """
    Build and return a compiled feed-forward classification network.

    The network takes N_FEATURES inputs, passes them through ReLU hidden
    layers of 500 and 150 units, and ends in a softmax layer with
    N_CLASSES units. It is compiled with categorical cross-entropy loss,
    the Adam optimizer, and accuracy as the reported metric.
    """
    layers = [
        Dense(500, activation='relu', input_shape=(N_FEATURES,)),
        Dense(150, activation='relu'),
        Dense(N_CLASSES, activation='softmax'),
    ]
    nn = Sequential(layers)
    nn.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return nn

In [None]:
# Assemble the text-classification pipeline whose final step is the Keras
# network produced by build_network.
if __name__ == '__main__':
    from sklearn.pipeline import Pipeline
    # NOTE(review): `transformer` looks like a local module providing
    # TextNormalizer; the pip install recorded earlier in this notebook found
    # no such package -- confirm where this module lives.
    from transformer import TextNormalizer
    # NOTE(review): newer Keras releases no longer ship
    # keras.wrappers.scikit_learn -- confirm against the installed version.
    from keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.feature_extraction.text import TfidfVectorizer

    # max_features=N_FEATURES caps the TF-IDF vocabulary at the same size as
    # the network's input_shape in build_network
    pipeline = Pipeline([
        ('norm', TextNormalizer()),
        ('vect', TfidfVectorizer(max_features=N_FEATURES)),
        ('nn', KerasClassifier(build_fn=build_network,
        epochs=200,
        batch_size=128))
        ])

In [None]:
def train_model(path, model, saveto=None, cv=12):
    """
    Trains model from corpus at specified path and fits on full data.
    If a saveto dictionary is specified, writes Keras and Sklearn
    pipeline components to disk separately. Returns the scores.

    Parameters
    ----------
    path : passed to PickledReviewsReader to open the corpus
    model : pipeline whose last step wraps a Keras model (exposed as
        `.model` on that step once fitted)
    saveto : optional dict with keys 'keras_model' (path for the Keras
        network) and 'sklearn_pipe' (path for the remaining pipeline steps)
    cv : number of cross-validation folds passed to cross_val_score

    The pipeline passed in is left intact: its final step is detached only
    while the remaining steps are being pickled.
    """
    corpus = PickledReviewsReader(path)
    X = documents(corpus)
    y = make_categorical(corpus)

    scores = cross_val_score(model, X, y, cv=cv, scoring='accuracy', n_jobs=-1)
    model.fit(X, y)

    if saveto:
        # The Keras network is saved through its own API ...
        model.steps[-1][1].model.save(saveto['keras_model'])
        # ... and only the remaining steps are pickled. Detach the final step
        # for the dump, then put it back so the caller's fitted pipeline is
        # still usable afterwards, even if the dump raises.
        keras_step = model.steps.pop(-1)
        try:
            joblib.dump(model, saveto['sklearn_pipe'])
        finally:
            model.steps.append(keras_step)

    return scores

In [None]:
# Corpus location, and the output files for the Keras network and the
# remaining scikit-learn pipeline steps
cpath = '../review_corpus_proc'
mpath = dict(keras_model='keras_nn.h5', sklearn_pipe='pipeline.pkl')

# Cross-validate, fit on the full corpus, and save both parts
scores = train_model(cpath, pipeline, saveto=mpath, cv=12)

3. Classifying Images (page 327)

In [None]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K

In [None]:
# Use the channels-first layout: image tensors are shaped (channels, height, width)
K.set_image_data_format("channels_first")

In [None]:
# Seed NumPy's global random number generator
# NOTE(review): this may not seed the Keras backend's own RNG -- confirm if exact repeatability matters
np.random.seed(0)

In [None]:
# Image dimensions: one color channel, 28 pixels high, 28 pixels wide
channels, height, width = 1, 28, 28

In [None]:
# Load the MNIST images (data) and labels (target), already split into training and test sets
(data_train, target_train), (data_test, target_test) = mnist.load_data()

In [None]:
# Reshape the training images to (n_images, channels, height, width)
data_train = data_train.reshape(data_train.shape[0], channels, height, width)

In [None]:
# Reshape the test images to (n_images, channels, height, width)
data_test = data_test.reshape(data_test.shape[0], channels, height, width)

In [None]:
# Rescale pixel intensity to between 0 and 1
# (assumes the raw pixel values lie in 0-255 -- TODO confirm)
features_train = data_train / 255
features_test = data_test / 255

In [None]:
# One-hot encode the training and test targets
target_train = np_utils.to_categorical(target_train)
target_test = np_utils.to_categorical(target_test)
# The number of classes is the number of one-hot columns
number_of_classes = target_test.shape[1]

In [None]:
# Start an empty sequential network; layers are added one at a time below
network = Sequential()

In [None]:
# Add convolutional layer with 64 filters, a 5x5 window, and ReLU activation function
# input_shape is ordered (channels, height, width), matching the order the
# image arrays were reshaped into; the original had width and height swapped,
# which only went unnoticed because both are 28
network.add(Conv2D(filters=64,
                   kernel_size=(5, 5),
                   input_shape=(channels, height, width),
                   activation='relu'))

In [None]:
# Add max pooling layer that downsamples with a 2x2 window
network.add(MaxPooling2D(pool_size=(2, 2)))

In [None]:
# Add dropout layer with a drop rate of 0.5
network.add(Dropout(0.5))

In [None]:
# Add layer to flatten the input so it can feed the fully connected layers
network.add(Flatten())

In [None]:
# Add fully connected layer of 128 units with a ReLU activation function
network.add(Dense(128, activation="relu"))

In [None]:
# Add dropout layer with a drop rate of 0.5
network.add(Dropout(0.5))

In [None]:
# Add fully connected output layer with one softmax unit per class
network.add(Dense(number_of_classes, activation="softmax"))

In [None]:
# Compile neural network with the loss, optimizer, and metric used for training
network.compile(loss="categorical_crossentropy", # Cross-entropy
                optimizer="rmsprop", # Root Mean Square Propagation
                metrics=["accuracy"]) # Accuracy performance metric

In [None]:
# Train neural network
network.fit(features_train, # Features
    target_train, # Target
    epochs=2, # Number of epochs
    verbose=0, # Don't print description after each epoch
    batch_size=1000, # Number of observations per batch
    validation_data=(features_test, target_test)) # Data for evaluation

# Report the accuracy on the test images. With verbose=0 the fit call prints
# nothing, so without this step no accuracy is ever shown.
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_loss, test_accuracy = network.evaluate(features_test, target_test, verbose=0)
print("Test accuracy: {:.4f}".format(test_accuracy))