In [23]:
#@title MIT License
#
# Copyright (c) 2020 Balázs Pintér 
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

In [24]:
import numpy as np
import scipy
import scipy.sparse as sps
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import pandas as pd
import tensorflow as tf
from nltk.corpus import movie_reviews

In [25]:
num_neurons = 20
num_features = 5000

In [26]:
# getting the data, like last time

corpus, targets = zip(*[(movie_reviews.raw(fileid), category)
                         for category in movie_reviews.categories() for fileid in movie_reviews.fileids(category)])

count_vectorizer = CountVectorizer(stop_words='english', max_df=0.95, min_df=2, max_features=num_features)
bows = count_vectorizer.fit_transform(corpus)

# convert targets to numbers
targets = np.array([0 if target == 'neg' else 1 for target in targets])

bows = bows.astype(np.float32)
targets = targets.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(bows, targets, test_size=0.1, shuffle=True)

# the problem: we have sparse arrays, but neural network need dense arrays!
# the solution will be word embeddings, here we just convert to dense arrays
X_train = X_train.toarray()
X_test = X_test.toarray()

In [27]:
# in TensorFlow 2.0, we can see the tensor instantly (same as eager mode in TensorFlow 1.0)
tf.ones((4, 4))

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float32)>

In [28]:
# we also see the results of an operation instantly
tf.add(tf.constant([1, 2, 3]), tf.constant([4, 5, 6]))

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([5, 7, 9], dtype=int32)>

In [29]:
tf.zeros((4, 4)) + 4

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.]], dtype=float32)>

In [79]:
# variables are for the parameters (weights) for the NN
####### WRONG
W1 = tf.Variable(tf.random.normal((num_features, num_neurons), stddev=0.01), name='W1')
b1 = tf.Variable(tf.random.normal((num_features, num_neurons), stddev=0.01), name='b1')
W2 = tf.Variable(tf.random.normal((num_neurons, 1), stddev=0.01), name='W2')
b2 = tf.Variable(tf.random.normal((num_neurons, 1), stddev=0.01), name='b2')
W3 = tf.Variable(tf.random.normal((1, num_neurons), stddev=0.01), name='W3')
b3 = tf.Variable(tf.random.normal((1, 1), stddev=0.01), name='b3')
W1, b1, W2, b2
####### WRONG

(<tf.Variable 'W1:0' shape=(5000, 20) dtype=float32, numpy=
 array([[ 0.01592522, -0.00419015,  0.00619319, ..., -0.00845196,
          0.00620717, -0.00735237],
        [-0.00192597, -0.01602287,  0.01834721, ...,  0.00972443,
          0.00741144, -0.00854362],
        [-0.00443492, -0.0035197 , -0.0140771 , ...,  0.0019029 ,
         -0.01177116, -0.0007234 ],
        ...,
        [ 0.00117155,  0.00637321,  0.00346894, ..., -0.01635621,
          0.00848693,  0.01190089],
        [-0.00217524,  0.00598196, -0.00262365, ...,  0.00253259,
         -0.00359229, -0.01129465],
        [-0.00057631,  0.00029861,  0.02240941, ...,  0.01077843,
          0.00327318, -0.01328602]], dtype=float32)>,
 <tf.Variable 'b1:0' shape=(5000, 20) dtype=float32, numpy=
 array([[-0.01260709, -0.00986665,  0.01458601, ...,  0.00301094,
         -0.00341765, -0.00124887],
        [ 0.00594413, -0.00910233,  0.00886685, ..., -0.00310771,
         -0.01121224,  0.00931075],
        [-0.00040262, -0.00140321

In [80]:
# the network itself
def forward_pass(X):
    z = tf.add(tf.matmul(W1, X), b1)
    a = tf.nn.relu(z)
    z = tf.add(tf.matmul(W2, a), b2)
    a = tf.nn.relu(z)
    z = tf.add(tf.matmul(W3, a), b3)
    #a = tf.nn.sigmoid(z) # already in loss function
    return z

In [81]:
# the loss function
def loss(a, y):
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=y))

In [82]:
def minibatches(X, y, minibatch_size):

# shuffling    
    perm = np.random.permutation(X.shape[0])
    X_shuffled = X[perm]
    y_shuffled = y[perm]
    
    mini_batches = [(X_shuffled[i*minibatch_size:(i+1)*minibatch_size], y_shuffled[i*minibatch_size:(i+1)*minibatch_size])
                   for i in range(X_shuffled.shape[0] // minibatch_size)]
# we lost some examples at the end, doesn't really matter for this example
    return mini_batches

In [83]:
num_epochs = 10

$w_{11} := w_{11} - lr*\frac{loss}{\delta w_{11}}$

In [84]:
learning_rate = 0.3
minibatch_size = 32
for epoch in range(num_epochs):
# minibatch training
    epoch_loss = 0
    num_minibatches = X_train.shape[0] // minibatch_size
    for X_mini, y_mini in minibatches(X_train, y_train, minibatch_size):
# we use a GradientTape to record the gradient for each minibatch
        with tf.GradientTape() as t:
            # important: we have to transpose here! each example is in a column
            mini_loss = loss(forward_pass(X_mini.T), y_mini[None, :])            
        # update the weights
        dW1, db1, dW2, db2, dW3, db3 = t.gradient(mini_loss, [W1, b1, W2, b2, W3, b3])
        W1.assign_sub(learning_rate * dW1)
        b1.assign_sub(learning_rate * db1)
        W2.assign_sub(learning_rate * dW2)
        b2.assign_sub(learning_rate * db2)
        W3.assign_sub(learning_rate * dW3)
        b3.assign_sub(learning_rate * db3)
        epoch_loss += mini_loss
    epoch_loss /= num_minibatches
    predictions = forward_pass(X_train.T)
    predictions = np.array([0 if pred < 0 else 1 for pred in tf.squeeze(predictions)])
    accuracy = (y_train == predictions).sum() / len(y_train)
    print("Loss in epoch {}: {}, accuracy: {}".format(epoch, epoch_loss, accuracy))
# accuracy on test set
predictions = forward_pass(X_test.T)
predictions = np.array([0 if pred < 0 else 1 for pred in tf.squeeze(predictions)])
accuracy = (y_test == predictions).sum() / len(y_test)
print("Accuracy on test set: {}".format(accuracy))

InvalidArgumentError: Matrix size-incompatible: In[0]: [5000,20], In[1]: [5000,32] [Op:MatMul]