# Neural network sentiment analysis

In [1]:
!pip install trax==1.3.1 #Use this version for this notebook 

Collecting trax==1.3.1
  Downloading trax-1.3.1-py2.py3-none-any.whl (347 kB)
[K     |████████████████████████████████| 347 kB 3.0 MB/s eta 0:00:01
[?25hCollecting jax
  Downloading jax-0.2.7.tar.gz (520 kB)
[K     |████████████████████████████████| 520 kB 27.2 MB/s eta 0:00:01
[?25hCollecting funcsigs
  Downloading funcsigs-1.0.2-py2.py3-none-any.whl (17 kB)
Collecting tensor2tensor
  Downloading tensor2tensor-1.15.7-py2.py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 14.4 MB/s eta 0:00:01
[?25hCollecting tensorflow-datasets
  Downloading tensorflow_datasets-4.1.0-py3-none-any.whl (3.6 MB)
[K     |████████████████████████████████| 3.6 MB 15.2 MB/s eta 0:00:01
Collecting gin-config
  Downloading gin_config-0.4.0-py2.py3-none-any.whl (46 kB)
[K     |████████████████████████████████| 46 kB 2.9 MB/s eta 0:00:01
Collecting jaxlib
  Downloading jaxlib-0.1.57-cp38-none-macosx_10_9_x86_64.whl (42.7 MB)
[K     |████████████████████████████████| 42.7 MB 12.8 

Collecting typeguard>=2.7
  Downloading typeguard-2.10.0-py3-none-any.whl (16 kB)
Collecting httplib2>=0.9.1
  Downloading httplib2-0.18.1-py3-none-any.whl (95 kB)
[K     |████████████████████████████████| 95 kB 7.1 MB/s  eta 0:00:01
Collecting pygame>=1.9.2
  Downloading pygame-2.0.0-cp38-cp38-macosx_10_9_intel.whl (6.9 MB)
[K     |████████████████████████████████| 6.9 MB 16.0 MB/s eta 0:00:01
[?25hCollecting flax>=0.2.0
  Downloading flax-0.3.0-py3-none-any.whl (154 kB)
[K     |████████████████████████████████| 154 kB 8.4 MB/s eta 0:00:01
Collecting uritemplate<4dev,>=3.0.0
  Downloading uritemplate-3.0.1-py2.py3-none-any.whl (15 kB)
Collecting google-auth-httplib2>=0.0.3
  Downloading google_auth_httplib2-0.0.4-py2.py3-none-any.whl (9.1 kB)
Collecting portalocker
  Downloading portalocker-2.0.0-py2.py3-none-any.whl (11 kB)
Building wheels for collected packages: jax, gym, pypng, bz2file, promise


  Building wheel for jax (setup.py) ... [?25ldone
[?25h  Created wheel for jax: filename=jax-0.2.7-py3-none-any.whl size=606843 sha256=30a20021263b5eb3b75babecf19f3057d485578446a8c270e192fd94d7420d7e
  Stored in directory: /Users/sdeshpande/Library/Caches/pip/wheels/a5/3a/c2/f1de3b2efbfa6ca8229969acd6e3a028479b72d5d318f6ec78
  Building wheel for gym (setup.py) ... [?25ldone
[?25h  Created wheel for gym: filename=gym-0.17.3-py3-none-any.whl size=1654654 sha256=8c0aa1e8e997db6c2ecce23a2ce546e46a0fd8b69f6aae0b096f6135540c7bd6
  Stored in directory: /Users/sdeshpande/Library/Caches/pip/wheels/84/40/e7/14efb9870cfc92ac236d78cb721dce614ddec9666c8a5e0a35
  Building wheel for pypng (setup.py) ... [?25ldone
[?25h  Created wheel for pypng: filename=pypng-0.0.20-py3-none-any.whl size=67162 sha256=c6489b271124bd29789b8c5b1f727ae0987aae164a8b94bd112ff470b08ae4b1
  Stored in directory: /Users/sdeshpande/Library/Caches/pip/wheels/3a/ad/91/4f6a5e9f3db79c28e71e7b59099dce8a75618a34ff415d44b1
  Bui

In [2]:
import numpy as np  # regular ol' numpy

from trax import layers as tl  # core building block
from trax import shapes  # data signatures: dimensionality and type
from trax import fastmath  # uses jax, offers numpy on steroids



In [3]:
# Trax version 1.3.1 or better 
!pip list | grep trax

trax                               1.3.1


# Relu Layer

In [5]:
# Layers
# Create a relu trax layer
relu = tl.Relu()

# Inspect properties
print("-- Properties --")
print("name :", relu.name)
print("expected inputs :", relu.n_in)
print("promised outputs :", relu.n_out, "\n")

# Inputs
x = np.array([-2, -1, 0, 1, 2])
print("-- Inputs --")
print("x :", x, "\n")

# Outputs
y = relu(x)
print("-- Outputs --")
print("y :", y)



-- Properties --
name : Relu
expected inputs : 1
promised outputs : 1 

-- Inputs --
x : [-2 -1  0  1  2] 

-- Outputs --
y : [0 0 0 1 2]


# Concatenate Layer

In [6]:
# Create a concatenate trax layer
concat = tl.Concatenate()
print("-- Properties --")
print("name :", concat.name)
print("expected inputs :", concat.n_in)
print("promised outputs :", concat.n_out, "\n")

# Inputs
x1 = np.array([-10, -20, -30])
x2 = x1 / -10
print("-- Inputs --")
print("x1 :", x1)
print("x2 :", x2, "\n")

# Outputs
y = concat([x1, x2])
print("-- Outputs --")
print("y :", y)

-- Properties --
name : Concatenate
expected inputs : 2
promised outputs : 1 

-- Inputs --
x1 : [-10 -20 -30]
x2 : [1. 2. 3.] 

-- Outputs --
y : [-10. -20. -30.   1.   2.   3.]


# Layers are Configurable

In [7]:
# Configure a concatenate layer
concat_3 = tl.Concatenate(n_items=3)  # configure the layer's expected inputs
print("-- Properties --")
print("name :", concat_3.name)
print("expected inputs :", concat_3.n_in)
print("promised outputs :", concat_3.n_out, "\n")

# Inputs
x1 = np.array([-10, -20, -30])
x2 = x1 / -10
x3 = x2 * 0.99
print("-- Inputs --")
print("x1 :", x1)
print("x2 :", x2)
print("x3 :", x3, "\n")

# Outputs
y = concat_3([x1, x2, x3])
print("-- Outputs --")
print("y :", y)

-- Properties --
name : Concatenate
expected inputs : 3
promised outputs : 1 

-- Inputs --
x1 : [-10 -20 -30]
x2 : [1. 2. 3.]
x3 : [0.99 1.98 2.97] 

-- Outputs --
y : [-10.   -20.   -30.     1.     2.     3.     0.99   1.98   2.97]


In [None]:
#help(tl.Concatenate) # Uncomment this to see the function docstring with explanation

# Layers can have Weights

In [8]:
# Uncomment any of them to see information regarding the function
# help(tl.LayerNorm)
# help(shapes.signature)

In [9]:
# Layer initialization
norm = tl.LayerNorm()
# You first must know what the input data will look like
x = np.array([0, 1, 2, 3], dtype="float")

# Use the input data signature to get shape and type for initializing weights and biases
norm.init(shapes.signature(x)) # We need to convert the input datatype from usual tuple to trax ShapeDtype

print("Normal shape:",x.shape, "Data Type:",type(x.shape))
print("Shapes Trax:",shapes.signature(x),"Data Type:",type(shapes.signature(x)))

# Inspect properties
print("-- Properties --")
print("name :", norm.name)
print("expected inputs :", norm.n_in)
print("promised outputs :", norm.n_out)
# Weights and biases
print("weights :", norm.weights[0])
print("biases :", norm.weights[1], "\n")

# Inputs
print("-- Inputs --")
print("x :", x)

# Outputs
y = norm(x)
print("-- Outputs --")
print("y :", y)

Normal shape: (4,) Data Type: <class 'tuple'>
Shapes Trax: ShapeDtype{shape:(4,), dtype:float64} Data Type: <class 'trax.shapes.ShapeDtype'>
-- Properties --
name : LayerNorm
expected inputs : 1
promised outputs : 1
weights : [1. 1. 1. 1.]
biases : [0. 0. 0. 0.] 

-- Inputs --
x : [0. 1. 2. 3.]
-- Outputs --
y : [-1.3416404  -0.44721344  0.44721344  1.3416404 ]




# Custom Layers

In [None]:
help(tl.Fn)

In [10]:
# Define a custom layer
# In this example you will create a layer to calculate the input times 2

def TimesTwo():
    """Build a custom trax layer that multiplies its input by two.

    Returns:
        The layer created by ``tl.Fn`` under the name "TimesTwo", wrapping a
        one-argument function that returns ``x * 2``.
    """
    def double(x):
        # The computation this custom layer applies to its single input
        return x * 2

    # Give the custom layer a name so it can be identified when inspected
    return tl.Fn("TimesTwo", double)


# Test it
times_two = TimesTwo()

# Inspect properties
print("-- Properties --")
print("name :", times_two.name)
print("expected inputs :", times_two.n_in)
print("promised outputs :", times_two.n_out, "\n")

# Inputs
x = np.array([1, 2, 3])
print("-- Inputs --")
print("x :", x, "\n")

# Outputs
y = times_two(x)
print("-- Outputs --")
print("y :", y)

-- Properties --
name : TimesTwo
expected inputs : 1
promised outputs : 1 

-- Inputs --
x : [1 2 3] 

-- Outputs --
y : [2 4 6]


# Combinators

In [None]:
# help(tl.Serial)
# help(tl.Parallel)

In [11]:
# Serial combinator: chains the listed layers into one model, applied in the order given
serial = tl.Serial(
    tl.LayerNorm(),         # normalize input
    tl.Relu(),              # convert negative values to zero
    times_two,              # the custom layer you created above; multiplies the input received from the previous layer by 2
    
    ### START CODE HERE
#     tl.Dense(n_units=2),  # try adding more layers, e.g. uncomment these lines
#     tl.Dense(n_units=1),  # Binary classification, maybe? uncomment at your own peril
#     tl.LogSoftmax()       # Yes, LogSoftmax is also a layer
    ### END CODE HERE
)

# Initialization
x = np.array([-2, -1, 0, 1, 2]) # input
serial.init(shapes.signature(x)) # initialise the serial model from the input's signature

print("-- Serial Model --")
print(serial,"\n")
print("-- Properties --")
print("name :", serial.name)
print("sublayers :", serial.sublayers)
print("expected inputs :", serial.n_in)
print("promised outputs :", serial.n_out)
print("weights & biases:", serial.weights, "\n")

# Inputs
print("-- Inputs --")
print("x :", x, "\n")

# Outputs
y = serial(x)
print("-- Outputs --")
print("y :", y)

-- Serial Model --
Serial[
  LayerNorm
  Relu
  TimesTwo
] 

-- Properties --
name : Serial
sublayers : [LayerNorm, Relu, TimesTwo]
expected inputs : 1
promised outputs : 1
weights & biases: [(DeviceArray([1, 1, 1, 1, 1], dtype=int32), DeviceArray([0, 0, 0, 0, 0], dtype=int32)), (), ()] 

-- Inputs --
x : [-2 -1  0  1  2] 

-- Outputs --
y : [0.        0.        0.        1.4142132 2.8284264]




# JAX

In [12]:
# Numpy vs fastmath.numpy have different data types
# Regular ol' numpy
x_numpy = np.array([1, 2, 3])
print("good old numpy : ", type(x_numpy), "\n")

# Fastmath and jax numpy
x_jax = fastmath.numpy.array([1, 2, 3])
print("jax trax numpy : ", type(x_jax))

good old numpy :  <class 'numpy.ndarray'> 

jax trax numpy :  <class 'jax.interpreters.xla._DeviceArray'>


# Classes and subclasses

# Part 1: Parameters, methods and instances

In [13]:
class My_Class:
    """Minimal example class exposing a single parameter ``x``."""

    # Class-level parameter: instances read None until they assign their own x
    x = None

In [14]:
instance_a= My_Class() #To create an instance from class "My_Class" you have to call "My_Class"
instance_b= My_Class()
print('Parameter x of instance_a: ' + str(instance_a.x)) #To get a parameter 'x' from an instance 'a', write 'a.x'
print('Parameter x of instance_b: ' + str(instance_b.x))

Parameter x of instance_a: None
Parameter x of instance_b: None


In [15]:
### START CODE HERE (1 line) ### 
instance_a.x = 5
### END CODE HERE ###
print('Parameter x of instance_a: ' + str(instance_a.x))

Parameter x of instance_a: 5


# 1.1 The __init__ method

When you want to assign values to the parameters of your class when an instance is created, it is necessary to define a special method: __init__. The __init__ method is called when you create an instance of a class. It can have multiple arguments to initialize the parameters of your instance. In the next cell I will define My_Class with an __init__ method that takes the instance (self) and an argument y as inputs.

In [17]:
class My_Class:
    """Example class whose parameter ``x`` is set when the instance is created."""

    def __init__(self, y):
        """Initialise the instance: store the argument ``y`` as parameter ``x``."""
        self.x = y

In [18]:
### START CODE HERE (1 line) ### 
instance_c = My_Class(10)
### END CODE HERE ###
print('Parameter x of instance_c: ' + str(instance_c.x))

Parameter x of instance_c: 10


# 1.2 The __call__ method

Another important method is the __call__ method. It is performed whenever you call an initialized instance of a class. It can have multiple arguments and you can define it to do whatever you want, such as updating a parameter and printing it, as in the next cell.


In [19]:
class My_Class:
    """Example class that accumulates into ``x`` every time the instance is called."""

    def __init__(self, y):
        """Store the argument ``y`` as the starting value of parameter ``x``."""
        self.x = y

    def __call__(self, z):
        """Add ``z`` to parameter ``x`` and print the updated value (returns nothing)."""
        self.x += z
        print(self.x)

In [20]:
instance_d = My_Class(5)

In [21]:
instance_d(10)

15


In [22]:
class My_Class:
    """Example class holding two parameters that are added when the instance is called."""

    def __init__(self, y, z):
        """Initialise parameters ``x_1`` and ``x_2`` from the arguments ``y`` and ``z``."""
        ### START CODE HERE (2 lines) ###
        self.x_1 = y
        self.x_2 = z
        ### END CODE HERE ###

    def __call__(self):
        """Add ``x_1`` and ``x_2``, print a summary line and return the sum."""
        ### START CODE HERE (1 line) ###
        total = self.x_1 + self.x_2
        ### END CODE HERE ###
        print("Addition of {} and {} is {}".format(self.x_1, self.x_2, total))
        return total

In [23]:
instance_e = My_Class(10, 15)


def test_class_definition():
    """Check the parameters and the ``__call__`` result of the global ``instance_e``."""
    # (attribute name, expected value, message shown when the check fails)
    attribute_checks = [
        ("x_1", 10, "Check the value assigned to x_1"),
        ("x_2", 15, "Check the value assigned to x_2"),
    ]
    for attribute, expected, message in attribute_checks:
        assert getattr(instance_e, attribute) == expected, message

    # Calling the instance prints the addition line and returns the sum
    assert instance_e() == 25, "Check the __call__ method"

    print("\033[92mAll tests passed!")


test_class_definition()

Addition of 10 and 15 is 25
[92mAll tests passed!


# 1.3 Custom methods
In addition to the __init__ and __call__ methods, your classes can have custom-built methods to do whatever you want when called. To define a custom method, you have to indicate its input arguments, the instructions that you want it to perform and the values to return (if any). In the next cell, My_Class is defined with my_method that multiplies the values of x_1 and x_2, sums that product with an input w, and returns the result

In [24]:
class My_Class:
    """Example class with two parameters, a ``__call__`` operation and a custom method."""

    def __init__(self, y, z):
        """Initialise parameters ``x_1`` and ``x_2`` from the arguments ``y`` and ``z``."""
        self.x_1 = y
        self.x_2 = z

    def __call__(self):
        """Return ``x_1`` minus twice ``x_2``."""
        return self.x_1 - 2 * self.x_2

    def my_method(self, w):
        """Return the product of ``x_1`` and ``x_2`` plus the argument ``w``."""
        return self.x_1 * self.x_2 + w

In [25]:
### START CODE HERE (1 line) ### 
instance_f = My_Class(1,10)
### END CODE HERE ### 
print("Output of my_method:",instance_f.my_method(16))

Output of my_method: 26


In [27]:
# hidden-cell
class My_Class:
    """Example class showing a method (``new_method``) that calls another method."""

    def __init__(self, y, z):
        """Initialise parameters ``x_1`` and ``x_2`` from the arguments ``y`` and ``z``."""
        self.x_1 = y
        self.x_2 = z

    def __call__(self):
        """Return ``x_1`` minus twice ``x_2``."""
        return self.x_1 - 2 * self.x_2

    def my_method(self, w):
        """Return the product of ``x_1`` and ``x_2`` plus the argument ``w``."""
        return self.x_1 * self.x_2 + w

    def new_method(self, v):
        """Delegate to ``my_method`` with argument ``v`` and return its result."""
        return self.my_method(v)

In [28]:
instance_g = My_Class(1,10)
print("Output of my_method:",instance_g.my_method(16))
print("Output of new_method:",instance_g.new_method(16))

Output of my_method: 26
Output of new_method: 26


# Part 2: Subclasses and Inheritance

In [29]:
class sub_c(My_Class):
    """Subclass of My_Class that adds one extra method on top of what it inherits."""

    def additional_method(self):
        """Print the value of the inherited parameter ``x_1``."""
        print(self.x_1)

# 2.1 Inheritance

In [30]:
instance_sub_a = sub_c(1,10)
print('Parameter x_1 of instance_sub_a: ' + str(instance_sub_a.x_1))
print('Parameter x_2 of instance_sub_a: ' + str(instance_sub_a.x_2))
print("Output of my_method of instance_sub_a:",instance_sub_a.my_method(16))

Parameter x_1 of instance_sub_a: 1
Parameter x_2 of instance_sub_a: 10
Output of my_method of instance_sub_a: 26


In [31]:
class sub_c(My_Class):
    """Subclass of My_Class that overrides ``my_method`` with a no-argument version."""

    def my_method(self):
        """Return the product of ``x_1`` and ``x_2`` (takes no extra argument)."""
        ### START CODE HERE (1 line) ###
        product = self.x_1 * self.x_2
        ### END CODE HERE ###
        return product

In [32]:
test = sub_c(3,10)
assert test.my_method() == 30, "The method my_method should return the product between x_1 and x_2"

print("Output of overridden my_method of test:",test.my_method()) #notice we didn't pass any parameter to call my_method
#print("Output of overridden my_method of test:",test.my_method(16)) #try to see what happens if you call it with 1 argument

Output of overridden my_method of test: 30


In [33]:
y,z= 1,10
instance_sub_a = sub_c(y,z)
instance_a = My_Class(y,z)
print('My_method for an instance of sub_c returns: ' + str(instance_sub_a.my_method()))
print('My_method for an instance of My_Class returns: ' + str(instance_a.my_method(10)))

My_method for an instance of sub_c returns: 10
My_method for an instance of My_Class returns: 20


# Data generators

In [34]:
import random 
import numpy as np

# Example of traversing a list of indexes to create a circular list
a = [1, 2, 3, 4]
b = [0] * 10

a_size = len(a)
b_size = len(b)
# Ordered indexes of `a`. [*range(a_size)] unpacks the range straight into a list
# and is equivalent to [i for i in range(0, a_size)]
lines_index = [*range(a_size)]
index = 0                      # position inside lines_index; similar to index in data_generator below
for i in range(b_size):        # `b` is longer than `a` forcing a wrap
    # We wrap by resetting index to 0, so after the last entry of lines_index we circle back to the first one
    if index >= a_size:
        index = 0
    
    b[i] = a[lines_index[index]]     # `lines_index[index]` is an index into `a`; store that element of `a` in b
    index += 1
    
print(b)

[1, 2, 3, 4, 1, 2, 3, 4, 1, 2]


In [35]:
# Example of traversing a shuffled list of indexes to create a circular list
# Note: no random seed is set in this cell, so the printed orders can differ between runs
a = [1, 2, 3, 4]
b = []

a_size = len(a)
b_size = 10
lines_index = [*range(a_size)]
print("Original order of index:",lines_index)

# If we shuffle lines_index we can change the order of our circular list
# without modifying the order of our original data
random.shuffle(lines_index) # Shuffle the order (in place)
print("Shuffled order of index:",lines_index)

print("New value order for first batch:",[a[index] for index in lines_index])
batch_counter = 1        # counts full passes over `a`; each pass is reported as a "batch" in the printed output
index = 0                # similar to index in data_generator below
for i in range(b_size):  # b_size is larger than a_size, forcing a wrap
    # We wrap by resetting index to 0
    if index >= a_size:
        index = 0
        batch_counter += 1
        random.shuffle(lines_index) # Re-shuffle the order for the next pass
        print("\nShuffled Indexes for Batch No.{} :{}".format(batch_counter,lines_index))
        print("Values for Batch No.{} :{}".format(batch_counter,[a[index] for index in lines_index]))
    
    b.append(a[lines_index[index]])     # `lines_index[index]` is an index into `a`; append that element of `a` to b
    index += 1
print()    
print("Final value of b:",b)

Original order of index: [0, 1, 2, 3]
Shuffled order of index: [0, 1, 2, 3]
New value order for first batch: [1, 2, 3, 4]

Shuffled Indexes for Batch No.2 :[2, 0, 1, 3]
Values for Batch No.2 :[3, 1, 2, 4]

Shuffled Indexes for Batch No.3 :[2, 0, 1, 3]
Values for Batch No.3 :[3, 1, 2, 4]

Final value of b: [1, 2, 3, 4, 3, 1, 2, 4, 3, 1]


In [36]:
def data_generator(batch_size, data_x, data_y, shuffle=True):
    """Yield an endless stream of ``(X, Y)`` batches, cycling through the data.

    The data is traversed through a list of indexes. When the end of that list
    is reached in the middle of a batch, traversal wraps around to the start,
    so every batch always holds exactly ``batch_size`` elements.

    Args:
        batch_size: number of elements in each yielded batch.
        data_x: indexable sequence of inputs.
        data_y: indexable sequence of targets; len(data_x) must be equal to
            len(data_y) (this is not checked).
        shuffle: if True, the index list is shuffled before the first pass and
            re-shuffled every time traversal wraps around.

    Yields:
        A tuple ``(X, Y)`` of two lists with ``batch_size`` elements each, where
        ``X[i]`` and ``Y[i]`` come from the same position of the data.
    """
    # Imported locally so the generator also works with shuffle=True on a fresh
    # kernel: the `rnd` alias is only bound by a later cell of the notebook.
    import random

    data_lng = len(data_x)
    index_list = [*range(data_lng)]  # ordered indexes of the sample data

    # If shuffle is set to True, we traverse the list in a random way
    if shuffle:
        random.shuffle(index_list)  # in-place shuffle of the list

    index = 0  # start with the first element
    while True:
        # Pre-allocate both lists with batch_size elements
        X = [0] * batch_size
        Y = [0] * batch_size

        for i in range(batch_size):

            # Wrap the index each time that we reach the end of the list
            if index >= data_lng:
                index = 0
                # A new pass over the data starts here: re-shuffle if requested
                if shuffle:
                    random.shuffle(index_list)

            X[i] = data_x[index_list[index]]
            Y[i] = data_y[index_list[index]]

            index += 1

        yield (X, Y)

In [37]:
def test_data_generator():
    """Check that an unshuffled generator walks the data in order and wraps around."""
    x = [1, 2, 3, 4]
    y = [xi ** 2 for xi in x]

    generator = data_generator(3, x, y, shuffle=False)

    # Consecutive batches expected from 4 samples read 3 at a time, with wrapping
    expected_batches = [
        (([1, 2, 3], [1, 4, 9]), "First batch does not match"),
        (([4, 1, 2], [16, 1, 4]), "Second batch does not match"),
        (([3, 4, 1], [9, 16, 1]), "Third batch does not match"),
        (([2, 3, 4], [4, 9, 16]), "Fourth batch does not match"),
    ]
    for expected, message in expected_batches:
        assert np.allclose(next(generator), expected), message

    print("\033[92mAll tests passed!")


test_data_generator()

[92mAll tests passed!


# Sentiment analysis with DeepNet

In [None]:
import string
import re
import os
import nltk
nltk.download('twitter_samples')
nltk.download('stopwords')
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords, twitter_samples 

In [None]:
tweet_tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)

# Stop words are messy and not that compelling; 
# "very" and "not" are considered stop words, but they are obviously expressing sentiment

# The Porter stemmer stems "was" to "wa".  Seriously???

# I'm not sure we want to get into stop words
stopwords_english = stopwords.words('english')

# Also have my doubts about stemming...
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()


In [None]:
def process_tweet(tweet):
    '''
    Clean, tokenize and stem a single tweet.

    Input: 
        tweet: a string containing a tweet
    Output:
        tweets_clean: a list of stemmed tokens from the tweet, with stop words
                      and punctuation tokens left out
    
    '''
    # Patterns removed from the raw text, applied in this order:
    #   1. stock market tickers like $GE
    #   2. old style retweet text "RT" at the start of the tweet
    #   3. hyperlinks
    #   4. the hash # sign only (the hashtag word itself is kept)
    removal_patterns = (
        r'\$\w*',
        r'^RT[\s]+',
        r'https?:\/\/.*[\r\n]*',
        r'#',
    )
    for pattern in removal_patterns:
        tweet = re.sub(pattern, '', tweet)

    # tokenize tweets
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
    ### START CODE HERE ###
    # Keep tokens that are neither stop words nor punctuation, and stem each kept token
    tweets_clean = [
        stemmer.stem(token)
        for token in tokenizer.tokenize(tweet)
        if token not in stopwords_english and token not in string.punctuation
    ]
    ### END CODE HERE ###
    return tweets_clean

In [None]:
# let's not reuse variables
#all_positive_tweets = twitter_samples.strings('positive_tweets.json')
#all_negative_tweets = twitter_samples.strings('negative_tweets.json')

def load_tweets():
    """Read the positive and negative tweet strings from the twitter_samples corpus.

    Returns:
        A pair ``(all_positive_tweets, all_negative_tweets)`` holding the result of
        ``twitter_samples.strings`` for 'positive_tweets.json' and
        'negative_tweets.json' respectively.
    """
    sample_files = ('positive_tweets.json', 'negative_tweets.json')
    all_positive_tweets, all_negative_tweets = (
        twitter_samples.strings(file_name) for file_name in sample_files
    )
    return all_positive_tweets, all_negative_tweets
    
# Layers have weights and a forward function.
# They create weights when layer.init is called and use them.
# TODO: remove this or make it optional

class Layer:
    """Base class for layers."""

    def __init__(self):
        # Stays None unless init_weights_and_state (a no-op here) is overridden to set it
        self.weights = None

    def forward(self, x):
        """Compute the layer's output for ``x``; subclasses must implement this."""
        raise NotImplementedError

    def init_weights_and_state(self, input_signature, random_key):
        """Hook called by ``init``; does nothing in the base class."""
        pass

    def init(self, input_signature, random_key):
        """Run ``init_weights_and_state`` and return ``self.weights``."""
        self.init_weights_and_state(input_signature, random_key)
        return self.weights

    def __call__(self, x):
        """Calling the layer is the same as calling ``forward``."""
        return self.forward(x)

In [None]:
import os 
import random as rnd

# import relevant libraries
import trax

# set random seeds to make this notebook easier to replicate
trax.supervised.trainer_lib.init_random_number_generators(31)

# import trax.fastmath.numpy
import trax.fastmath.numpy as np

# import trax.layers
from trax import layers as tl


In [None]:
# Create an array using trax.fastmath.numpy
a = np.array(5.0)

# View the returned array
display(a)

print(type(a))

In [None]:
# Define a function that will use the trax.fastmath.numpy array
def f(x):
    """Return the square of ``x``, i.e. f(x) = x^2."""
    squared = x ** 2
    return squared

In [None]:
# Call the function
print(f"f(a) for a={a} is {f(a)}")

In [None]:
# Directly use trax.fastmath.grad to calculate the gradient (derivative) of the function
grad_f = trax.fastmath.grad(fun=f)  # df / dx - Gradient of function f(x) with respect to x

# View the type of the returned object (it's a function)
type(grad_f)

In [None]:
# Call the newly created function and pass in a value for x (the DeviceArray stored in 'a')
grad_calculation = grad_f(a)

# View the result of calling the grad_f function
display(grad_calculation)

# Part 2: Importing the data

In [None]:
## DO NOT EDIT THIS CELL

# Import functions from the utils.py file

import numpy as np

# Load positive and negative tweets (load_tweets is defined earlier in this notebook)
all_positive_tweets, all_negative_tweets = load_tweets()

# View the total number of positive and negative tweets.
print(f"The number of positive tweets: {len(all_positive_tweets)}")
print(f"The number of negative tweets: {len(all_negative_tweets)}")

# Split positive set into validation and training: the first 4000 tweets train, the rest validate
val_pos   = all_positive_tweets[4000:] # generating validation set for positive tweets
train_pos  = all_positive_tweets[:4000]# generating training set for positive tweets

# Split negative set into validation and training: the first 4000 tweets train, the rest validate
val_neg   = all_negative_tweets[4000:] # generating validation set for negative tweets
train_neg  = all_negative_tweets[:4000] # generating training set for negative tweets

# Combine training data into one set (positives first, then negatives)
train_x = train_pos + train_neg 

# Combine validation data into one set (positives first, then negatives)
val_x  = val_pos + val_neg

# Set the labels for the training set (1 for positive, 0 for negative),
# in the same positives-then-negatives order as train_x
train_y = np.append(np.ones(len(train_pos)), np.zeros(len(train_neg)))

# Set the labels for the validation set (1 for positive, 0 for negative),
# in the same positives-then-negatives order as val_x
val_y  = np.append(np.ones(len(val_pos)), np.zeros(len(val_neg)))

print(f"length of train_x {len(train_x)}")
print(f"length of val_x {len(val_x)}")

In [None]:
# Import a function that processes the tweets
# from utils import process_tweet

# Try out function that processes tweets
print("original tweet at training position 0")
print(train_pos[0])

print("Tweet at training position 0 after processing:")
process_tweet(train_pos[0])

# 2.2 Building the vocabulary

In [None]:
# Build the vocabulary: a dictionary mapping each word to a unique integer ID
# Unit Test Note - There is no test set here, only train/val

# Include special tokens 
# Start with the pad, end of line and unk tokens, which take IDs 0, 1 and 2
Vocab = {'__PAD__': 0, '__</e>__': 1, '__UNK__': 2} 

# Note that we build vocab using training data only
for tweet in train_x: 
    processed_tweet = process_tweet(tweet)
    for word in processed_tweet:
        if word not in Vocab: 
            # IDs are handed out consecutively, so the current size is the next unused ID
            Vocab[word] = len(Vocab)
    
print("Total words in vocab are",len(Vocab))
# NOTE(review): this displays the entire dictionary, which makes for a very long cell output
display(Vocab)

# 2.3 Converting a tweet to a tensor

In [None]:
# UNQ_C1 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: tweet_to_tensor
def tweet_to_tensor(tweet, vocab_dict, unk_token='__UNK__', verbose=False):
    '''
    Convert a tweet into the list of integer IDs of its processed words.

    Input: 
        tweet - A string containing a tweet
        vocab_dict - The words dictionary
        unk_token - The special string for unknown tokens
        verbose - Print info during runtime
    Output:
        tensor_l - A python list with the unique integer ID of each word of the
                   processed tweet; a word that is missing from vocab_dict gets
                   the ID of unk_token
        
    '''  
    
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    # Process the tweet into a list of words (cleaning is delegated to process_tweet)
    word_l = process_tweet(tweet)
    
    if verbose:
        print("List of words from the processed tweet:")
        print(word_l)
        
    # Look up the ID of the unknown token once; it is the fallback for every lookup below
    unk_ID = vocab_dict[unk_token]
    
    if verbose:
        print(f"The unique integer ID for the unk_token is {unk_ID}")
        
    # Map every word to its unique integer ID, using unk_ID when the word
    # doesn't exist in the vocab dictionary
    tensor_l = [vocab_dict.get(word, unk_ID) for word in word_l]
    ### END CODE HERE ###
    
    return tensor_l

In [None]:
print("Actual tweet is\n", val_pos[0])
print("\nTensor of tweet:\n", tweet_to_tensor(val_pos[0], vocab_dict=Vocab))

In [None]:
# test tweet_to_tensor

def test_tweet_to_tensor():
    """Run the tweet_to_tensor checks on val_pos[1] and report how many passed.

    Each test case is evaluated independently. A failing case prints its
    "error" message and the remaining cases still run. The summary line is
    printed at the end.
    """
    test_cases = [
        
        {
            "name":"simple_test_check",
            "input": [val_pos[1], Vocab],
            "expected":[444, 2, 304, 567, 56, 9],
            "error":"The function gives bad output for val_pos[1]. Test failed"
        },
        {
            "name":"datatype_check",
            "input":[val_pos[1], Vocab],
            "expected":type([]),
            "error":"Datatype mismatch. Need only list not np.array"
        },
        {
            "name":"without_unk_check",
            "input":[val_pos[1], Vocab],
            "expected":6,
            "error":"Unk word check not done- Please check if you included mapping for unknown word"
        }
    ]
    count = 0
    for test_case in test_cases:
        name = test_case['name']
        expected = test_case['expected']

        try:
            result = tweet_to_tensor(*test_case['input'])

            if name == "simple_test_check":
                assert expected == result
                count += 1
            elif name == "datatype_check":
                assert isinstance(result, expected)
                count += 1
            elif name == "without_unk_check":
                # Unknown words must be mapped to an ID: never left as None and
                # never dropped, so the expected number of IDs has to come back
                assert None not in result
                assert len(result) == expected
                count += 1

        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit are not swallowed by the test loop
        except Exception as err:
            print(test_case['error'])
            if not isinstance(err, AssertionError):
                # The function crashed instead of returning a wrong value: show why
                print("Unexpected", type(err).__name__, "raised:", err)

    total = len(test_cases)
    if count == total:
        print("\033[92m All tests passed")
    else:
        print(count, " Tests passed out of", total)


test_tweet_to_tensor()

# 2.4 Creating a batch generator

In [None]:
# UNQ_C2 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED: Data generator
def data_generator(data_pos, data_neg, batch_size, loop, vocab_dict, shuffle=False):
    '''
    Yield balanced, zero-padded batches of tweets converted to word-ID tensors.

    Every batch holds batch_size // 2 positive tweets followed by
    batch_size // 2 negative tweets. Each tweet is converted with
    tweet_to_tensor and right-padded with 0 up to the length of the longest
    tweet in the same batch. Consecutive batches walk through the data without
    skipping any example.

    Input: 
        data_pos - Set of positive examples
        data_neg - Set of negative examples
        batch_size - number of samples per batch. Must be even
        loop - True to cycle over the data indefinitely; False to stop as soon
               as either set is exhausted (an incomplete last batch is dropped)
        vocab_dict - The words dictionary
        shuffle - Shuffle the data order (re-shuffled whenever a set wraps around)
    Yield:
        inputs - np.array of shape (batch_size, longest tweet in the batch)
        targets - np.array of labels: 1 for the positive half, 0 for the negative half
        example_weights - np.array of ones with the same shape as targets
        
    '''     
### START GIVEN CODE ###
    # make sure the batch size is an even number
    # to allow an equal number of positive and negative samples
    assert batch_size % 2 == 0
    
    # Number of positive examples in each batch is half of the batch size
    # same with number of negative examples in each batch
    n_to_take = batch_size // 2
    
    # Use pos_index to walk through the data_pos array
    # same with neg_index and data_neg
    pos_index = 0
    neg_index = 0
    
    len_data_pos = len(data_pos)
    len_data_neg = len(data_neg)
    
    # Get an array with the data indexes
    pos_index_lines = list(range(len_data_pos))
    neg_index_lines = list(range(len_data_neg))
    
    # shuffle lines if shuffle is set to True
    if shuffle:
        rnd.shuffle(pos_index_lines)
        rnd.shuffle(neg_index_lines)
        
    stop = False
    
    # Loop indefinitely (until stop is set when loop is False)
    while not stop:  
        
        # create a batch with positive and negative examples
        batch = []
        
        # First part: Pack n_to_take positive examples
        for _ in range(n_to_take):
                    
            # If the positive index goes past the positive dataset length,
            if pos_index >= len_data_pos: 
                
                # If loop is set to False, break once we reach the end of the dataset
                if not loop:
                    stop = True
                    break
                
                # If user wants to keep re-using the data, reset the index
                pos_index = 0
                
                if shuffle:
                    # Shuffle the index of the positive sample
                    rnd.shuffle(pos_index_lines)
                    
            # get the tweet at pos_index
            tweet = data_pos[pos_index_lines[pos_index]]
            
            # convert the tweet into tensors of integers representing the processed words
            tensor = tweet_to_tensor(tweet, vocab_dict)
            
            # append the tensor to the batch list
            batch.append(tensor)
            
            # Increment pos_index by one
            pos_index = pos_index + 1

### END GIVEN CODE ###
            
### START CODE HERE (Replace instances of 'None' with your code) ###

        # Second part: Pack n_to_take negative examples
        # into the same batch list
        for _ in range(n_to_take):
            
            # If the negative index goes past the negative dataset length,
            if neg_index >= len_data_neg:
                
                # If loop is set to False, break once we reach the end of the dataset
                if not loop:
                    stop = True
                    break
                    
                # If user wants to keep re-using the data, reset the index
                neg_index = 0
                
                if shuffle:
                    # Shuffle the index of the negative sample
                    rnd.shuffle(neg_index_lines)

            # get the tweet at neg_index
            tweet = data_neg[neg_index_lines[neg_index]]
            
            # convert the tweet into tensors of integers representing the processed words
            tensor = tweet_to_tensor(tweet, vocab_dict)
            
            # append the tensor to the batch list
            batch.append(tensor)
            
            # Increment neg_index by one
            neg_index = neg_index + 1

### END CODE HERE ###        

### START GIVEN CODE ###
        if stop:
            break

        # pos_index and neg_index already point at the first unused example:
        # they were advanced once per tweet inside the loops above. Adding
        # n_to_take again here would skip n_to_take examples after every batch.
        
        # Get the max tweet length (the length of the longest tweet) 
        # (all shorter tweets are padded to have this length)
        max_len = max(len(t) for t in batch)
        
        # Initialize tensor_pad_l, which will 
        # store the padded versions of the tensors
        tensor_pad_l = []
        # Pad shorter tweets with zeros
        for tensor in batch:
### END GIVEN CODE ###

### START CODE HERE (Replace instances of 'None' with your code) ###
            # Get the number of positions to pad for this tensor so that it will be max_len long
            n_pad = max_len - len(tensor)
            
            # Generate a list of zeros, with length n_pad
            pad_l = [0]*n_pad
            
            # concatenate the tensor and the list of padded zeros
            tensor_pad = tensor + pad_l
            
            # append the padded tensor to the list of padded tensors
            tensor_pad_l.append(tensor_pad)

        # convert the list of padded tensors to a numpy array
        # and store this as the model inputs
        inputs = np.array(tensor_pad_l)
  
        # Generate the list of targets for the positive examples (a list of ones)
        # The length is the number of positive examples in the batch
        target_pos = [1]*n_to_take
        
        # Generate the list of targets for the negative examples (a list of zeros)
        # The length is the number of negative examples in the batch
        target_neg = [0]*n_to_take
        
        # Concatenate the positive and negative targets
        target_l = target_pos + target_neg
        
        # Convert the target list into a numpy array
        targets = np.array(target_l)

        # Example weights: Treat all examples equally importantly.
        example_weights = np.ones_like(targets)
        

### END CODE HERE ###

### GIVEN CODE ###
        # note we use yield and not return
        yield inputs, targets, example_weights

In [None]:
# Set the random number generator for the shuffle procedure
rnd.seed(30) 

# Endless stream of training batches
def train_generator(batch_size, shuffle = False):
    """Return a looping data_generator over train_pos / train_neg using Vocab."""
    return data_generator(
        data_pos=train_pos,
        data_neg=train_neg,
        batch_size=batch_size,
        loop=True,
        vocab_dict=Vocab,
        shuffle=shuffle,
    )

# Endless stream of validation batches
def val_generator(batch_size, shuffle = False):
    """Return a looping data_generator over val_pos / val_neg using Vocab."""
    return data_generator(
        data_pos=val_pos,
        data_neg=val_neg,
        batch_size=batch_size,
        loop=True,
        vocab_dict=Vocab,
        shuffle=shuffle,
    )

# Create the validation data generator
def test_generator(batch_size, shuffle = False):
    return data_generator(val_pos, val_neg, batch_size, False, Vocab, shuffle)

# Get a batch from the train_generator and inspect.
inputs, targets, example_weights = next(train_generator(4, shuffle=True))

# this will print a list of 4 tensors padded with zeros
print(f'Inputs: {inputs}')
print(f'Targets: {targets}')
print(f'Example Weights: {example_weights}')

In [None]:
# Test the train_generator

# Create a data generator for training data,
# which produces batches of size 4 (for tensors and their respective targets)
tmp_data_gen = train_generator(batch_size = 4)

# Call the data generator to get one batch and its targets
tmp_inputs, tmp_targets, tmp_example_weights = next(tmp_data_gen)

print(f"The inputs shape is {tmp_inputs.shape}")
print(f"The targets shape is {tmp_targets.shape}")
print(f"The example weights shape is {tmp_example_weights.shape}")

for i,t in enumerate(tmp_inputs):
    print(f"input tensor: {t}; target {tmp_targets[i]}; example weights {tmp_example_weights[i]}")

# Part 3: Defining classes

# 3.1 ReLU class

In [None]:
# UNQ_C3 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: Relu
class Relu(Layer):
    """Rectified linear unit: an activation layer without any weights."""
    def forward(self, x):
        '''
        Clamp every negative entry of x to zero, element-wise.

        Input: 
            - x (a numpy array): the input
        Output:
            - numpy array holding max(value, 0) for each value in x
        '''
        ### START CODE HERE (Replace instances of 'None' with your code) ###

        # element-wise maximum of x and the scalar 0
        return np.maximum(x, 0)

        ### END CODE HERE ###

In [None]:
# Test your relu function
x = np.array([[-2.0, -1.0, 0.0], [0.0, 1.0, 2.0]], dtype=float)
relu_layer = Relu()
print("Test data is:")
print(x)
print("Output of Relu is:")
print(relu_layer(x))

# 3.2 Dense class

In [None]:
# use the fastmath module within trax
from trax import fastmath

# use the numpy module from trax
np = fastmath.numpy

# use the fastmath.random module from trax
random = fastmath.random

In [None]:
# See how the trax.fastmath.random.normal function works
# (`random` below is the alias bound to fastmath.random in the previous cell)
# get_prng turns an integer seed into the key object that is passed
# to random.normal through its `key` argument further down.
tmp_key = random.get_prng(seed=1)
print("The random seed generated by random.get_prng")
display(tmp_key)

print("choose a matrix with 2 rows and 3 columns")
tmp_shape=(2,3)
display(tmp_shape)

# Generate a weight matrix
# Note that you'll get an error if you try to set dtype to tf.float32, where tf is tensorflow
# Just avoid setting the dtype and allow it to use the default data type
tmp_weight = trax.fastmath.random.normal(key=tmp_key, shape=tmp_shape)

print("Weight matrix generated with a normal distribution with mean 0 and stdev of 1")
display(tmp_weight)

In [None]:
# UNQ_C4 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: Dense

class Dense(Layer):
    """
    Fully-connected layer: multiplies its input by a weight matrix.
    No bias term is added.
    """

    # __init__ is implemented for you
    def __init__(self, n_units, init_stdev=0.1):
        # n_units: number of output columns of the weight matrix
        # init_stdev: factor applied to the standard-normal initial weights
        self._init_stdev = init_stdev
        self._n_units = n_units

    # Please implement 'forward()'
    def forward(self, x):
        """Return the matrix product of x and this layer's weights."""
### START CODE HERE (Replace instances of 'None' with your code) ###
        return np.dot(x, self.weights)
### END CODE HERE ###

    # init_weights
    def init_weights_and_state(self, input_signature, random_key):
        """Create, store and return the weight matrix.

        The matrix has shape (last dimension of input_signature.shape, n_units)
        and is drawn with random.normal, then scaled by init_stdev.
        """
### START CODE HERE (Replace instances of 'None' with your code) ###
        # Only the trailing dimension of the input decides the number of rows
        n_inputs = input_signature.shape[-1]
        weight_shape = (n_inputs, self._n_units)
        self.weights = self._init_stdev * random.normal(key=random_key, shape=weight_shape)
### END CODE HERE ###     
        return self.weights

In [None]:
# Testing your Dense layer 
dense_layer = Dense(n_units=10)  #sets  number of units in dense layer
random_key = random.get_prng(seed=0)  # sets random seed
z = np.array([[2.0, 7.0, 25.0]]) # input array 

dense_layer.init(z, random_key)
print("Weights are\n ",dense_layer.weights) #Returns randomly generated weights
print("Foward function output is ", dense_layer(z)) # Returns multiplied values of units and weights

# 3.3 Model

In [None]:
# View documentation on tl.Dense
help(tl.Dense)

In [None]:
# View documentation on tl.Serial
help(tl.Serial)

In [None]:
# View documentation for tl.Embedding
help(tl.Embedding)

In [None]:
tmp_embed = tl.Embedding(vocab_size=3, d_feature=2)
display(tmp_embed)

In [None]:
# View the documentation for tl.Mean
help(tl.Mean)

In [None]:
# Pretend the embedding matrix uses 
# 2 elements for embedding the meaning of a word
# and has a vocabulary size of 3
# So it has shape (2,3)
tmp_embed = np.array([[1,2,3,],
                    [4,5,6]
                   ])

# take the mean along axis 0
print("The mean along axis 0 creates a vector whose length equals the vocabulary size")
display(np.mean(tmp_embed,axis=0))

print("The mean along axis 1 creates a vector whose length equals the number of elements in a word embedding")
display(np.mean(tmp_embed,axis=1))

In [None]:
help(tl.LogSoftmax)

In [None]:
# UNQ_C5 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: classifier
def classifier(vocab_size=len(Vocab), embedding_dim=256, output_dim=2, mode='train'):
    """Build the tweet sentiment classifier as a tl.Serial model.

    Layers, in order: Embedding -> Mean over axis 1 -> Dense -> LogSoftmax.

    Input:
        vocab_size - number of rows of the embedding table; the default,
                     len(Vocab), is evaluated once when this cell is run
        embedding_dim - d_feature of the embedding layer
        output_dim - number of units of the dense output layer
        mode - accepted but not used by this function
    Output:
        model - the combined tl.Serial layer
    """
### START CODE HERE (Replace instances of 'None' with your code) ###
    layers = [
        # word ID -> embedding vector
        tl.Embedding(vocab_size=vocab_size, d_feature=embedding_dim),
        # average along axis 1 to get one "average" word embedding
        tl.Mean(axis=1),
        # one unit for each output
        tl.Dense(n_units=output_dim),
        # log softmax (no parameters needed)
        tl.LogSoftmax(),
    ]

    # Chain the layers into a single model
    # of type trax.layers.combinators.Serial
    model = tl.Serial(*layers)
### END CODE HERE ###     

    return model

In [None]:
tmp_model = classifier()

In [None]:
print(type(tmp_model))
display(tmp_model)

# Part 4: Training

In [None]:
# View documentation for trax.supervised.training.TrainTask
help(trax.supervised.training.TrainTask)

In [None]:
# View documentation for trax.supervised.training.EvalTask
help(trax.supervised.training.EvalTask)

In [None]:
# View documentation for trax.supervised.training.Loop
help(trax.supervised.training.Loop)

In [None]:
# View optimizers that you could choose from
help(trax.optimizers)

# 4.1 Training the model

In [None]:
from trax.supervised import training

# Examples per batch. Must be even: data_generator asserts this so it can take
# the same number of positive and negative tweets.
batch_size = 16
# Seed rnd so the shuffling done inside the data generators is repeatable.
rnd.seed(271)

# Training task: shuffled, endlessly looping training batches with a
# cross-entropy loss and the Adam optimizer.
# NOTE(review): 0.01 is presumably the learning rate, and n_steps_per_checkpoint
# presumably the number of steps between checkpoints -- confirm in the trax docs.
train_task = training.TrainTask(
    labeled_data=train_generator(batch_size=batch_size, shuffle=True),
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.01),
    n_steps_per_checkpoint=10,
)

# Evaluation task: validation batches, reporting cross-entropy loss and accuracy.
eval_task = training.EvalTask(
    labeled_data=val_generator(batch_size=batch_size, shuffle=True),
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
)

# Fresh, untrained model built with the classifier defaults.
model = classifier()

In [None]:
output_dir = '~/model/'
output_dir_expand = os.path.expanduser(output_dir)
print(output_dir_expand)

In [None]:
# UNQ_C6 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: train_model
def train_model(classifier, train_task, eval_task, n_steps, output_dir):
    '''
    Build a training.Loop for the given model and tasks, run it, and return it.

    Input: 
        classifier - the model to train
        train_task - Training task
        eval_task - Evaluation task
        n_steps - value passed to the loop's run() as n_steps
        output_dir - folder handed to the loop as its output directory
    Output:
        the training.Loop object after run() has finished
    '''
### START CODE HERE (Replace instances of 'None' with your code) ###
    # model and training task are positional; the rest goes in by keyword
    loop = training.Loop(classifier, train_task, eval_task=eval_task, output_dir=output_dir)
    loop.run(n_steps=n_steps)
### END CODE HERE ###

    # Hand back the loop itself, since it has the model.
    return loop

In [None]:
training_loop = train_model(model, train_task, eval_task, 100, output_dir_expand)

# 4.2 Practice Making a prediction

In [None]:
# Create a generator object
tmp_train_generator = train_generator(16)

# get one batch
tmp_batch = next(tmp_train_generator)

# Position 0 has the model inputs (tweets as tensors),
# position 1 has the targets (the actual labels),
# position 2 has the example weights
tmp_inputs, tmp_targets, tmp_example_weights = tmp_batch

# The batch has three elements, so the message names all three of them
print(f"The batch is a tuple of length {len(tmp_batch)}: position 0 contains the tweets, position 1 the targets, and position 2 the example weights.") 
print(f"The shape of the tweet tensors is {tmp_inputs.shape} (num of examples, length of tweet tensors)")
print(f"The shape of the labels is {tmp_targets.shape}, which is the batch size.")
print(f"The shape of the example_weights is {tmp_example_weights.shape}, which is the same as inputs/targets size.")

In [None]:
# feed the tweet tensors into the model to get a prediction
tmp_pred = training_loop.eval_model(tmp_inputs)
print(f"The prediction shape is {tmp_pred.shape}, num of tensor_tweets as rows")
# The classifier ends in a LogSoftmax layer, so the columns hold
# log probabilities (values <= 0), not probabilities.
print("Column 0 is the log probability of a negative sentiment (class 0)")
print("Column 1 is the log probability of a positive sentiment (class 1)")
print()
print("View the prediction array")
tmp_pred

In [None]:
# Turn the log probabilities into category predictions:
# a tweet is predicted positive when column 1 is larger than column 0
tmp_is_positive = tmp_pred[:,1] > tmp_pred[:,0]
for i, p in enumerate(tmp_is_positive):
    print(f"Neg log prob {tmp_pred[i,0]:.4f}\tPos log prob {tmp_pred[i,1]:.4f}\t is positive? {p}\t actual {tmp_targets[i]}")

In [None]:
# View the array of booleans
print("Array of booleans")
display(tmp_is_positive)

# convert boolean to type int32
# True is converted to 1
# False is converted to 0
tmp_is_positive_int = tmp_is_positive.astype(np.int32)


# View the array of integers
print("Array of integers")
display(tmp_is_positive_int)

# convert boolean to type float32
tmp_is_positive_float = tmp_is_positive.astype(np.float32)

# View the array of floats
print("Array of floats")
display(tmp_is_positive_float)

In [None]:
tmp_pred.shape

In [None]:
print(f"True == 1: {True == 1}")
print(f"True == 2: {True == 2}")
print(f"False == 0: {False == 0}")
print(f"False == 2: {False == 2}")

# Part 5: Evaluation

In [None]:
# UNQ_C7 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: compute_accuracy
def compute_accuracy(preds, y, y_weights):
    """
    Weighted accuracy of two-class predictions.

    Input: 
        preds: a tensor of shape (dim_batch, output_dim); only columns 0 and 1
            are read, and a row is predicted positive when column 1 > column 0
        y: the true labels, compared element-wise with the predicted class
            (presumably one 0/1 label per row of preds -- as produced by the
            data generator)
        y_weights: a weight for each example
    Output: 
        accuracy: weighted_num_correct / sum_weights
        weighted_num_correct: Sum of the weights of the correct predictions
        sum_weights: Sum of all the weights
    """
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    # Predicted class per row as int32: 1 where column 1 wins, else 0
    predicted_class = (preds[:, 1] > preds[:, 0]).astype(np.int32)

    # 1.0 where the prediction matches the label, 0.0 elsewhere
    hits = (predicted_class == y).astype(np.float32)

    # Denominator: total weight of all examples
    sum_weights = np.sum(y_weights)

    # Numerator: total weight of the correctly classified examples
    weighted_num_correct = np.sum(hits * y_weights)

    accuracy = weighted_num_correct / sum_weights
    ### END CODE HERE ###
    return accuracy, weighted_num_correct, sum_weights

In [None]:
# test your function
tmp_val_generator = val_generator(64)

# get one batch
tmp_batch = next(tmp_val_generator)

# Position 0 has the model inputs (tweets as tensors),
# position 1 has the targets (the actual labels),
# position 2 has the example weights
tmp_inputs, tmp_targets, tmp_example_weights = tmp_batch

# feed the tweet tensors into the model to get a prediction
tmp_pred = training_loop.eval_model(tmp_inputs)

tmp_acc, tmp_num_correct, tmp_num_predictions = compute_accuracy(preds=tmp_pred, y=tmp_targets, y_weights=tmp_example_weights)

# The batch comes from val_generator, so report it as a validation batch
print(f"Model's prediction accuracy on a single validation batch is: {100 * tmp_acc}%")
print(f"Weighted number of correct predictions {tmp_num_correct}; weighted number of total observations predicted {tmp_num_predictions}")

# 5.2 Testing your model on Validation Data

In [None]:
# UNQ_C8 (UNIQUE CELL IDENTIFIER, DO NOT EDIT)
# GRADED FUNCTION: test_model
def test_model(generator, model):
    '''
    Input: 
        generator: an iterator instance that provides batches of inputs and targets
        model: a model instance 
    Output: 
        accuracy: float corresponding to the accuracy
    '''
    
    accuracy = 0.
    total_num_correct = 0
    total_num_pred = 0
    
    ### START CODE HERE (Replace instances of 'None' with your code) ###
    for batch in generator: 
        
        # Retrieve the inputs from the batch
        inputs = batch[0]
        
        # Retrieve the targets (actual labels) from the batch
        targets = batch[1]
        
        # Retrieve the example weight.
        example_weight = batch[2]

        # Make predictions using the inputs
        pred = model(inputs)
        
        # Calculate accuracy for the batch by comparing its predictions and targets
        batch_accuracy, batch_num_correct, batch_num_pred = compute_accuracy(pred, targets, example_weight) 
        
        # Update the total number of correct predictions
        # by adding the number of correct predictions from this batch
        total_num_correct += batch_num_correct
        
        # Update the total number of predictions 
        # by adding the number of predictions made for the batch
        total_num_pred += batch_num_pred

    # Calculate accuracy over all examples
    accuracy = total_num_correct / total_num_pred
    
    ### END CODE HERE ###
    return accuracy

In [None]:
# DO NOT EDIT THIS CELL
# testing the accuracy of your model: this takes around 20 seconds
model = training_loop.eval_model
accuracy = test_model(test_generator(16), model)

print(f'The accuracy of your model on the validation set is {accuracy:.4f}', )

# Part 6: Testing with your own input

In [None]:
# Predict the sentiment of your own sentence
def predict(sentence):
    """Classify one sentence with the notebook-level `model`.

    The sentence is converted with tweet_to_tensor and Vocab, given a leading
    batch dimension of size 1, and passed to `model`. The sentence is labelled
    positive when the model's score in column 1 exceeds the one in column 0.

    Returns:
        (preds, sentiment): preds is 1 or 0, sentiment is 'positive' or "negative"
    """
    word_ids = tweet_to_tensor(sentence, vocab_dict=Vocab)

    # Shape (len,) -> (1, len): the model works on batches
    batch = np.array(word_ids)[None, :]

    scores = model(batch)

    # 1 when the positive column wins, otherwise 0
    preds = int(scores[0, 1] > scores[0, 0])
    sentiment = 'positive' if preds == 1 else "negative"

    return preds, sentiment

In [None]:
# try a positive sentence
sentence = "It's such a nice day, think i'll be taking Sid to Ramsgate fish and chips for lunch at Peter's fish factory and then the beach maybe"
tmp_pred, tmp_sentiment = predict(sentence)
print(f"The sentiment of the sentence \n***\n\"{sentence}\"\n***\nis {tmp_sentiment}.")

print()
# try a negative sentence
sentence = "I hated my day, it was the worst, I'm so sad."
tmp_pred, tmp_sentiment = predict(sentence)
print(f"The sentiment of the sentence \n***\n\"{sentence}\"\n***\nis {tmp_sentiment}.")