In [1]:
import os 
import random as rnd

# import relevant libraries
import trax

# set random seeds to make this notebook easier to replicate
trax.supervised.trainer_lib.init_random_number_generators(31)

# import trax.fastmath.numpy
import trax.fastmath.numpy as np

# import trax.layers
from trax import layers as tl

# import Layer from the utils.py file
from utils import Layer, load_tweets, process_tweet
#from utils import 

INFO:tensorflow:tokens_length=568 inputs_length=512 targets_length=114 noise_density=0.15 mean_noise_span_length=3.0 


[nltk_data] Downloading package twitter_samples to
[nltk_data]     /Users/takshshilarawat/nltk_data...
[nltk_data]   Package twitter_samples is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/takshshilarawat/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Loading in the data

In [2]:
import numpy as np

# Load positive and negative tweets
all_positive_tweets, all_negative_tweets = load_tweets()

# View the total number of positive and negative tweets.
print(f"The number of positive tweets: {len(all_positive_tweets)}")
print(f"The number of negative tweets: {len(all_negative_tweets)}")

# Split positive set into validation and training
val_pos   = all_positive_tweets[4000:] # generating validation set for positive tweets
train_pos  = all_positive_tweets[:4000]# generating training set for positive tweets

# Split negative set into validation and training
val_neg   = all_negative_tweets[4000:] # generating validation set for negative tweets
train_neg  = all_negative_tweets[:4000] # generating training set for nagative tweets

# Combine training data into one set
train_x = train_pos + train_neg 

# Combine validation data into one set
val_x  = val_pos + val_neg

# Set the labels for the training set (1 for positive, 0 for negative)
train_y = np.append(np.ones(len(train_pos)), np.zeros(len(train_neg)))

# Set the labels for the validation set (1 for positive, 0 for negative)
val_y  = np.append(np.ones(len(val_pos)), np.zeros(len(val_neg)))

print(f"length of train_x {len(train_x)}")
print(f"length of val_x {len(val_x)}")

The number of positive tweets: 5000
The number of negative tweets: 5000
length of train_x 8000
length of val_x 2000


## Building the vocabulary

The vocabulary will also include some special tokens
- `__PAD__`: padding
- `</e>`: end of line
- `__UNK__`: a token representing any word that is not in the vocabulary.

In [3]:
Vocab = {'__PAD__': 0, '__</e>__': 1, '__UNK__': 2} 

# Note that we build vocab using training data
for tweet in train_x: 
    processed_tweet = process_tweet(tweet)
    for word in processed_tweet:
        if word not in Vocab: 
            Vocab[word] = len(Vocab)
    
print("Total words in vocab are",len(Vocab))
display(Vocab)

Total words in vocab are 9088


{'__PAD__': 0,
 '__</e>__': 1,
 '__UNK__': 2,
 'followfriday': 3,
 'top': 4,
 'engag': 5,
 'member': 6,
 'commun': 7,
 'week': 8,
 ':)': 9,
 'hey': 10,
 'jame': 11,
 'odd': 12,
 ':/': 13,
 'pleas': 14,
 'call': 15,
 'contact': 16,
 'centr': 17,
 '02392441234': 18,
 'abl': 19,
 'assist': 20,
 'mani': 21,
 'thank': 22,
 'listen': 23,
 'last': 24,
 'night': 25,
 'bleed': 26,
 'amaz': 27,
 'track': 28,
 'scotland': 29,
 'congrat': 30,
 'yeaaah': 31,
 'yipppi': 32,
 'accnt': 33,
 'verifi': 34,
 'rqst': 35,
 'succeed': 36,
 'got': 37,
 'blue': 38,
 'tick': 39,
 'mark': 40,
 'fb': 41,
 'profil': 42,
 '15': 43,
 'day': 44,
 'one': 45,
 'irresist': 46,
 'flipkartfashionfriday': 47,
 'like': 48,
 'keep': 49,
 'love': 50,
 'custom': 51,
 'wait': 52,
 'long': 53,
 'hope': 54,
 'enjoy': 55,
 'happi': 56,
 'friday': 57,
 'lwwf': 58,
 'second': 59,
 'thought': 60,
 '’': 61,
 'enough': 62,
 'time': 63,
 'dd': 64,
 'new': 65,
 'short': 66,
 'enter': 67,
 'system': 68,
 'sheep': 69,
 'must': 70,
 'buy':

## Converting a tweet to a tensor

In [4]:
def tweet_to_tensor(tweet, vocab_dict, unk_token='__UNK__', verbose=False):
    
    word_l = process_tweet(tweet)
    if verbose:
        print("List of words from the processed tweet:")
        print(word_l)

    tensor_l = []
    unk_ID = vocab_dict[unk_token]
    
    if verbose:
        print(f"The unique integer ID for the unk_token is {unk_ID}")

    for word in word_l:
        if word in vocab_dict:
            word_ID = vocab_dict[word]
        else:
            word_ID=unk_ID

        tensor_l.append(word_ID) 
    
    return tensor_l

<a name="2.4"></a>
##  Creating a batch generator

In [5]:
def data_generator(data_pos, data_neg, batch_size, loop, vocab_dict, shuffle=False):

    assert batch_size % 2 == 0
    
    # Number of positive examples in each batch is half of the batch size
    # same with number of negative examples in each batch
    n_to_take = batch_size // 2
    
    # Use pos_index to walk through the data_pos array
    # same with neg_index and data_neg
    pos_index = 0
    neg_index = 0
    
    len_data_pos = len(data_pos)
    len_data_neg = len(data_neg)
    
    pos_index_lines = list(range(len_data_pos))
    neg_index_lines = list(range(len_data_neg))
    
    if shuffle:
        rnd.shuffle(pos_index_lines)
        rnd.shuffle(neg_index_lines)
        
    stop = False
    
    
    while not stop:  

        batch = []

        for i in range(n_to_take):
            if pos_index >= len_data_pos: 
                if not loop:
                    stop = True;
                    break;
                pos_index = 0
                
                if shuffle:
                    rnd.shuffle(pos_index_lines)
                    
            tweet = data_pos[pos_index_lines[pos_index]]
            tensor = tweet_to_tensor(tweet, vocab_dict)
            batch.append(tensor)
            pos_index = pos_index + 1

        for i in range(n_to_take):
            if neg_index>=len_data_neg:
                if not loop:
                    stop = True;
                    break;
                neg_index = 0
                
                if shuffle:
                    rnd.shuffle(neg_index_lines)
            tweet = data_neg[neg_index_lines[neg_index]]
            tensor = tweet_to_tensor(tweet, vocab_dict)
            batch.append(tensor)
            neg_index += 1
            
            
        if stop:
            break;

        pos_index += n_to_take
        
        neg_index += n_to_take

        max_len = max([len(t) for t in batch]) 

        tensor_pad_l = []
        
        for tensor in batch:
            n_pad = max_len-len(tensor)
            pad_l = [0]*n_pad
            tensor_pad = tensor+pad_l
            
            tensor_pad_l.append(tensor_pad)

        inputs = np.array(tensor_pad_l)

        target_pos = [1]*n_to_take
        target_neg = [0]*n_to_take
        target_l = target_pos+target_neg

        targets = np.array(target_l)
        
        example_weights = np.ones_like(targets)

        yield inputs, targets, example_weights

In [8]:
# Set the random number generator for the shuffle procedure
rnd.seed(30) 

# Create the training data generator
def train_generator(batch_size, shuffle = False):
    return data_generator(train_pos, train_neg, batch_size, True, Vocab, shuffle)

# Create the validation data generator
def val_generator(batch_size, shuffle = False):
    return data_generator(val_pos, val_neg, batch_size, True, Vocab, shuffle)

# Create the validation data generator
def test_generator(batch_size, shuffle = False):
    return data_generator(val_pos, val_neg, batch_size, False, Vocab, shuffle)

# Get a batch from the train_generator and inspect.
inputs, targets, example_weights = next(train_generator(4, shuffle=True))

# this will print a list of 4 tensors padded with zeros
print(f'Inputs: {inputs}')
print(f'Targets: {targets}')
print(f'Example Weights: {example_weights}')

Inputs: [[2005 4451 3201    9    0    0    0    0    0    0    0]
 [4954  567 2000 1454 5174 3499  141 3499  130  459    9]
 [3761  109  136  583 2930 3969    0    0    0    0    0]
 [ 250 3761    0    0    0    0    0    0    0    0    0]]
Targets: [1 1 0 0]
Example Weights: [1 1 1 1]


# Defining classes

<a name="3.1"></a>
##  ReLU class
<img src = "images/relu.png" style="width:300px;height:150px;"/>

$$ \mathrm{ReLU}(x) = \mathrm{max}(0,x) $$

In [9]:
class Relu(Layer):
    def forward(self, x):
        activation = np.maximum(0,x)
        return activation

## Dense class 

$$\mathrm{forward}(\mathbf{x},\mathbf{W}) = \mathbf{xW} $$


In [10]:
# use the fastmath module within trax
from trax import fastmath

# use the numpy module from trax
np = fastmath.numpy

# use the fastmath.random module from trax
random = fastmath.random

In [11]:
class Dense(Layer):
    def __init__(self, n_units, init_stdev=0.1):
        self._n_units = n_units
        self._init_stdev = init_stdev

    def forward(self, x):
        dense = np.dot(x,self.weights) 
        return dense

    def init_weights_and_state(self, input_signature, random_key):
        input_shape = input_signature.shape
        w = self._init_stdev*random.normal(key = random_key, shape = (input_shape[-1], self._n_units))
        self.weights = w
        return self.weights

## Model

<img src = "images/nn.png" style="width:400px;height:250px;"/>

- [tl.Dense](https://github.com/google/trax/blob/master/trax/layers/core.py#L29): Trax Dense layer implementation

- [tl.Serial](https://github.com/google/trax/blob/master/trax/layers/combinators.py#L26): Combinator that applies layers serially.  
    - We can pass in the layers as arguments to `Serial`, separated by commas. 
    - For example: `tl.Serial(tl.Embeddings(...), tl.Mean(...), tl.Dense(...), tl.LogSoftmax(...))`
- [tl.Embedding](https://github.com/google/trax/blob/1372b903bb66b0daccee19fd0b1fdf44f659330b/trax/layers/core.py#L113): Layer constructor function for an embedding layer.  
    - `tl.Embedding(vocab_size, d_feature)`.
    - `vocab_size` is the number of unique words in the given vocabulary.
    - `d_feature` is the number of elements in the word embedding (some choices for a word embedding size range from 150 to 300, for example).
- [tl.Mean](https://github.com/google/trax/blob/1372b903bb66b0daccee19fd0b1fdf44f659330b/trax/layers/core.py#L276): Calculates means across an axis.  In this case, please choose axis = 1 to get an average embedding vector (an embedding vector that is an average of all words in the vocabulary). 
- [tl.LogSoftmax](https://github.com/google/trax/blob/1372b903bb66b0daccee19fd0b1fdf44f659330b/trax/layers/core.py#L242): Implements log softmax function
- Here, you don't need to set any parameters for `LogSoftMax()`.

**Online documentation**

- [tl.Dense](https://trax-ml.readthedocs.io/en/latest/trax.layers.html#trax.layers.core.Dense)

- [tl.Serial](https://trax-ml.readthedocs.io/en/latest/trax.layers.html#module-trax.layers.combinators)

- [tl.Embedding](https://trax-ml.readthedocs.io/en/latest/trax.layers.html#trax.layers.core.Embedding)

- [tl.Mean](https://trax-ml.readthedocs.io/en/latest/trax.layers.html#trax.layers.core.Mean)

- [tl.LogSoftmax](https://trax-ml.readthedocs.io/en/latest/trax.layers.html#trax.layers.core.LogSoftmax)

In [12]:
# View documentation on tl.Dense
#help(tl.Dense)
#help(tl.Serial)
#help(tl.Embedding)
#help(tl.Mean)
#help(tl.LogSoftmax)


In [13]:
def classifier(vocab_size=len(Vocab), embedding_dim=256, output_dim=2, mode='train'):
    embed_layer = tl.Embedding(
        vocab_size=vocab_size, # Size of the vocabulary
        d_feature=embedding_dim)  # Embedding dimension

    mean_layer = tl.Mean(axis=1)
    
    dense_output_layer = tl.Dense(n_units = output_dim)

    log_softmax_layer = tl.LogSoftmax()

    model = tl.Serial(
      embed_layer, # embedding layer
      mean_layer, # mean layer
      dense_output_layer, # dense output layer 
      log_softmax_layer # log softmax layer
    )
    return model

## Training

To train a model on a task, Trax defines an abstraction [`trax.supervised.training.TrainTask`](https://trax-ml.readthedocs.io/en/latest/trax.supervised.html#trax.supervised.training.TrainTask) which packages the train data, loss and optimizer (among other things) together into an object.

Similarly to evaluate a model, Trax defines an abstraction [`trax.supervised.training.EvalTask`](https://trax-ml.readthedocs.io/en/latest/trax.supervised.html#trax.supervised.training.EvalTask) which packages the eval data and metrics (among other things) into another object.

The final piece tying things together is the [`trax.supervised.training.Loop`](https://trax-ml.readthedocs.io/en/latest/trax.supervised.html#trax.supervised.training.Loop)

In [14]:
#help(trax.supervised.training.TrainTask)
#help(trax.supervised.training.EvalTask)
#help(trax.supervised.training.Loop)
#help(trax.optimizers)

## Training the model

In [15]:
from trax.supervised import training

batch_size = 16
rnd.seed(271)

train_task = training.TrainTask(
    labeled_data=train_generator(batch_size=batch_size, shuffle=True),
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.01),
    n_steps_per_checkpoint=10,
)

eval_task = training.EvalTask(
    labeled_data=val_generator(batch_size=batch_size, shuffle=True),
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
)

model = classifier()

In [19]:
output_dir = './model/'
output_dir_expand = os.path.expanduser(output_dir)
print(output_dir_expand)

./model/


In [20]:
def train_model(classifier, train_task, eval_task, n_steps, output_dir):
    training_loop = training.Loop(
                                classifier, # The learning model
                                train_task, # The training task
                                eval_task = eval_task, # The evaluation task
                                output_dir = output_dir) # The output directory

    training_loop.run(n_steps = n_steps)

    return training_loop

In [21]:
training_loop = train_model(model, train_task, eval_task, 100, output_dir_expand)

Step      1: train CrossEntropyLoss |  0.88939184
Step      1: eval  CrossEntropyLoss |  0.68833977
Step      1: eval          Accuracy |  0.50000000
Step     10: train CrossEntropyLoss |  0.61036736
Step     10: eval  CrossEntropyLoss |  0.52182281
Step     10: eval          Accuracy |  0.68750000
Step     20: train CrossEntropyLoss |  0.34137666
Step     20: eval  CrossEntropyLoss |  0.20654777
Step     20: eval          Accuracy |  1.00000000
Step     30: train CrossEntropyLoss |  0.20208919
Step     30: eval  CrossEntropyLoss |  0.21594885
Step     30: eval          Accuracy |  0.93750000
Step     40: train CrossEntropyLoss |  0.19611199
Step     40: eval  CrossEntropyLoss |  0.17582780
Step     40: eval          Accuracy |  1.00000000
Step     50: train CrossEntropyLoss |  0.11203776
Step     50: eval  CrossEntropyLoss |  0.07589274
Step     50: eval          Accuracy |  1.00000000
Step     60: train CrossEntropyLoss |  0.09375445
Step     60: eval  CrossEntropyLoss |  0.09290722


## Making a prediction

In [22]:
tmp_train_generator = train_generator(16)


tmp_batch = next(tmp_train_generator)

# Position 0 has the model inputs (tweets as tensors)
# position 1 has the targets (the actual labels)
tmp_inputs, tmp_targets, tmp_example_weights = tmp_batch

tmp_pred = training_loop.eval_model(tmp_inputs)
print(f"The prediction shape is {tmp_pred.shape}, num of tensor_tweets as rows")
print("Column 0 is the probability of a negative sentiment (class 0)")
print("Column 1 is the probability of a positive sentiment (class 1)")
print()
print("View the prediction array")
tmp_pred

The prediction shape is (16, 2), num of tensor_tweets as rows
Column 0 is the probability of a negative sentiment (class 0)
Column 1 is the probability of a positive sentiment (class 1)

View the prediction array


DeviceArray([[-4.9417334e+00, -7.1678162e-03],
             [-6.5846405e+00, -1.3823509e-03],
             [-5.4463038e+00, -4.3215752e-03],
             [-4.3487496e+00, -1.3007164e-02],
             [-4.9131699e+00, -7.3764324e-03],
             [-4.7097702e+00, -9.0477467e-03],
             [-5.2801600e+00, -5.1045418e-03],
             [-4.1103230e+00, -1.6538382e-02],
             [-1.8327236e-03, -6.3028107e+00],
             [-4.7376156e-03, -5.3545637e+00],
             [-3.4697056e-03, -5.6654320e+00],
             [-1.1444092e-05, -1.1379559e+01],
             [-1.0051131e-02, -4.6050968e+00],
             [-1.0130405e-03, -6.8951969e+00],
             [-6.1047077e-03, -5.1017342e+00],
             [-7.4422359e-03, -4.9043016e+00]], dtype=float32)

In [23]:
tmp_is_positive = tmp_pred[:,1] > tmp_pred[:,0]
for i, p in enumerate(tmp_is_positive):
    print(f"Neg log prob {tmp_pred[i,0]:.4f}\tPos log prob {tmp_pred[i,1]:.4f}\t is positive? {p}\t actual {tmp_targets[i]}")

Neg log prob -4.9417	Pos log prob -0.0072	 is positive? True	 actual 1
Neg log prob -6.5846	Pos log prob -0.0014	 is positive? True	 actual 1
Neg log prob -5.4463	Pos log prob -0.0043	 is positive? True	 actual 1
Neg log prob -4.3487	Pos log prob -0.0130	 is positive? True	 actual 1
Neg log prob -4.9132	Pos log prob -0.0074	 is positive? True	 actual 1
Neg log prob -4.7098	Pos log prob -0.0090	 is positive? True	 actual 1
Neg log prob -5.2802	Pos log prob -0.0051	 is positive? True	 actual 1
Neg log prob -4.1103	Pos log prob -0.0165	 is positive? True	 actual 1
Neg log prob -0.0018	Pos log prob -6.3028	 is positive? False	 actual 0
Neg log prob -0.0047	Pos log prob -5.3546	 is positive? False	 actual 0
Neg log prob -0.0035	Pos log prob -5.6654	 is positive? False	 actual 0
Neg log prob -0.0000	Pos log prob -11.3796	 is positive? False	 actual 0
Neg log prob -0.0101	Pos log prob -4.6051	 is positive? False	 actual 0
Neg log prob -0.0010	Pos log prob -6.8952	 is positive? False	 actual 0

In [24]:
print("Array of booleans")
display(tmp_is_positive)

# convert boolean to type int32
# True is converted to 1
# False is converted to 0
tmp_is_positive_int = tmp_is_positive.astype(np.int32)


# View the array of integers
print("Array of integers")
display(tmp_is_positive_int)

# convert boolean to type float32
tmp_is_positive_float = tmp_is_positive.astype(np.float32)

# View the array of floats
print("Array of floats")
display(tmp_is_positive_float)

Array of booleans


DeviceArray([ True,  True,  True,  True,  True,  True,  True,  True,
             False, False, False, False, False, False, False, False],            dtype=bool)

Array of integers


DeviceArray([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

Array of floats


DeviceArray([1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
             0.], dtype=float32)

#  Evaluation  

<a name="5.1"></a>
## Computing the accuracy on a batch

In [25]:
def compute_accuracy(preds, y, y_weights):
    
    is_pos =  preds[:, 1] > preds[:, 0] 
    is_pos_int = is_pos.astype(np.int32)
    
    correct = is_pos_int==y
    
    sum_weights = np.sum(y_weights)
    
    correct_float = correct.astype(np.float32)
    weighted_correct_float = correct_float*y_weights
    weighted_num_correct = np.sum(weighted_correct_float)
 
    accuracy = weighted_num_correct/sum_weights

    return accuracy, weighted_num_correct, sum_weights

## Testing model on Validation Data

- The generator allows you to get batches of data. We can use it with a `for` loop:

```
for batch in iterator: 
   # do something with that batch
```

`batch` has dimensions `(X, Y, weights)`. 
- Column 0 corresponds to the tweet as a tensor (input).
- Column 1 corresponds to its target (actual label, positive or negative sentiment).
- Column 2 corresponds to the weights associated (example weights)
- We can feed the tweet into model and it will return the predictions for the batch. 



In [27]:
def test_model(generator, model):

    accuracy = 0.
    total_num_correct = 0
    total_num_pred = 0
    
    for batch in generator: 

        inputs = batch[0]
        targets = batch[1]
        example_weight = batch[2]

        pred = model(inputs)
        
        batch_accuracy, batch_num_correct, batch_num_pred = compute_accuracy(pred, targets, example_weight)
        
        total_num_correct += batch_num_correct
        
        total_num_pred += batch_num_pred
    accuracy = total_num_correct/total_num_pred
    return accuracy

In [None]:
model = training_loop.eval_model
accuracy = test_model(test_generator(16), model)

print(f'The accuracy of your model on the validation set is {accuracy:.4f}', )