Sentiment Classification 
Accuracy: ~77%

In [None]:
import nltk
nltk.download('twitter_samples')
!pip install -U trax

In [None]:
from nltk.corpus import twitter_samples
import trax
import trax.fastmath as tnp
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
import random

Load the data

In [None]:
def load_data():
  """Read the positive and negative tweets from NLTK's twitter_samples corpus.

  Returns:
    A (positive_tweets, negative_tweets) pair, each being whatever
    `twitter_samples.strings` returns for the corresponding JSON file.
  """
  corpus_files = ('positive_tweets.json', 'negative_tweets.json')
  positive, negative = (twitter_samples.strings(name) for name in corpus_files)
  return positive, negative

Split data into training and testing

In [None]:
# Per class, the first 3000 tweets are used for training and everything
# after that is held out for validation.
positive_tweets, negative_tweets = load_data()
pos_tweet_train, pos_tweet_val = positive_tweets[:3000], positive_tweets[3000:]
neg_tweet_train, neg_tweet_val = negative_tweets[:3000], negative_tweets[3000:]


Tokenize and create Word Vocabulary

In [None]:
# Fit the vocabulary on the training tweets only (validation text stays unseen).
#
# The corpus is intentionally NOT shuffled before fitting: fitting only counts
# words, and Keras orders equally frequent words by first appearance. An
# unseeded shuffle therefore changes which words fall inside the `num_words`
# cutoff (and their ids) on every run, which makes results irreproducible and
# can mismatch token ids against a previously saved checkpoint.
all_train = pos_tweet_train + neg_tweet_train
# Keep the 1000 most frequent ids; everything else maps to the '<OOV>' token.
tokenizer = Tokenizer(num_words=1000,oov_token='<OOV>')
tokenizer.fit_on_texts(all_train)
# Full word -> id mapping (not truncated by num_words).
word_index = tokenizer.word_index

In [None]:
# Convert every split from raw text to lists of token ids using the
# tokenizer fitted on the training tweets.
pos_seq_train, neg_seq_train, pos_seq_val, neg_seq_val = (
    tokenizer.texts_to_sequences(tweets)
    for tweets in (pos_tweet_train, neg_tweet_train, pos_tweet_val, neg_tweet_val)
)

Data Generator for Training and Evaluation

In [None]:
def data_gen(pos,neg,batch_size,epoch_num):
    """Yield class-balanced batches of padded token-id sequences.

    Every batch contains up to ``batch_size // 2`` positive rows followed by
    the same number of negative rows. Both classes are reshuffled at the start
    of each epoch, and an epoch covers ``min(len(pos), len(neg))`` examples per
    class, so the final batch of an epoch may be smaller than ``batch_size``.

    Args:
        pos: sequences of token ids for the positive class.
        neg: sequences of token ids for the negative class.
        batch_size: total rows per batch; must be a positive even number.
        epoch_num: number of passes over the data before the generator ends.

    Yields:
        ``(inputs, targets, example_weights)`` where ``inputs`` has one row of
        25 token ids per example (post-padded), ``targets`` is 1.0 for
        positive rows and 0.0 for negative rows, and ``example_weights`` is
        all ones.

    Raises:
        AssertionError: if ``batch_size`` is not a positive even number.
    """
    assert batch_size %2 ==0
    assert batch_size > 0
    n_to_take = batch_size // 2

    pos = pad_sequences(pos,maxlen = 25,padding = 'post')
    neg = pad_sequences(neg,maxlen = 25,padding = 'post')
    pos_index_line = list(range(len(pos)))
    neg_index_line = list(range(len(neg)))
    # Only walk as far as the smaller class so every batch stays balanced and
    # no index ever runs past the end of either array.
    n_per_class = min(len(pos), len(neg))

    for epoch in range(epoch_num):
        random.shuffle(pos_index_line)
        random.shuffle(neg_index_line)

        for start in range(0, n_per_class, n_to_take):
            stop = min(start + n_to_take, n_per_class)
            # Fresh slices per batch: rows from earlier batches must not be
            # carried over into later ones.
            pos_batch = pos[pos_index_line[start:stop]]
            neg_batch = neg[neg_index_line[start:stop]]

            input_ = np.concatenate((pos_batch,neg_batch),axis=0)
            target = np.append(np.ones(len(pos_batch)) , np.zeros(len(neg_batch)) )
            example_weights = np.ones_like(target)
            yield input_ , target, example_weights

def train_data_gen(batch_size,epoch):
    """Return a `data_gen` batch generator over the training sequences.

    `batch_size` and `epoch` are forwarded to `data_gen` as its batch size
    and number of epochs.
    """
    return data_gen(
        pos=pos_seq_train,
        neg=neg_seq_train,
        batch_size=batch_size,
        epoch_num=epoch,
    )
   


def val_data_gen(batch_size,epoch):
    """Return a `data_gen` batch generator over the validation sequences.

    `batch_size` and `epoch` are forwarded to `data_gen` as its batch size
    and number of epochs.
    """
    return data_gen(
        pos=pos_seq_val,
        neg=neg_seq_val,
        batch_size=batch_size,
        epoch_num=epoch,
    )
  

Create Model

In [None]:
from trax import layers as tl

# Classifier: embed token ids, average the embeddings over axis 1, pass the
# result through four Dense layers, and finish with a 2-way log-softmax.
#
# NOTE(review): there is no activation layer between the Dense layers, so the
# stack looks equivalent to a single linear map — consider adding
# non-linearities (this would change the saved checkpoint structure).
# NOTE(review): vocab_size uses the full word_index, while the tokenizer is
# built with num_words=1000, so most embedding rows are presumably never
# looked up — confirm before shrinking.
model = tl.Serial(
    tl.Embedding(vocab_size=len(word_index), d_feature=500),
    tl.Mean(axis=1),
    *(tl.Dense(width) for width in (50, 50, 20, 2)),
    tl.LogSoftmax(),
)

Output directory for saving the model

In [None]:
# Create the checkpoint directory at the same path the training cell hands to
# training.Loop (~/output_dir/), instead of ./output_dir in the current
# working directory. exist_ok=True keeps a re-run of this cell from failing.
os.makedirs(os.path.expanduser('~/output_dir/'), exist_ok=True)

Training Loop

In [None]:
from trax.supervised import training

# Directory (under the user's home) that the loop is given for its output.
output_dir = os.path.expanduser('~/output_dir/')

# Training task: batches of 300 drawn from the training generator (up to 100
# epochs), cross-entropy loss, Adam with learning rate 1e-4, and a checkpoint
# every 50 steps.
train_task = training.TrainTask(
    labeled_data=train_data_gen(300, 100),
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.0001),
    n_steps_per_checkpoint=50,
)

# Evaluation task: 20 validation batches of size 20, reporting cross-entropy
# loss and accuracy.
eval_task = training.EvalTask(
    labeled_data=val_data_gen(20, 100),
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
    n_eval_batches=20,
)

# Tie the model, the training task and the evaluation task together.
training_loop = training.Loop(
    model,
    train_task,
    eval_tasks=[eval_task],
    output_dir=output_dir,
)

In [None]:
# Display the expanded output directory path as this cell's output.
output_dir

In [None]:
# Run the training loop with an argument of 600 (presumably the number of
# training steps — confirm against the trax Loop.run signature).
training_loop.run(600)