In [1]:
# Import the required libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re, random
from copy import copy,deepcopy
# Download nltk stopwords corpus.
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.model_selection import StratifiedShuffleSplit
from nltk import FreqDist
from sklearn.metrics import precision_recall_fscore_support


[nltk_data] Downloading package stopwords to
[nltk_data]     /home/stud/btech/cse/2016/rsrivatsa.cs16/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
random.seed(42)
np.random.seed(42)

In [3]:
class FullyConnected():
    layer_count = 0

    def __init__(self, out_features=100, in_features=100):
        '''
            Fully Connected layer
            out_features: No of Output Features
            no_of_inp_featues: No of Input Features
        '''
        self.out_features = out_features
        self.in_features = in_features
        self.weight = np.random.normal(
            loc=0.0, scale=1/np.sqrt(self.out_features), size=(self.out_features, self.in_features))
        self.bias = np.zeros((self.out_features, 1), dtype=np.float32)
        self.weight_grad = None
        self.bias_grad = None
        self.layer_id = FullyConnected.layer_count
        FullyConnected.layer_count += 1

    def __str__(self, ):
        return 'Fully Connected Layer ID = {}: (In:{}, Out:{})'.format(self.layer_id, self.in_features, self.out_features)

    def forward(self, x):
        '''
            x: Input (N, F_in) where N = No. of samples, F_in: No. of input features
            
            returns:
                Wx + B (N, F_out) where W, B are trainable weight and bias where N: No. of samples, F_out Output dimension/features.
        '''
        assert x.shape[1] == self.in_features, "Input dimension Mismatch"
        wx = np.matmul(self.weight, np.transpose(x, (1, 0)))
        wx_plus_b = np.add(wx, self.bias)
        return np.transpose(wx_plus_b, (1, 0))

    def backward(self, y_grad, y, x):
        '''
            y_grad: Gradiant of output (N, F_out)
            y: Output of layer (N, F_out)
            x: input of layer (N, F_in)
        '''
        grad_x = np.matmul(y_grad, self.weight)

        weight_grad = np.matmul(np.transpose(y_grad, (1, 0)), x)
        if self.weight_grad is not None:
          self.weight_grad += weight_grad
        else:
          self.weight_grad = weight_grad
        assert y_grad.shape == y.shape, "y_grad.shape: {}, y.shape: {}".format(
            y_grad.shape, y.shape)

        bias_grad = np.sum(np.transpose(y_grad, (1, 0)), axis=1, keepdims=True)
        assert bias_grad.shape == self.bias.shape, "bias_grad.shape: {}, self.bias.shape: {}".format(
            bias_grad.shape, self.bias.shape)
        if self.bias_grad is not None:
          self.bias_grad += bias_grad
        else:
          self.bias_grad = bias_grad
        return grad_x

    def apply_gradients(self, lr=0.001):

        self.weight -= lr * self.weight_grad
        self.bias -= lr * self.bias_grad

        convergence_criteria = np.sum(np.absolute(
            self.weight_grad)) + np.sum(np.absolute(self.bias_grad))
        self.weight_grad = None
        self.bias_grad = None
        # deb(convergence_criteria)

        if convergence_criteria < 1e-3:
            return True
        else:
            return False

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)


class SoftmaxCrossEntropyLoss():
  """
    Combines a softmax layer with the cross-entropy loss
  """
  layer_count = 0

  def __init__(self):
      self.layer_id = SoftmaxCrossEntropyLoss.layer_count
      SoftmaxCrossEntropyLoss.layer_count += 1

  def __str__(self, ):
      return 'Softmax Cross Entropy Loss Layer ID = {}'.format(self.layer_id)

  def forward(self, x, y_true):
    '''
      x: Input (N, C) where N = No. of samples, C = No. of classes
      y_true: Target (N, C), boolean values
    '''
    # Softmax
    x_stable = x - np.max(x, axis=1, keepdims=True)
    p = np.exp(x_stable) / np.sum(np.exp(x_stable), axis=1, keepdims=True)

    # Cross Entropy Loss
    loss = y_true * \
        (- x_stable + np.log(np.sum(np.exp(x_stable), axis=1, keepdims=True)))
    loss = np.mean(np.sum(loss, axis=-1))
    return p, loss

  def backward(self, y_pred, y_true):
    '''
      y_pred: (N, C)
      y_true: (N, C)
    '''
    # Using y_pred.shape[0] as a workaround for batch_size.
    return (y_pred - y_true) / y_pred.shape[0]

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class Sigmoid():
  """
  Class for implementing sigmoid layer.
  """
  layer_count = 0

  def __init__(self):
    self.layer_id = Sigmoid.layer_count
    Sigmoid.layer_count += 1

  def __str__(self, ):
      return 'Sigmoid Layer ID = {}'.format(self.layer_id)

  def forward(self, x):
    """
    Function for implemeting the sigmoid expression.

    x: Input(N, C) where N = number of samples, C = number of classes.
    """
    y = 1.0 / (1.0 + np.exp(-x))
    return y

  def backward(self, y_grad, y, x):
    """
    Function for calculating the gradients of the current sigmoid perceptron.

    y_grad: Gradient at the output.
    y: The calculated forward(x).
    """
    w_grad = y_grad * y * (1 - y)
    return w_grad

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class Tanh():
  """
  Class for implementing tanh layer.
  """
  layer_count = 0

  def __init__(self):
    self.layer_id = Tanh.layer_count
    Tanh.layer_count += 1

  def __str__(self, ):
      return 'Tanh Layer ID = {}'.format(self.layer_id)

  def forward(self, x):
    """
    Function for implemeting the tanh expression.

    x: Input(N, C) where N = number of samples, C = number of classes.
    """
    y = np.tanh(x)
    return y

  def backward(self, y_grad, y, x):
    """
    Function for calculating the gradients of the current sigmoid perceptron.

    y_grad: Gradient at the output.
    y: The calculated forward(x).
    """
    w_grad = y_grad * (1 - (y ** 2))
    return w_grad

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class SoftmaxLayer():
  """
  Class implementing a softmax layer
  """
  layer_count = 0

  def __init__(self):
    self.layer_id = SoftmaxLayer.layer_count
    SoftmaxLayer.layer_count += 1

  def __str__(self, ):
      return 'Softmax Layer ID = {}'.format(self.layer_id)

  def forward(self, x):
    '''
      x: Input (N, C) where N = No. of samples, C = No. of classes
    '''
    # Softmax
    x_stable = x - np.max(x, axis=1, keepdims=True)
    p = np.exp(x_stable) / np.sum(np.exp(x_stable), axis=1, keepdims=True)

    return p

  def backward(self, y_grad, y, x):
    '''
    y_grad: Gradient at output
    y: (N, C)
    '''
    raise NotImplementedError(
        "Backprop not implemented for SoftmaxLayer, please use SoftmaxCrossEntropyLoss instead.")

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class SigmoidCrossEntropyLoss():
  """
    Combines a sigmoid layer with the cross-entropy loss.
  """
  layer_count = 0

  def __init__(self):
    self.layer_id = SigmoidCrossEntropyLoss.layer_count
    SigmoidCrossEntropyLoss.layer_count += 1

  def __str__(self, ):
    return 'Softmax Cross Entropy Loss Layer ID = {}'.format(self.layer_id)

  def forward(self, x, y_true):
    '''
      x: Input (N, C) where N = No. of samples, C = No. of classes
      y_true: Target (N, C), boolean values
    '''
    # Sigmoid
    p = 1.0 / (1.0 + np.exp(-x))

    # Cross Entropy Loss
    x_stable = np.maximum(x, 0)
    loss = x_stable - x * y_true + np.log(1 + np.exp(np.absolute(x)))
    loss = np.mean(np.sum(loss, axis=-1))
    return p, loss

  def backward(self, y_pred, y_true):
    '''
      y_pred: (N, C)
      y_true: (N, C)
    '''
    # Using y_pred.shape[0] as a workaround for batch_size.
    return (y_pred - y_true) / y_pred.shape[0]

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class SigmoidMSELoss():
  """
    Combines a sigmoid layer with the mean squared error loss.
  """
  layer_count = 0

  def __init__(self):
    self.layer_id = SigmoidMSELoss.layer_count
    SigmoidMSELoss.layer_count += 1

  def __str__(self, ):
      return 'Sigmoid Mean Squared Error Loss Layer ID = {}'.format(self.layer_id)

  def forward(self, x, y_true):
    '''
      x: Input (N, C) where N = No. of samples, C = No. of classes
      y_true: Target (N, C), boolean values
    '''
    # Sigmoid
    p = 1.0 / (1.0 + np.exp(-x))

    # Mean Squared Error Loss
    squared_error = (y_true - p) ** 2
    assert len(squared_error.shape) == 2
    loss = (np.sum(squared_error, axis=-1)) / 2
    loss = np.mean(loss, axis=-1)
    return p, loss

  def backward(self, y_pred, y_true):
    '''
      y_pred: (N, C)
      y_true: (N, C)
    '''
    # Using y_pred.shape[0] as a workaround for batch_size.
    return -(y_true - y_pred) * y_pred * (1 - y_pred) / y_pred.shape[0]

  def __call__(self, *args, **kwargs):
    return self.forward(*args, **kwargs)

  def apply_gradients(self, lr=None):
    return True


class MultiLayerPerceptron():
  def __init__(self, layers, loss_layer, final_layer):
    self.layers = layers
    self.loss_layer = loss_layer
    self.final_layer = final_layer
    self.all_outputs = []
    self.train = True

  def __str__(self, ):
      summary = []
      for layer in self.layers:
          summary.append(str(layer))
      if self.final_layer is not None:
          summary.append('Final Layer: ' + str(self.final_layer))
      if self.loss_layer is not None:
        summary.append('Loss Layer: ' + str(self.loss_layer))
      return ' \n'.join(summary)

  def forward(self, x, y_true=None):
    # Training mode
    if self.train:
      assert (self.loss_layer is not None) and (
          y_true is not None), "Training mode, please pass y_true and set a loss layer"
      all_outputs = [x]

      for layer in self.layers:
        #print(layer, x.shape, end=', ')
        x = layer(x)
        all_outputs.append(x)
        #print(x.shape)
        # deb(layer)

      pred, loss = self.loss_layer(x, y_true)
      all_outputs.append((pred, y_true))

      # deb(len(all_outputs))
      # Save outputs for multiple forward passes
      self.all_outputs.append(all_outputs)

    # Evaluation mode
    else:
      assert (self.final_layer is not None) and (
          y_true is None), "Evaluation mode, doesn't take y_true as input"
      all_outputs = [x]

      for layer in self.layers:
        x = layer(x)
        all_outputs.append(x)

      pred = self.final_layer(x)
      loss = None
      all_outputs.append((pred, y_true))

    return all_outputs, loss

  def backward(self):
      assert self.train

      for all_outputs in self.all_outputs[::-1]:
        y_grad = self.loss_layer.backward(*all_outputs[-1])
        all_outputs = all_outputs[:-1]
        for idx, layer in enumerate(self.layers[::-1]):
          # Pass the grad at the output, output of the layer, the input of the layer to backward()
          y_grad = layer.backward(
              y_grad, all_outputs[-idx-1], all_outputs[-idx-2])

      self.all_outputs = []
      return

  def train_mode(self):
      '''
      Set train mode
      '''
      self.train = True

  def eval_mode(self):
      '''
      Set eval mode
      '''
      self.train = False

  def optimize(self, learning_rate):
      '''
      Optimizes
      '''
      conv = True
      for layer in self.layers:
        conv = layer.apply_gradients(learning_rate) and conv
      return conv


In [4]:
def tester(model, x, y_true):
    model.eval_mode()
    out, _ = model.forward(x)
    model.train_mode()
    predictions = out[-1][0] > 0.5
    ground_truth = y_true > 0.5
    acc = np.mean(predictions == ground_truth)
    precision, recall, fscore, _ = precision_recall_fscore_support(
        ground_truth, predictions, average='weighted')
    return acc, precision, recall, fscore

In [5]:
random.seed(42)
np.random.seed(42)

In [6]:
deb = print

In [7]:
# Read the file line by line and clean the text of punctuation.

with open('pos_sentiment.txt', 'r') as file:
    data = file.readlines()
    data = [re.sub(r'([^\w\s]|[0-9])', ' ', line) for line in data]
    data_pos = [re.sub(r'(\s+)', ' ', line) for line in data]

with open('neg_sentiment.txt', 'r') as file:
    data = file.readlines()
    data = [re.sub(r'([^\w\s]|[0-9])', ' ', line) for line in data]
    data_neg = [re.sub(r'(\s+)', ' ', line) for line in data]

In [8]:
y_pos = [1]*len(data_pos)
y_neg= [0]*len(data_neg)

In [9]:
data_x = data_pos + data_neg
data_y = y_pos + y_neg

In [10]:
data_x = [sent.strip().split() for sent in data_x]

In [11]:
stop_words = set(stopwords.words('english'))
stop_words.add('')

data_x = [[w for w in words if w not in stop_words] for words in data_x]

In [12]:
# Function for generating the bag-of-words vocabulary.
def get_vocab(XData):
    vocab = set()

    for line in XData:
        for word in line:
            vocab.add(word)
            
    return vocab

In [13]:
all_tokens = sum(data_x,[])

In [14]:
fd = FreqDist(all_tokens)

In [15]:
vocab =[a[0] for a in fd.most_common() if a[1] >= 2]

In [16]:
len(vocab)

7400

In [17]:
word2idx = { w:i for i, w in enumerate(vocab) }

In [18]:
def vectorize(x_data, w2id):
    x_vec = np.zeros((len(x_data), len(w2id)), dtype=np.float32)
    for i,sent in enumerate(x_data):
        for word in sent:
            if word in w2id.keys():
                x_vec[i, w2id[word]] += 1
    return x_vec

In [19]:
data_x_vec = vectorize(data_x, word2idx)

In [20]:
data_y = np.expand_dims(np.asarray(data_y, dtype=np.float32), axis=1)

In [21]:
data_y.shape

(6662, 1)

In [22]:
train_val_idx, test_idx = next(StratifiedShuffleSplit(n_splits=1,test_size=0.2,random_state=0).split(data_x_vec, data_y))

In [23]:
train_idx, valid_idx = next(StratifiedShuffleSplit(n_splits=1,test_size=0.2,random_state=0).split(train_val_idx,data_y[train_val_idx]))

In [24]:
train_idx = train_val_idx[train_idx]
valid_idx = train_val_idx[valid_idx]

In [25]:
x_train, x_val, x_test = data_x_vec[train_idx], data_x_vec[valid_idx], data_x_vec[test_idx]
y_train, y_val, y_test = data_y[train_idx], data_y[valid_idx], data_y[test_idx]

In [26]:
layers = [
    FullyConnected(48, 7400),
    Tanh(),
    FullyConnected(8, 48),
    Tanh(),
    FullyConnected(1, 8),
]

mymlp = MultiLayerPerceptron(layers, SigmoidCrossEntropyLoss(), Sigmoid())


In [27]:
print(mymlp)

Fully Connected Layer ID = 0: (In:7400, Out:48) 
Tanh Layer ID = 0 
Fully Connected Layer ID = 1: (In:48, Out:8) 
Tanh Layer ID = 1 
Fully Connected Layer ID = 2: (In:8, Out:1) 
Final Layer: Sigmoid Layer ID = 0 
Loss Layer: Softmax Cross Entropy Loss Layer ID = 0


In [28]:
# losses_run = []
# for epno in range(5000):
#     mymlp.train_mode()
#     # Take forward step
#     _, loss = mymlp.forward(x_train, y_train)
#     # Make backward pass accumulating gradients
#     mymlp.backward()
#     # Take an optimization step updating the weights with gradients 
#     _ = mymlp.optimize(0.475)
#     # Print the loss
#     print('Loss', loss)
#     losses_run.append(loss)
#     if epno % 5 == 0:
#         mymlp.eval_mode()
#         out, _ = mymlp.forward(x_val)
#         out = (np.asarray(out) > 0.5)
#         mymlp.train_mode()
#         print('Accuracy {} : {}'.format(np.mean(out == (y_val > 0.5) ), epno)) 
# #         print()


In [29]:
best_mlp = None
best_f1 = 0.0
losses = []
batch_size = 16
n_batch = x_train.shape[0] // batch_size
if x_train.shape[0] % batch_size != 0:
    n_batch+=1
best_epno = -1
for epoch in range(50):
    mymlp.train_mode()
    for i in range(n_batch):
        _, loss = mymlp.forward(x_train[i*batch_size:(i+1)*batch_size ], 
                                y_train[i*batch_size:(i+1)*batch_size ])
        mymlp.backward()
        _ = mymlp.optimize(1)

    train_acc, train_prec, train_rec, train_f1 = tester(
        mymlp, x_train, y_train)
    print("Epoch {} train_acc: {}, train_prec: {}, train_rec: {}, train_f1: {}".format(
        epoch, train_acc, train_prec, train_rec, train_f1))
    if epoch % 1 == 0:
        val_acc, val_prec, val_rec, val_f1 = tester(
            mymlp, x_val, y_val)
        print("Epoch {} val_acc: {}, val_prec: {}, val_rec: {}, val_f1: {}".format(
            epoch, val_acc, val_prec, val_rec, val_f1))

        test_acc, test_prec, test_rec, test_f1 = tester(
            mymlp, x_test, y_test)
        print("Epoch {} test_acc: {}, test_prec: {}, test_rec: {}, test_f1: {}".format(
            epoch, test_acc, test_prec, test_rec, test_f1))

        if val_f1 > best_f1:
            best_mlp = deepcopy(mymlp)
            best_f1 = val_f1
            print("Saving as best model...")
            best_epno = epoch
#         if best_epno + 30 < epoch:
#             print('Validation not improving')
#             break
            
    
    print('Loss', loss)
    losses.append(loss)


Epoch 0 train_acc: 0.6685432793807178, train_prec: 0.7189520310368555, train_rec: 0.6685432793807178, train_f1: 0.6482672850491638
Epoch 0 val_acc: 0.6022514071294559, val_prec: 0.644415720002983, val_rec: 0.6022514071294559, val_f1: 0.5709333100439334
Epoch 0 test_acc: 0.5918979744936234, test_prec: 0.6265236292935525, test_rec: 0.5918979744936234, test_f1: 0.5621476797770871
Saving as best model...
Loss 1.8296735334769134
Epoch 1 train_acc: 0.8336851982172179, train_prec: 0.8369836876223256, train_rec: 0.8336851982172179, train_f1: 0.8332748108950534
Epoch 1 val_acc: 0.6782363977485929, val_prec: 0.6824500590931366, val_rec: 0.6782363977485929, val_f1: 0.6763678366429308
Epoch 1 test_acc: 0.6744186046511628, test_prec: 0.6775453329254992, test_rec: 0.6744186046511628, test_f1: 0.673009785916965
Saving as best model...
Loss 2.91241250402792
Epoch 2 train_acc: 0.8669950738916257, train_prec: 0.8719869071465749, train_rec: 0.8669950738916257, train_f1: 0.8665452339816178
Epoch 2 val_acc

In [30]:
best_f1

0.7157395876878396

In [34]:
test_acc, test_prec, test_rec, test_f1 = tester(
            mymlp, x_test, y_test)
print("Epoch {} test_acc: {}, test_prec: {}, test_rec: {}, test_f1: {}".format(
            epoch, test_acc, test_prec, test_rec, test_f1))

Epoch 49 test_acc: 0.7029257314328582, test_prec: 0.7032751478503576, test_rec: 0.7029257314328582, test_f1: 0.7027899106070267


In [35]:
lrs = [0.5, 1, 3]
lr_losses = []
for lr in lrs:
    layers = [
    FullyConnected(48, 7400),
    Tanh(),
    FullyConnected(8, 48),
    Tanh(),
    FullyConnected(1, 8),
    ]
    mymlp = MultiLayerPerceptron(layers, SigmoidCrossEntropyLoss(), Sigmoid())
    best_mlp = None
    best_f1 = 0.0
    losses = []
    batch_size = 16
    for epoch in range(50):
        mymlp.train_mode()
        for i in range(n_batch):
            _, loss = mymlp.forward(x_train[i*batch_size:(i+1)*batch_size ], 
                                    y_train[i*batch_size:(i+1)*batch_size ])
            mymlp.backward()
            _ = mymlp.optimize(lr)

        train_acc, train_prec, train_rec, train_f1 = tester(
            mymlp, x_train, y_train)
        print("Epoch {} train_acc: {}, train_prec: {}, train_rec: {}, train_f1: {}".format(
            epoch, train_acc, train_prec, train_rec, train_f1))
        if epoch % 1 == 0:
            val_acc, val_prec, val_rec, val_f1 = tester(
                mymlp, x_val, y_val)
            print("Epoch {} val_acc: {}, val_prec: {}, val_rec: {}, val_f1: {}".format(
                epoch, val_acc, val_prec, val_rec, val_f1))

            test_acc, test_prec, test_rec, test_f1 = tester(
                mymlp, x_test, y_test)
            print("Epoch {} test_acc: {}, test_prec: {}, test_rec: {}, test_f1: {}".format(
                epoch, test_acc, test_prec, test_rec, test_f1))

            if val_f1 > best_f1:
                best_mlp = deepcopy(mymlp)
                best_f1 = val_f1
                print("Saving as best model...")
                best_epno = epoch
    #         if best_epno + 30 < epoch:
    #             print('Validation not improving')
    #             break


        print('Loss', loss)
        losses.append(loss)
    lr_losses.append(losses)


Epoch 0 train_acc: 0.6875439831104856, train_prec: 0.7377864619684348, train_rec: 0.6875439831104856, train_f1: 0.6700904756681132
Epoch 0 val_acc: 0.6050656660412758, val_prec: 0.6455091455091455, val_rec: 0.6050656660412758, val_f1: 0.5755739340549181
Epoch 0 test_acc: 0.582145536384096, test_prec: 0.6186598590120247, test_rec: 0.582145536384096, test_f1: 0.5475780656653595
Saving as best model...
Loss 1.2391771322058707
Epoch 1 train_acc: 0.8501055594651654, train_prec: 0.8541902325306051, train_rec: 0.8501055594651654, train_f1: 0.8496698628326832
Epoch 1 val_acc: 0.6960600375234521, val_prec: 0.7026335798714315, val_rec: 0.6960600375234521, val_f1: 0.6935748886562451
Epoch 1 test_acc: 0.6669167291822956, test_prec: 0.6716318173826157, test_rec: 0.6669167291822956, test_f1: 0.6646540377853704
Saving as best model...
Loss 1.9602254107489698
Epoch 2 train_acc: 0.8937368050668544, train_prec: 0.8987475679563564, train_rec: 0.8937368050668544, train_f1: 0.8934003399598865
Epoch 2 val_a

In [38]:
offset = 15
lr_losses = np.asarray(lr_losses)
lr_losses = offset - lr_losses


In [39]:
for lr, run in zip(lrs, lr_losses):
    plt.plot(run, label='lr={}'.format(lr))
    plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Cross Entropy Error')
plt.savefig('sentiment_loss_vs_lr.png')
plt.clf()