# Embedding

In [103]:
import csv
import math
import re
from abc import abstractmethod, ABC
from collections import Counter

import numpy as np

np.random.seed(99)

## Foundation

### Tensor

In [104]:
class Tensor:

    def __init__(self, data):
        self.data = np.array(data)
        self.grad = None
        self.gradient_fn = lambda: None
        self.parents = set()

    def backward(self):
        if self.gradient_fn:
            self.gradient_fn()

        for p in self.parents:
            p.backward()

    def shape(self):
        return self.data.shape

    def size(self):
        return np.prod(self.data.shape[1:])

    def __str__(self):
        return str(self.data)

### Base Dataset

In [105]:
class Dataset(ABC):

    def __init__(self, batch_size=1):
        self.batch_size = batch_size
        self.load()
        self.train()

    @abstractmethod
    def load(self):
        pass

    def train(self):
        self.features = self.train_features
        self.labels = self.train_labels

    def eval(self):
        self.features = self.test_features
        self.labels = self.test_labels

    def shape(self):
        return Tensor(self.features).size(), Tensor(self.labels).size()

    def items(self):
        return Tensor(self.features), Tensor(self.labels)

    def __len__(self):
        return len(self.features) // self.batch_size

    def __getitem__(self, index):
        start = index * self.batch_size
        end = start + self.batch_size
        return Tensor(self.features[start: end]), Tensor(self.labels[start: end])

    @abstractmethod
    def estimate(self, predictions):
        pass

### Base Layer

In [106]:
class Layer(ABC):

    def __init__(self):
        self.training = True

    def __call__(self, x: Tensor):
        return self.forward(x)

    def train(self):
        self.training = True

    def eval(self):
        self.training = False

    @abstractmethod
    def forward(self, x: Tensor):
        pass

    def parameters(self):
        return []

    def __str__(self):
        return ''

### Base Loss Function

In [107]:
class Loss(ABC):

    @abstractmethod
    def __call__(self, p: Tensor, y: Tensor):
        pass

### Base Optimizer

In [108]:
class Optimizer(ABC):

    def __init__(self, parameters, lr):
        self.parameters = parameters
        self.lr = lr

    @abstractmethod
    def backward(self):
        pass

### Base Model

In [109]:
class Model(ABC):

    def __init__(self, layer, loss, optimizer):
        self.layer = layer
        self.loss = loss
        self.optimizer = optimizer

    @abstractmethod
    def train(self, dataset, epochs):
        pass

    @abstractmethod
    def test(self, dataset):
        pass

## Data

### RNN Dataset

In [110]:
class RNNDataset(Dataset):

    def __init__(self, filename):
        self.filename = filename
        super().__init__()

    def load(self):
        self.reviews = []
        self.sentiments = []
        with open(self.filename, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            next(reader)
            for _, row in enumerate(reader):
                self.reviews.append(row[0])
                self.sentiments.append(row[1])

        split_reviews = []
        for line in self.reviews:
            split_reviews.append(self.clean_text(line.lower()).split())

        self.vocabulary = set(word for line in split_reviews for word in line)
        self.word2index = {word: index for index, word in enumerate(self.vocabulary)}
        self.index2word = {index: word for index, word in enumerate(self.vocabulary)}
        self.tokens = [[self.word2index[word] for word in line if word in self.word2index] for line in split_reviews]

    @staticmethod
    def clean_text(text):
        text = re.sub(r'<[^>]+>', '', text)
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        return text

    def train(self):
        self.features = [list(set(index)) for index in self.tokens[:-10]]
        self.labels = [0 if index == "negative" else 1 for index in self.sentiments[:-10]]

    def eval(self):
        self.features = [list(set(index)) for index in self.tokens[-10:]]
        self.labels = [0 if index == "negative" else 1 for index in self.sentiments[-10:]]

    def encode(self, text):
        words = self.clean_text(text.lower()).split()
        return [self.word2index[word] for word in words]

    def decode(self, tokens):
        return " ".join([self.index2word[index] for index in tokens])

    def estimate(self, predictions):
        count = 0
        for i in range(len(predictions)):
            if np.abs(predictions[i].data - self.labels[i]) < 0.5:
                count += 1
        return count / len(predictions)

## Model

### Linear Layer

In [111]:
class Linear(Layer):

    def __init__(self, in_size, out_size):
        super().__init__()
        self.weight = Tensor(np.random.rand(out_size, in_size) / in_size)
        self.bias = Tensor(np.zeros(out_size))

    def forward(self, x: Tensor):
        p = Tensor(x.data @ self.weight.data.T + self.bias.data)

        def gradient_fn():
            self.weight.grad = p.grad.T @ x.data / len(x.data)
            self.bias.grad = np.sum(p.grad, axis=0) / len(x.data)
            x.grad = p.grad @ self.weight.data

        p.gradient_fn = gradient_fn
        p.parents = {self.weight, self.bias, x}
        return p

    def parameters(self):
        return [self.weight, self.bias]

    def __str__(self):
        return f'weight: {self.weight}\nbias: {self.bias}'

### Sequential Layer

In [112]:
class Sequential(Layer):

    def __init__(self, layers):
        super().__init__()
        self.layers = layers

    def train(self):
        for l in self.layers:
            l.train()

    def eval(self):
        for l in self.layers:
            l.eval()

    def forward(self, x: Tensor):
        for l in self.layers:
            x = l(x)
        return x

    def parameters(self):
        return [p for l in self.layers for p in l.parameters()]

    def __str__(self):
        return '\n'.join(str(l) for l in self.layers if str(l))

### Embedding Layer

In [113]:
class Embedding(Layer):

    def __init__(self, vocabulary_size, embedding_size, axis=1):
        super().__init__()
        self.vocabulary_size = vocabulary_size
        self.embedding_size = embedding_size
        self.axis = axis

        self.weight = Tensor(np.random.rand(embedding_size, vocabulary_size) / vocabulary_size)

    def forward(self, x: Tensor):
        p = Tensor(np.sum(self.weight.data.T[x.data], axis=self.axis))

        def gradient_fn():
            if self.weight.grad is None:
                self.weight.grad = np.zeros_like(self.weight.data)
            self.weight.grad.T[x.data] += p.grad

        p.gradient_fn = gradient_fn
        p.parents = {self.weight}
        return p

    def parameters(self):
        return [self.weight]

### ReLU Activation Function

In [114]:
class ReLU(Layer):

    def forward(self, x: Tensor):
        p = Tensor(np.maximum(0, x.data))

        def gradient_fn():
            x.grad = (p.data > 0) * p.grad

        p.gradient_fn = gradient_fn
        p.parents = {x}
        return p

### Tanh Activation Function

In [115]:
class Tanh(Layer):

    def forward(self, x: Tensor):
        p = Tensor(np.tanh(x.data))

        def gradient_fn():
            x.grad = p.grad * (1 - p.data ** 2)

        p.gradient_fn = gradient_fn
        p.parents = {x}
        return p

### Sigmoid Activation Function

In [116]:
class Sigmoid(Layer):

    def __init__(self, clip_range=(-100, 100)):
        super().__init__()
        self.clip_range = clip_range

    def forward(self, x: Tensor):
        z = np.clip(x.data, self.clip_range[0], self.clip_range[1])
        p = Tensor(1 / (1 + np.exp(-z)))

        def gradient_fn():
            x.grad = p.grad * p.data * (1 - p.data)

        p.gradient_fn = gradient_fn
        p.parents = {x}
        return p

### Softmax Activation Function

In [117]:
class Softmax(Layer):

    def __init__(self, axis=1):
        super().__init__()
        self.axis = axis

    def forward(self, x: Tensor):
        exp = np.exp(x.data - np.max(x.data, axis=self.axis, keepdims=True))
        p = Tensor(exp / np.sum(exp, axis=self.axis, keepdims=True))

        def gradient_fn():
            x.grad = np.zeros_like(x.data)
            for idx in range(x.data.shape[0]):
                itm = p.data[idx].reshape(-1, 1)
                x.grad[idx] = (np.diagflat(itm) - itm @ itm.T) @ p.grad[idx]

        p.gradient_fn = gradient_fn
        p.parents = {x}
        return p

### MSE Loss Function

In [118]:
class MSELoss(Loss):

    def __call__(self, p: Tensor, y: Tensor):
        mse = Tensor(((y.data - p.data) ** 2).mean())

        def gradient_fn():
            p.grad = -2 * (y.data - p.data)

        mse.gradient_fn = gradient_fn
        mse.parents = {p}
        return mse

### Cross Entropy Loss Function

In [119]:
class CELoss:

    def __call__(self, p: Tensor, y: Tensor):
        exp = np.exp(p.data - np.max(p.data, axis=-1, keepdims=True))
        softmax = exp / np.sum(exp, axis=-1, keepdims=True)

        log = np.log(np.clip(softmax, 1e-10, 1))
        ce = Tensor(0 - np.sum(y.data * log) / len(p.data))

        def gradient_fn():
            p.grad = (softmax - y.data) / len(p.data)

        ce.gradient_fn = gradient_fn
        ce.parents = {p}
        return ce

### Binary Cross Entropy Loss Function

In [120]:
class BCELoss:

    def __call__(self, p: Tensor, y: Tensor):
        clipped = np.clip(p.data, 1e-7, 1 - 1e-7)
        bce = Tensor(-np.mean(y.data * np.log(clipped)
                              + (1 - y.data) * np.log(1 - clipped)))

        def gradient_fn():
            p.grad = (clipped - y.data) / (clipped * (1 - clipped)) * len(p.data)

        bce.gradient_fn = gradient_fn
        bce.parents = {p}
        return bce

### SGD Optimizer

In [121]:
class SGDOptimizer(Optimizer):

    def backward(self):
        for p in self.parameters:
            p.data -= p.grad * self.lr

### Recurrent Neural Network Model

In [122]:
class RNNModel(Model):

    def train(self, dataset, epochs):
        layer.train()
        dataset.train()

        for epoch in range(epochs):
            for i in range(len(dataset)):
                features, labels = dataset[i]

                predictions = self.layer(features)
                error = self.loss(predictions,
                                  labels)

                error.backward()
                self.optimizer.backward()

    def test(self, dataset):
        layer.eval()
        dataset.eval()

        predictions = []
        for i in range(len(dataset)):
            feature, label = dataset[i]
            predictions.append(self.layer(feature))
        return predictions

## Configuration

### Learning Rate

In [123]:
LEARNING_RATE = 0.01

### Epoch

In [124]:
EPOCHS = 10

## Training

### Iteration Training

In [125]:
dataset = RNNDataset('reviews.csv')
layer = Sequential([Embedding(len(dataset.vocabulary), 64),
                    ReLU(),
                    Linear(64, 16),
                    ReLU(),
                    Linear(16, 1),
                    Sigmoid()])
loss = BCELoss()
optimizer = SGDOptimizer(layer.parameters(),
                         lr=LEARNING_RATE)

model = RNNModel(layer,
                 loss,
                 optimizer)
model.train(dataset,
            EPOCHS)

## Testing

### Estimating

In [126]:
predictions = model.test(dataset)

print(f'Accuracy: {dataset.estimate(predictions)}')

Accuracy: 1.0


### Comparing Word Similarity

In [127]:
def similar(dataset, layer, target='excellent'):
    target_index = dataset.word2index[target]
    scores = Counter()

    for word, index in dataset.word2index.items():
        raw_diff = layer.layers[0].weight.data.T[index] - layer.layers[0].weight.data.T[target_index]
        squared_diff = raw_diff ** 2
        scores[word] = -math.sqrt(sum(squared_diff))

    return scores.most_common(10)


print(similar(dataset, layer, target='excellent'))
print(similar(dataset, layer, target='terrible'))

[('excellent', -0.0), ('fantastic', -0.7996450263147732), ('recommend', -1.3319582519781423), ('enjoyed', -1.9325885047969322), ('amazing', -2.0823739547611595), ('perfect', -2.3507312470353594), ('loved', -2.6569631234869493), ('this', -2.666405620506408), ('great', -3.1031659645224527), ('effect', -3.2448951780812227)]
[('terrible', -0.0), ('hated', -0.8460366283470147), ('awful', -2.062133581933607), ('boring', -2.3486382987499574), ('poor', -2.5547589972166915), ('of', -5.562361020199796), ('horrible', -6.150073773743111), ('mine', -8.11987270553488), ('her', -8.947550998475457), ('time', -8.949494493433342)]
