In [1]:
import re
import json
import math
import pickle

import numpy as np
from matplotlib import pyplot as plt

In [2]:
# defining data parsing functions

def find_dataset(path):
    with open(path) as file:
        data = json.load(file)
        return data

training_sentences = []
training_labels = []
labels = []
responses = []

def prep_data(path):
    """
    :returns: training_sentences, training_labels, responses(list), labels, num_classes
    """
    data = find_dataset(path=path)

    for intent in data['intents']:
        for pattern in intent['patterns']:
            training_sentences.append(pattern)
            training_labels.append(intent['tag'])
        responses.append(intent['responses'])
        
        if intent['tag'] not in labels:
            labels.append(intent['tag'])

    num_classes = len(labels)

    return training_sentences, training_labels, responses, labels, num_classes, data

In [3]:
training_sentences, training_labels, responses, labels, num_classes, data = prep_data('data/data.json')

In [4]:
num_classes

20

In [95]:
# defining Preprocessing Class

class Preprocess:
    """
    Preprocessing Module by Shakhyar, github: https://github.com/shakhyar
    
    Preprocessing class like tensorflow's preprocessing module
    This Preprocessing module doesn't take the help of any third party libraries
    so you have to convert your x and y vector to numpy array later
    
    All methods accept a 2D python list and not numpy array for simplicity
    
    If you want to encode a 1D Y vector to its indexes, use the encode_y_to_idx() method
    
    Propoer preprocessing method for text data:
    1: tokenize
    2: stem/lemmanize
    3: encode
    4: pad
    
    #!NOTE:store the max padding length from _pad2d(), and pass it to _pad1d during inference
    #! to match the training shape(but put the output value inside [] if trained on 2d data)
    """
    def _tokenize2d(self, arr):
        """
        expects a 2d array
        [
        ['s', 'e', 'r']
        ]
        """
        self.x = [sentence.split() for sentence in arr]
        return self.x
    
    
    def _pad2d(self, arr, max_len):
        """
        Expects a 2d tokenized array only
        Return: padded_sequence, max padding length
        sample:
        [
        [4, 3]
        [2, 1, 7]
        ]
        """
        self.max_len = max_len
        self.padded = []
        for i in arr:
            self.l = len(i)
            if self.l>self.max_len:
                self.max_len = self.l
            
        for x in arr:
            self.p = []
            self.l = len(x)
            if self.l < self.max_len:
                for _ in range(self.max_len - self.l):
                    x.append(0)
                self.padded.append(x)
            
        # store the max_len to later use it to pad input for inference
        return self.padded, self.max_len
    
    def _pad1d(self, arr, max_len):
        """
        expected 1D tokenized array:
        ['x', 'y']
        and max_len = the maximum padding length from the 2D padded array 
        """
        self.l = len(arr)
        if self.l < max_len:
            for _ in range(max_len - self.l):
                arr.append(0)
                    
        return arr
    
    
    def stem2d(self, arr):
        """
        expected arr:
        [
        ["x", "y"],
        ["z", "p", "e"]
        ...
        ]
        """
        self.stems = [[re.sub(r"""
        less|ship|ing|les|ly|es|ed|er
        """, '', word) for word in sentence] for sentence in arr]
        return self.stems
    
    
    def lblenc2d(self, arr, min_max_scaler=False):
        """
        arr: any 2D array expected, 
        min_max_scaler: is set to False, Change it to True if required
        
        Return: Encoded sequence, labels dict for decoding
        """
        self.labels = {}
        self.l = []
        self.sequenced = []
        for i in range(len(arr)):
            for r in arr[i]:
                self.l.append(r)
                
        self.l = set(self.l)
    
        if min_max_scaler:
            for idx, wr in enumerate(self.l):
                self.labels[wr] = idx/1000
                
        else:
            for idx, wr in enumerate(self.l):
                self.labels[wr] = idx
        
        for z in arr:
            wr_ch = []
            for wr in z:
                if wr in self.labels:
                    wr_ch.append(self.labels[wr])

            self.sequenced.append(wr_ch)
    
        print("Labels Dict:\n",self.labels)
        pickle.dump(self.labels, open('saved/labels_dict.p', 'wb'))
        
        return self.sequenced
    
#############################################################################    
    def encode_y_to_idx(self, y_1D, max_len=20, padding=False):
        self._y = []
        self.yd = {}
        
        for idx, el in enumerate(y_1D):
            self.yd[el] = idx
            
        for idx, el in enumerate(y_1D):
            self.lbl_pd = []
            self.lbl_pd.append(self.yd[el])
            for _ in range(max_len-1):
                self.lbl_pd.append(0)
                
            self._y.append(self.lbl_pd)
        self._y = np.array(self._y)
        print("Yshape: \n",self._y.shape)
        print("Ydict: \n",self.yd)
        print("Y: \n",self._y)
        return self._y
            
##############################################################################        
    def preprocess_wrap(self, x, y, min_max_scaler=False, max_len=20):
        """
        Return: x, y and max padding length
       
        to customize it in your code, use the following lines:
        x = module._tokenize2d(x)
        x = module.stem2d(x)
        x = module.lblenc2d(x, min_max_scaler=True/False)
        x, max_len = module._pad2d(self.x)
        """
        
        self.x = self.lblenc2d(self.stem2d(self._tokenize2d(x)), min_max_scaler=min_max_scaler)
        self.x, self.max_len = self._pad2d(self.x, max_len=max_len)
        
        self.y = self.encode_y_to_idx(y)
        
        return self.x, self.y, self.max_len
    
    
p = Preprocess()

x, y, max_len = p.preprocess_wrap(training_sentences, training_labels, min_max_scaler=False)
        
        

Labels Dict:
 {'today': 0, 'song': 1, 'some': 2, 'know': 3, 'thanks': 4, 'free': 5, 'humidity': 6, 'x': 7, 'jot': 8, 'day': 9, 'you': 10, 'lol': 11, 'it': 12, 'hop': 13, 'provide': 14, 'are': 15, 'abiliti': 16, 'how': 17, 'write': 18, 'y': 19, 'see': 20, 'awome': 21, 'amaz': 22, 'sdf': 23, 'there': 24, 'work': 25, 'hehe': 26, 'happy': 27, 'was': 28, 'hey': 29, 'tell': 30, 'z': 31, 'later': 32, 'epic': 33, 'latt': 34, 'jazz': 35, 'should': 36, 'whats': 37, 'like': 38, 'glad': 39, 'do': 40, 'me': 41, 'hi': 42, 'joke': 43, 'who': 44, 'the': 45, 'xyz': 46, 'name': 47, 'good': 48, 'crack': 49, 'bye': 50, 'hello': 51, 'super': 52, 'wake': 53, 'this': 54, 'up': 55, 'from': 56, 'did': 57, 'am': 58, 'your': 59, 'a': 60, 'thank': 61, 'haha': 62, 'give': 63, 'call': 64, 'temperature': 65, 'talk': 66, 'howdy': 67, 'music': 68, 'humid': 69, 'time': 70, 'youtube': 71, 'thats': 72, 'when': 73, 'on': 74, 'whom': 75, 'i': 76, 'news': 77, 'what': 78, 'lmao': 79, 'where': 80, 'recent': 81, 'cool': 82, 'r

In [128]:
y[67]

array([72,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0])

In [147]:
class Activations:
    def __init__(self):
        self.e = 2.71828
        
    def cosh(self, x):
        self.z = (self.e**x + self.e**-x)/2
        return self.z
    
    def sech(self, x):
        return 1/self.cosh(x)
        
    def gelu(self, x):
        return np.tanh(x)
        """
        self.l = []
        for ar in x:
            self.n_s = []
            for el in ar:
                self.n_s.append(0.5*el*(1 + math.tanh(math.sqrt(2/math.pi)*(el + 0.044715*el**3))))
            self.l.append(self.n_s)
        return np.array(self.l)"""

    def gelu_derivative(self, x):
        return 1-np.tanh(x)**2
        """
        self.l = []
        for ar in x:
            self.n_s = []
            for el in ar:
                self.n_s.append(0.5*math.tanh(0.0356774*el**3 + 0.797885*el) 
                           + (0.0535161*el**3 + 0.398942*el) 
                *self.sech(0.0356774*el**3 + 0.797885*el)**2 + 0.5)
            
            self.l.append(self.n_s)
        return np.array(self.l)"""
    
    def softmax(self, x):
        return np.exp(x)/sum(np.exp(x))

    
    def one_hot(self, Y):
        """
        this method was inspired from Samson Zhang's YT video, titled:
        "Building a neural network FROM SCRATCH (no Tensorflow/Pytorch, just numpy & math)"
        link: https://youtu.be/w8yWXqWQYmU?t=1157
        
        """
        self.Y = np.array(Y)
        print(self.Y.shape)
        self.one_hot_Y = np.zeros((self.Y.size, self.Y.max()+1))
        self.one_hot_Y[np.arange(self.Y.size), self.Y] = 1
        print(self.one_hot_Y.shape)
        return np.squeeze(np.asarray(self.one_hot_Y))
    
    

In [158]:
class NN(Activations):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.w1 = np.random.rand(output_size, input_size)
        self.b1 = np.random.rand(output_size, 1)
        self.w2 = np.random.rand(output_size, output_size)
        self.b2 = np.random.rand(output_size, 1)
        
        
    def forward(self, w1, b1, w2, b2, _input):
        self.input = _input
        self.z0 = np.dot(w1, self.input) + b1
        self.a0 = self.gelu(self.z0)
        self.z1 = np.dot(w2, self.a0) + b2
        self.a1 = self.softmax(self.z1)
        
        return self.z0, self.a0, self.z1, self.a1

    def back_prop(self, z0, a0, z1, a1, w1, w2, x, y):
        #self.one_hot_y = self.one_hot(y)
        #print(self.one_hot_y.shape)
        self.m, self.n = y.shape
        self.dv_z1 = a0 - y # work left = edit params with self and finish
        self.dv_w2 = 1 / self.m * self.dv_z1.dot(a0.T)
        self.dv_b2 = 1 / self.m * np.sum(self.dv_z1)
        self.dv_z0 = w2.T.dot(self.dv_z1) * self.gelu_derivative(z0)
        self.dv_w1 = 1 / self.m * self.dv_z0.dot(x.T)
        self.dv_b1 = 1 / self.m * np.sum(self.dv_z0)
        return self.dv_w1, self.dv_b1, self.dv_w2, self.dv_b2
    
    def update_params(self, w1, b1, w2, b2, dv_w1, dv_b1, dv_w2, dv_b2, a):
        self.w1 = self.w1 - a * dv_w1
        self.b1 = self.b1 - a * dv_b1    
        self.w2 = self.w2 - a * dv_w2  
        self.b2 = self.b2 - a * dv_b2    
        return self.w1, self.b1, self.w2, self.b2
    
    def get_accuracy(self, y_hat, y):
        return np.sum(y_hat == y) / y.size

    def gradient_descent(self, x, y, alpha, epochs):
        for i in range(epochs):
            self.z0, self.a0, self.z1, self.a1 = self.forward(self.w1, self.b1, self.w2, self.b2, x)
            #print(self.z0.shape, self.a0.shape, self.z1.shape, self.a1.shape)
            self.dv_w1, self.dv_b1, self.dv_w2, self.dv_b2 = self.back_prop(self.z0,
                                                                           self.a0, 
                                                                           self.z1, 
                                                                           self.a1, 
                                                                           self.w1, self.w2, x, y)
            #print(self.dv_w1.shape, self.dv_b1.shape, self.dv_w2.shape, self.dv_b2.shape)
            #print(alpha)
            self.w1, self.b1, self.w2, self.b2 = self.update_params(self.w1, self.b1, 
                                                                    self.w2, self.b2, 
                                                                    self.dv_w1, self.dv_b1,
                                                                    self.dv_w2, self.dv_b2, 0.3)
            if i % 10 == 0:
                print("Iteration: ", i)
                self.y_hat = self.get_y_hat(self.a1)
                print(self.y_hat, y)
                print(self.get_accuracy(self.y_hat, y))
        
        return self.w1, self.b1, self.w2, self.b2

    def get_y_hat(self, a1):
        #print(a1)
        return np.argmax(a1, 0)
    
    def predict(self, x, w1, b1, w2, b2):
        _, _, _, self.a1 = self.forward(w1, b1, w2, b2, x)
        self.y_pred = self.get_y_hat(self.a1)
        return self.y_pred
    
model = NN(len(training_sentences), len(training_labels))
W1, b1, W2, b2 = model.gradient_descent(np.array(x), y, alpha=0.3, epochs=500)



Iteration:  0
[37 37 37 37 37 37 37 37 32 32 32 32 32 32 32 32 32 32 32 32] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.0
Iteration:  10
[103 103 103 103 103 103 103   2   0   0   0   0   0   0   0   0   0   0
   0   0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.6
Iteration:  20
[ 64  64  64  64  64  83 103   2   0   0   0   0   0   0   0   0   0   0
   0   0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.6
Iteration:  30
[27 27 27 27 27 29 34  2  0  0  0  0  0  0  0  0  0  0  0  0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 



 140
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.95
Iteration:  150
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.95
Iteration:  160
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]]
0.95
Iteration:  170
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [[  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 [  5   0   0 ...   0   0   0]
 ...
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   0]
 [104   0   0 ...   0   0   

 [104   0   0 ...   0   0   0]]
0.95


In [91]:
training_labels

['greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'hru',
 'hru',
 'hru',
 'hru',
 'hru',
 'wrud',
 'name',
 'name',
 'name',
 'name',
 'name',
 'thank',
 'thank',
 'comment',
 'comment',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'random',
 'random',
 'random',
 'random',
 'random',
 'random',
 'random',
 'random',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'info',
 'memorize',
 'memorize',
 'news',
 'news',
 'news',
 'news',
 'news',
 'news',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'jotnote',
 'youtube',
 'youtube',
 'youtube',
 'youtube',
 'youtube',
 'youtube',
 'lofi',
 'lofi',
 'lofi',
 'lofi',
 'lofi',
 'weather',
 'weather',
 'weather',
 'weather',
 'weather',
 'weather',
 'joke',
 'joke',
 'jok

In [9]:
x = np.random.rand(20, 1) -0.5
w = np.random.rand(20, 105) -0.5
b = np.random.rand(7, 1) -0.5

z = np.dot(np.squeeze(np.asarray(x)), w) + b

In [85]:
np.zeros((105, 20)).shape

(105, 20)

In [23]:
def gelu(x):
    l = []
    for ar in x:
        n_s = []
        for el in ar:
            n_s.append(0.5*el*(1 + math.tanh(math.sqrt(2/math.pi)*(el + 0.044715*el**3))))
        l.append(n_s)
    return l

np.array(gelu(w))

array([[-0.00135081, -0.0063563 , -0.05167706, ..., -0.03448022,
        -0.03351578, -0.01717697],
       [ 0.00652018, -0.01305005,  0.24821286, ...,  0.10523572,
         0.13576983,  0.0183976 ],
       [-0.04903455,  0.33451576, -0.01667384, ...,  0.06630084,
         0.02652709,  0.00278336],
       ...,
       [-0.05188661,  0.18294188,  0.10132879, ...,  0.24121708,
        -0.05144671,  0.03756235],
       [ 0.06480318,  0.31617693,  0.04467178, ...,  0.10143641,
         0.2718432 ,  0.10435436],
       [-0.01404002,  0.16085825,  0.33582808, ...,  0.06614964,
         0.27989019, -0.04757342]])

In [25]:
def cosh(x):
    e = 2.71828
    z = (e**x + e**-x)/2
    return z

def sech(x):
    return 1/cosh(x)

def gelu_derivative(x):
    l = []
    for ar in x:
        n_s = []
        for el in ar:
            n_s.append(0.5*math.tanh(0.0356774*el**3 + 0.797885*el) 
                       + (0.0535161*el**3 + 0.398942*el) 
            *sech(0.0356774*el**3 + 0.797885*el)**2 + 0.5)
            
        l.append(n_s)
    return l

np.array(gelu_derivative(w))

array([[0.49783976, 0.48975236, 0.40972354, ..., 0.44167093, 0.44340072,
        0.47180591],
       [0.51029806, 0.47872753, 0.79071765, ..., 0.64492585, 0.68013515,
        0.52853151],
       [0.41477555, 0.85933109, 0.47265498, ..., 0.59603811, 0.5406418 ,
        0.50442201],
       ...,
       [0.40932056, 0.72987368, 0.64023182, ..., 0.78459518, 0.4101661 ,
        0.55662981],
       [0.59405869, 0.84575944, 0.56666868, ..., 0.64036173, 0.81074471,
        0.64387083],
       [0.47707599, 0.70724711, 0.86028277, ..., 0.59583862, 0.81734187,
        0.41754572]])

In [11]:
np.place(w, w<=0 , 0.3*(np.exp(w)-1))
        
print(w)

    

[[-0.00270747 -0.01284422 -0.11363487 ... -0.07323608 -0.07105672
  -0.03535084]
 [ 0.01290743 -0.02666745  0.38249245 ...  0.1836982   0.22978135
   0.03577428]
 [-0.10722489  0.48702945 -0.03428539 ...  0.12095675  0.05098131
   0.00554222]
 ...
 [-0.11414656  0.29667748  0.17761813 ...  0.37361073 -0.11307297
   0.07109519]
 [ 0.11843987  0.46551939  0.08375329 ...  0.17778621  0.41200189
   0.18233048]
 [-0.02873894  0.26594575  0.48855575 ...  0.12070303  0.42188705
  -0.10371392]]


False