In [1]:
import pandas as pd
from PIL import Image
import numpy as np
        
def convert(code):
    if (code >= 1 and code <= 11): return (code - 1)
    elif (code == 12): return 100
    elif (code == 13): return 1000
    elif (code == 14): return 10000
    elif (code == 15): return 100000000
    else: return -1

class image:
    def __init__(self,suite, sample, code):
        self.suite = suite
        self.sample = sample
        self.code = code
        self.value = convert(code)
        img_name = "chinese_data/input_" + str(suite) + "_" + str(sample) + "_" + str(code) + ".jpg"
        infile = Image.open(img_name)
        temp = np.array(infile)
        self.pixels = [col for row in temp for col in row] 




In [2]:
from scipy.stats import norm
#inverse cdf function
def inverse_normal_cdf(num):
    return norm.ppf(norm.cdf(num))

#necessary tensor functions
def is_1d(tensor):
    return not isinstance(tensor[0], list)

def tensor_sum(tensor):
    if is_1d(tensor):
        return sum(tensor)
    else:
        return sum(tensor_sum(tensor_i)      # Call tensor_sum on each row
                   for tensor_i in tensor)   # and sum up those results.
    
def tensor_combine(f, t1, t2):
    if is_1d(t1):
        return [f(x, y) for x, y in zip(t1, t2)]
    else:
        return [tensor_combine(f, t1_i, t2_i)
                for t1_i, t2_i in zip(t1, t2)]
    
def true_encode(tag):
    return [1.0 if tag - 1 == i else 0.0 for i in range(15)]

def dot(l1, l2):
    sum = 0.0
    for x,y in zip(l1,l2):
        sum += x * y
    return sum

def tensor_apply(f, tensor):
    if is_1d(tensor):
        return [f(x) for x in tensor]
    else:
        return [tensor_apply(f, t) for t in tensor]
    
def zeros_like(tensor):
    return tensor_apply(lambda _: 0.0, tensor)


In [3]:
def findMaxIndex(tensor):
    max = tensor[0]
    maxIndex = 0
    for i in range(len(tensor)):
        if tensor[i] > max:
            max = tensor[i]
            maxIndex = i
    return maxIndex

In [4]:
import math
# optimizer class

class GradientDescent:
    def __init__(self, learning_rate: float = 0.1):
        self.rate = learning_rate
        
    # step for each layer
    def step(self, layer):
        for param, grad in zip(layer.params(), layer.grads()):
            # Update param using a gradient step
            param[:] = tensor_combine(
                lambda param, grad: param - grad * self.rate, param, grad)
            

class Momentum():
    def __init__(self,
                 learning_rate,
                 momentum: float = 0.9):
        self.lr = learning_rate
        self.mo = momentum
        self.updates = []  # running average

    def step(self, layer):
        # If we have no previous updates, start with all zeros
        if not self.updates:
            self.updates = [zeros_like(grad) for grad in layer.grads()]

        for update, param, grad in zip(self.updates,
                                       layer.params(),
                                       layer.grads()):
            # Apply momentum
            update[:] = tensor_combine(
                lambda u, g: self.mo * u + (1 - self.mo) * g,
                update,
                grad)

            # Then take a gradient step
            param[:] = tensor_combine(
                lambda p, u: p - self.lr * u,
                param,
                update)

   

In [5]:
# loss class

class SSE:
    def loss(self, predict, real):
        squared_errors = tensor_combine(
            lambda predicted, actual: (predicted - actual) ** 2,
            predict, real)
        return tensor_sum(squared_errors)
    def gradient(self, predict, real):
        return tensor_combine(lambda predicted, actual:  2 * (predicted - actual),
            predict, real)
    

    
def softmax(tensor):
    if is_1d(tensor):
        # Subtract largest value for numerical stability.
        largest = max(tensor)
        exps = [math.exp(x - largest) for x in tensor]

        sum_of_exps = sum(exps)                 # This is the total "weight."
        return [exp_i / sum_of_exps             # Probability is the fraction
                for exp_i in exps]              # of the total weight.
    else:
        return [softmax(tensor_i) for tensor_i in tensor]    
class SoftmaxCrossEntropy():

    def loss(self, predicted, actual):
        # Apply softmax to get probabilities
        probabilities = softmax(predicted)

        likelihoods = tensor_combine(lambda p, act: math.log(p + 1e-30) * act,
                                     probabilities,
                                     actual)

        # And then we just sum up the negatives.
        return -tensor_sum(likelihoods)

    def gradient(self, predicted, actual):
        probabilities = softmax(predicted)

        return tensor_combine(lambda p, actual: p - actual,probabilities,actual)    


In [6]:
import random
# MNIST class
class mnist:
    def __init__(self, partition):
        self.img_array = []
        #read in all images
        for i in range(1, 31):
            for j in range (1, 11):
                for k in range (1, 16):
                    self.img_array.append(image(i,j,k))
        random.shuffle(self.img_array)
        cut = int(len(self.img_array) * partition)
        # partition the data into train and test
        self.train_tag = [self.img_array[i].code for i in range(cut)]
        train_img = [self.img_array[i].pixels for i in range(cut)]
        self.test_tag = [self.img_array[i].code for i in range(cut, len(self.img_array))]
        test_img = [self.img_array[i].pixels for i in range(cut, len(self.img_array))]
        # flatten the pixels in train and test
        train_avg = tensor_sum(train_img)/ len(train_img) / 64/ 64
        test_avg = tensor_sum(test_img)/ len(test_img) / 64/ 64
        self.train_img = [[(pixel - train_avg)/256 for pixel in image] for image in train_img]
        self.test_img = [[(pixel - test_avg)/256 for pixel in image] for image in test_img]
        

In [16]:
# Dropout class
class Dropout(Layer):
    def __init__(self, p):
        self.p = p
        self.train = True

    def forward(self, input):
        if self.train:
            # Create a mask of 0s and 1s shaped like the input
            # using the specified probability.
            self.mask = tensor_apply(
                lambda _: 0 if random.random() < self.p else 1,
                input)
            # Multiply by the mask to dropout inputs.
            return tensor_combine(operator.mul, input, self.mask)
        else:
            # During evaluation just scale down the outputs uniformly.
            return tensor_apply(lambda x: x * (1 - self.p), input)

    def backward(self, gradient):
        if self.train:
            # Only propagate the gradients where mask == 1.
            return tensor_combine(operator.mul, gradient, self.mask)
        else:
            raise RuntimeError("don't call backward when not in train mode")

In [15]:
import random
import math
import operator
# Tensor class

def random_uniform(dims):
    if len(dims) == 1:
        return [random.random() for _ in range(dims[0])]
    else:
        return [random_uniform(dims[1:]) for _ in range(dims[0])]

def random_normal(dims, mean: float = 0.0, variance: float = 1.0):
    if len(dims) == 1:
        return [mean + variance * inverse_normal_cdf(random.random())
                for _ in range(dims[0])]
    else:
        return [random_normal(dims[1:], mean, variance)for _ in range(dims[0])]
    
def random_tensor(dims, init: str ="normal"):
    if init == 'normal':
        return random_normal(dims)
    elif init == 'uniform':
        return random_uniform(dims)
    elif init == 'xavier':
        variance = len(dims) / sum(dims)
        return random_normal(dims, variance=variance)
    else:
        raise ValueError(f"unknown init: {init}")
        
# tensor layers setup

# base class
class Layer():
    def forward(self, input):
        return []
    def backward(self, gradient):
        return []
    def params(self): return []
    def grads(self): return []
    

# Sigmoid class
def sigmoid(num):
    return 1 / (1 + math.exp(num * -1))

class Sigmoid(Layer):
    # help bring the result into the range between 0 and 1
    def forward(self, input):
        self.sigmoids = tensor_apply(sigmoid, input)
        return self.sigmoids

    def backward(self, gradient):
        return tensor_combine(lambda sig, grad: sig * (1 - sig) * grad,
                              self.sigmoids,gradient)

# Tanh class
def tanh(x):
    # If x is very large or very small, tanh is (essentially) 1 or -1.
    # We check for this because, e.g., math.exp(1000) raises an error.
    if x < -100:  return -1
    elif x > 100: return 1

    em2x = math.exp(-2 * x)
    return (1 - em2x) / (1 + em2x)

class Tanh(Layer):
    def forward(self, input):
        # Save tanh output to use in backward pass.
        self.tanh = tensor_apply(tanh, input)
        return self.tanh

    def backward(self, gradient):
        return tensor_combine( lambda tanh, grad: (1 - tanh ** 2) * grad,
            self.tanh,
            gradient)
    
    
#Linear Class
class Linear(Layer):
    def __init__(self, input_dim, output_dim, init: str = 'xavier'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        # weights for the each neuron
        # output dimension is the number of neurons, and input dimension is the dimension of each neuron
        self.weight = random_tensor([output_dim, input_dim], init)
        # bias term for each neuron
        self.bias = random_tensor([output_dim, 1], init)
        
    def forward(self, input):
        self.input = input
        # go through the layer
        return [dot(input, self.weight[neuron]) + self.bias[neuron] for neuron in range(self.output_dim)]
        
    def backward(self, gradient):
        self.bias_grad = [[element] for element in gradient]
        # the gradient passed into the argument has a dimension of output_dim
        # we want a weight gradient with a dimension of output_dim by input_dim
        self.weight_grad = [[self.input[i] * gradient[neuron] for i in range(self.input_dim)]
                       for neuron in range(self.output_dim)]

        # In the forward part, every input i was multiplied with weight[o][i] for each neuron o
        # So we reverse that step here to get the backward result
        return [sum(self.weight[o][i] * gradient[o] for o in range(self.output_dim))
                for i in range(self.input_dim)]
        
    def params(self):
        return [self.weight, self.bias]
    def grads(self):
        return [self.weight_grad, self.bias_grad]

    
# The entire model
class Model(Layer):
    # the key model of our neural network to connect each layer together
    # similarly, a derived class of layer in order to incorporate all kinds of potential layers
    def __init__(self, layers):
        self.layers = layers
    def forward(self, input):
        # just go through all the layers
        for layer in self.layers:
            input = layer.forward(input)
        return input
    
    def backward(self, gradient):
        for i in range(len(self.layers)):
            gradient = self.layers[len(self.layers) - i - 1].backward(gradient)
        return gradient
    def params(self): 
        return [param for layer in self.layers for param in layer.params()]
    def grads(self): 
        return [grad for layer in self.layers for grad in layer.grads()]
        


In [18]:
net = Model([Linear(4096, 64), Dropout(0.1), Tanh(), Linear(64, 15)])
data = mnist(0.8)
train_true_encode = [true_encode(tag) for tag in data.train_tag]
test_true_encode = [true_encode(tag) for tag in data.test_tag]
print("Hello")

loss = SoftmaxCrossEntropy()

Hello


In [22]:
import tqdm
optimizer = Momentum(0.1, 0.9)
total_loss = 0
num_correct = 0.0
with tqdm.trange(len(data.train_img)) as t:
    for i in t:
        prediction = net.forward(data.train_img[i])
        if (findMaxIndex(prediction) == findMaxIndex(train_true_encode[i])):
            num_correct += 1
        total_loss += loss.loss(prediction, train_true_encode[i])
        gradient = loss.gradient(prediction, train_true_encode[i])
        net.backward(gradient)
        optimizer.step(net)
        avg_loss = total_loss / (i + 1)
        acc = num_correct / (i + 1)
        t.set_description(f" acc: {acc}, mnist loss: {avg_loss}")



 acc: 0.47888888888888886, mnist loss: 1.7414599108366808: 100%|█| 3600/3600 [1


In [18]:
print (len(net.params()[0][0]))

4096


In [23]:
import tqdm
total_loss = 0.0
num_correct = 0.0
with tqdm.trange(len(data.test_img)) as t:
    for i in t:
        prediction = net.forward(data.test_img[i])
        if (findMaxIndex(prediction) == findMaxIndex(test_true_encode[i])):
            num_correct += 1
        total_loss += loss.loss(prediction, test_true_encode[i])
        avg_loss = total_loss / (i + 1)
        acc = num_correct / (i + 1)
        t.set_description(f" acc: {acc}, mnist loss: {avg_loss}")

 acc: 0.4022222222222222, mnist loss: 2.2284028884611105: 100%|█| 900/900 [02:2


In [None]:

prediction = net.forward(data.test_img[0])
print (prediction)
print(test_true_encode[0])
print(loss.loss(prediction, test_true_encode[0]))

In [21]:
f = open("temp_chinese.txt", "w")
for i in range(15):
    for j in range(4096):
        f.write(f"{net.params()[0][i][j]} ")
    f.write("\n")
    
f.write("\n")

for i in range(15):
        f.write(f"{net.params()[1][i][0]} ")
        f.write("\n")
f.close()