In [6]:
import random
import math
import numpy as np

def get_training_samples(batch_size):
    with open("train.csv") as file:
        text = file.read()
    textlines = text.strip().split("\n")
    random.shuffle(textlines)
    start = 0
    while start < len(textlines):
        labels = []
        targets = []
        inputs = []
        end = start + batch_size
        for textline in textlines[start:end]:
            cells = textline.split(",")
            labels.append(int(cells[0]))
            targets.append([float(c) for c in cells[1:11]])
            inputs.append([float(c) for c in cells[11:]])
        yield labels, targets, inputs
        start += batch_size

def get_test_samples():
    with open("test.csv", "r") as file:
        text = file.read()
    textlines = text.strip().split("\n")
    labels = []
    targets = []
    inputs = []
    for textline in textlines:
        cells = textline.split(",")
        value = int(cells[0])
        labels.append(int(cells[0]))
        targets.append([float(c) for c in cells[1:11]])
        inputs.append([float(c) for c in cells[11:]])
    return labels, targets, inputs

def plot_number(inputs):
    line = ""
    for p in inputs:
        line += ".░▒▓█"[round(p * 4)]
        if len(line) > 27:
            print(line)
            line = ""

In [36]:
def softmax(predictions):
    m = max(predictions)
    temp = [math.exp(p - m) for p in predictions]
    total = sum(temp)
    return [t / total for t in temp]

#def sigmoid(x):
#    return 1 / (1 + np.exp(-x))

def sigmoid(x):
    if x >= 0:
        z = np.exp(-x)
        return 1 / (1 + z)
    else:
        z = np.exp(x)
        return z / (1 + z)
    
#def log_loss(activations, targets):
#    losses = [-t * math.log(a) - (1 - t) * math.log(1 - a) for a, t in zip(activations, targets)]
#    return sum(losses)

def log_loss(activations, targets):
    # Clipping values to avoid math domain error
    clipped_activations = [max(1e-15, min(a, 1 - 1e-15)) for a in activations]
    losses = [-t * math.log(a) - (1 - t) * math.log(1 - a) for a, t in zip(clipped_activations, targets)]
    return sum(losses)

In [41]:
epochs = 3
batch_size = 300
learning_rate = 0.01
input_count, hidden_count, output_count = 784, 260, 784

w_i_h = [[random.random() - 0.5 for _ in range(input_count)] for _ in range(hidden_count)]
w_h_o = [[random.random() - 0.5 for _ in range(hidden_count)] for _ in range(output_count)]
b_i_h = [0 for _ in range(hidden_count)]
b_h_o = [0 for _ in range(output_count)]

In [42]:
for epoch in range(epochs):
    for labels, targets, inputs in get_training_samples(batch_size):
        targets = inputs.copy()
        prediction = []
        for inp in inputs:
            sums = []
            for weights, bias in zip(w_i_h, b_i_h):
                summation = 0  # Reset summation for each neuron
                for w, a in zip(weights, inp):
                    summation += w * a  # Calculate weighted sum for this neuron
                summation += bias  # Add the bias for this neuron
                sums.append(summation)  # Append the summation result for this neuron
            prediction.append(sums)

        activated = [[max(0, val) for val in pred] for pred in prediction]

        outcomes = []
        for value in activated:
            values = []
            for weights, bias in zip(w_h_o, b_h_o):
                summ = 0  # Reset summation for each neuron
                for w, a in zip(weights, value):
                    summ += w * a  # Calculate weighted sum for this neuron
                summ += bias  # Add the bias for this neuron
                values.append(summ)  # Append the summation result for this neuron
            outcomes.append(values)
            
        outputs = [[max(0, value) for value in outcome] for outcome in outcomes]
        
        Loss = sum([log_loss(a, t) for a, t in zip(outputs, targets)]) / len(outputs)
        
        print(Loss)
        
#        errors_d_o = [[(ac - ta) * (oc * (1 - oc)) for ac, ta, oc in zip(acs, tas, outcome)]
#                for acs, tas, outcome in zip(outputs, targets, outcomes)]

        errors_d_o = [[(ac - ta) * (0 if oc <= 0 else 1)  for ac, ta, oc in zip(acs, tas, outcome)]
                for acs, tas, outcome in zip(outputs, targets, outcomes)]
        
        w_h_o_T = list(zip(*w_h_o))
        errors_d_h = [[sum([d * w for d, w in zip(deltas, weights)]) * (0 if p <= 0 else 1)
            for weights, p in zip(w_h_o_T, pred)] for deltas, pred in zip(errors_d_o, prediction)]
        
        ######################################################
        ######################################################
        
        # Gradient hidden->output
        act_h_T = list(zip(*activated))
        errors_d_o_T = list(zip(*errors_d_o))
        w_h_o_d = [[sum([d * a for d, a in zip(deltas, act)]) for deltas in errors_d_o_T] for act in act_h_T]
        #print(w_h_o_d)
        b_h_o_d = [sum([d for d in deltas]) for deltas in errors_d_o_T]

        # Gradient input->hidden
        inputs_T = list(zip(*inputs))
        errors_d_h_T = list(zip(*errors_d_h))
        w_i_h_d = [[sum([d * a for d, a in zip(deltas, act)]) for deltas in errors_d_h_T] for act in inputs_T]
        b_i_h_d = [sum([d for d in deltas]) for deltas in errors_d_h_T]

        # Update weights and biases for all layers
        w_h_o_d_T = list(zip(*w_h_o_d))
        for y in range(output_count):
            for x in range(hidden_count):
                w_h_o[y][x] -= learning_rate * w_h_o_d_T[y][x] / len(inputs)
            b_h_o[y] -= learning_rate * b_h_o_d[y] / len(inputs)

        w_i_h_d_T = list(zip(*w_i_h_d))
        for y in range(hidden_count):
            for x in range(input_count):
                w_i_h[y][x] -= learning_rate * w_i_h_d_T[y][x] / len(inputs)
            b_i_h[y] -= learning_rate * b_i_h_d[y] / len(inputs)

9694.815861695506
3444.5341967283207
3171.2498244708954
3214.563191606346
3183.0413944862185
3194.019484853848
3270.0181070872063
3209.5654406126914
3132.2700323220265
3105.4493117360025
3104.2204026707554
3179.389156687253
3110.428478632383
3127.4932720402235
3187.2033286188002


KeyboardInterrupt: 

In [13]:
for epoch in range(epochs):
    for labels, targets, inputs in get_training_samples(batch_size):
        # In an autoencoder, the target is typically the input itself
        targets = inputs.copy()
        prediction = []
        for inp in inputs:
            sums = []
            for weights, bias in zip(w_i_h, b_i_h):
                summation = sum(w * a for w, a in zip(weights, inp)) + bias
                sums.append(summation)
            prediction.append(sums)

        activated = [[sigmoid(val) for val in pred] for pred in prediction]  # ReLU activation

        outcomes = []
        for value in activated:
            values = []
            for weights, bias in zip(w_h_o, b_h_o):
                summ = sum(w * a for w, a in zip(weights, value)) + bias
                values.append(summ)
            outcomes.append(values)

        outputs = [[sigmoid(val) for val in outcome] for outcome in outcomes]  # ReLU for output layer
        Loss = sum(log_loss(a, t) for a, t in zip(outputs, targets)) / len(outputs)
        print(Loss)

        # Calculate error for the output layer
        errors_d_o = [[(ac - ta) * (0 if p <= 0 else 1) for ac, ta, outcome in zip(ac, ta, output)]
                      for ac, ta, output in zip(outputs, targets, outcomes)]

        # Transpose weight matrix for hidden to output layer
        w_h_o_T = list(zip(*w_h_o))
        # Calculate error for the hidden layer
        errors_d_h = [[sum(d * w for d, w in zip(deltas, weights)) * ((0 if p <= 0 else 1))
                       for weights, p in zip(w_h_o_T, pred)] for deltas, pred in zip(errors_d_o, prediction)]

        # Calculate gradients for weights and biases
        act_h_T = list(zip(*activated))
        errors_d_o_T = list(zip(*errors_d_o))
        w_h_o_d = [[sum(d * a for d, a in zip(deltas, act)) for deltas in errors_d_o_T] for act in act_h_T]
        b_h_o_d = [sum(deltas) for deltas in errors_d_o_T]

        inputs_T = list(zip(*inputs))
        errors_d_h_T = list(zip(*errors_d_h))
        w_i_h_d = [[sum(d * a for d, a in zip(deltas, act)) for deltas in errors_d_h_T] for act in inputs_T]
        b_i_h_d = [sum(deltas) for deltas in errors_d_h_T]

        # Update weights and biases for all layers
        w_h_o_d_T = list(zip(*w_h_o_d))
        for y in range(output_count):
            for x in range(hidden_count):
                w_h_o[y][x] -= learning_rate * w_h_o_d_T[y][x] / len(inputs)
            b_h_o[y] -= learning_rate * b_h_o_d[y] / len(inputs)

        w_i_h_d_T = list(zip(*w_i_h_d))
        for y in range(hidden_count):
            for x in range(input_count):
                w_i_h[y][x] -= learning_rate * w_i_h_d_T[y][x] / len(inputs)
            b_i_h[y] -= learning_rate * b_i_h_d[y] / len(inputs)


  return 1 / (1 + np.exp(-x))


ValueError: math domain error

In [316]:
labels, targets, inputs = get_test_samples()
print(len(inputs))
pred_h = [[sum([w * a for w, a in zip(weights, inp)]) +
    bias for weights, bias in zip(w_i_h, b_i_h)] for inp in inputs]
act_h = [[max(0, p) for p in pred] for pred in pred_h]
pred_o = [[sum([w * a for w, a in zip(weights, act)]) +
    bias for weights, bias in zip(w_h_o, b_h_o)] for act in act_h]
act_o = [[max(0, p) for p in pred] for pred in pred_o]


for v, i in zip(inputs[:10], act_o[:10]):
    #print(v)
    plot_number(v)
    plot_number(i)

1000
............................
............................
............................
............................
............................
............................
............................
......░▓▒▒░░................
......▓█████▓▓▓▓▓▓▓▓▓░......
......░▒░▒▓██████████▒......
............░.░░░░.██▒......
..................░█▓.......
..................██░.......
.................▒██░.......
................░██░........
................▒█▓.........
................▓█░.........
...............▒█▓..........
..............░██░..........
..............▓█▓...........
.............▓█▓░...........
............░██░............
............██▒.............
...........▒██░.............
..........░███░.............
..........▒██▓░.............
..........▒█▓...............
............................


IndexError: string index out of range

In [None]:
for val, image in enumerate((inputs)):
    plt.subplot(3, 20, i + 1)
    plt.imshow(input.reshape(28,28), cmap="gray")
    plt.axis("off")
    plt.grid(b=False)
plt.title('Original')    

for i in range(20):
    plt.subplot(3, 20, i + 1 + 40)
    plt.imshow(decodtrain[i].reshape(28,28), cmap="gray")
    plt.axis("off")
    plt.grid(b=False)
  
    
plt.show()

In [None]:
for epoch in range(epochs):
    for labels, targets, inputs in get_training_samples(batch_size):       
        pred_h = [[sum([w * a for w, a in zip(weights, inp)]) +
            bias for weights, bias in zip(w_i_h, b_i_h)] for inp in inputs]
        print((pred_h))
        act_h = [[max(0, p) for p in pred] for pred in pred_h]
        pred_o = [[sum([w * a for w, a in zip(weights, inp)]) +
            bias for weights, bias in zip(w_h_o, b_h_o)] for inp in act_h]
        act_o = [softmax(predictions) for predictions in pred_o]

        cost = sum([log_loss(a, t) for a, t in zip(act_o, targets)]) / len(targets)
        #print(f"epoch:{epoch} cost:{cost:.4f}")

        # Error derivatives
        errors_d_o = [[a - t for a, t in zip(ac, ta)] for ac, ta in zip(act_o, targets)]
        #print(errors_d_o)
        w_h_o_T = list(zip(*w_h_o))
        #errors_d_h = [[print(sum([d * w for d, w in zip(deltas, weights)])) #* (0 if p <= 0 else 1)
        #    for weights, p in zip(w_h_o_T, pred)] for deltas, pred in zip(errors_d_o, pred_h)]
        errors_d_h = [[sum([d * w for d, w in zip(deltas, weights)]) * (0 if p <= 0 else 1)
            for weights, p in zip(w_h_o_T, pred)] for deltas, pred in zip(errors_d_o, pred_h)]
        print(len(w_h_o_T))
        print('---------------------------')
        print(len(errors_d_o))

        # Gradient hidden->output
        act_h_T = list(zip(*act_h))
        errors_d_o_T = list(zip(*errors_d_o))
        w_h_o_d = [[sum([d * a for d, a in zip(deltas, act)]) for deltas in errors_d_o_T] for act in act_h_T]
        #print(w_h_o_d)
        b_h_o_d = [sum([d for d in deltas]) for deltas in errors_d_o_T]

        # Gradient input->hidden
        inputs_T = list(zip(*inputs))
        errors_d_h_T = list(zip(*errors_d_h))
        w_i_h_d = [[sum([d * a for d, a in zip(deltas, act)]) for deltas in errors_d_h_T]
            for act in inputs_T]
        b_i_h_d = [sum([d for d in deltas]) for deltas in errors_d_h_T]

        # Update weights and biases for all layers
        w_h_o_d_T = list(zip(*w_h_o_d))
        for y in range(output_count):
            for x in range(hidden_count):
                w_h_o[y][x] -= learning_rate * w_h_o_d_T[y][x] / len(inputs)
            b_h_o[y] -= learning_rate * b_h_o_d[y] / len(inputs)

        w_i_h_d_T = list(zip(*w_i_h_d))
        for y in range(hidden_count):
            for x in range(input_count):
                w_i_h[y][x] -= learning_rate * w_i_h_d_T[y][x] / len(inputs)
            b_i_h[y] -= learning_rate * b_i_h_d[y] / len(inputs)


In [282]:
labels, targets, inputs = get_test_samples()
pred_h = [[sum([w * a for w, a in zip(weights, inp)]) +
    bias for weights, bias in zip(w_i_h, b_i_h)] for inp in inputs]
act_h = [[max(0, p) for p in pred] for pred in pred_h]
pred_o = [[sum([w * a for w, a in zip(weights, act)]) +
    bias for weights, bias in zip(w_h_o, b_h_o)] for act in act_h]
act_o = [softmax(predictions) for predictions in pred_o]

for a, t, i in zip(act_o, targets, inputs):
    ma_neuron = a.index(max(a))
    ma_target = t.index(max(t))
    if ma_neuron == ma_target:
        correct += 1
    else:
        print(f"digit:{ma_target}, guessed:{ma_neuron}")
        reader.plot_number(i)
print(f"Correct: {correct}/{len(inputs)} ({correct / len(inputs):%})")

AttributeError: module 'mnistreader' has no attribute 'get_test_samples'