In [2]:
import os       # os.path.exists
import math     # math.log, math.exp
import random   # random.seed, random.choices, random.gauss, random.shuffle
random.seed(42) # Let there be order among chaos

In [3]:
# Plain-text corpus: every non-blank line of the file is treated as one document.
filename = 'The_Creative_Act.txt'

# Fail early, with an explicit message, when the corpus file is absent.
if not os.path.exists(filename):
    raise FileNotFoundError(f"Required file not found: {filename}")

with open(filename, 'r', encoding='utf-8') as f:
    # Strip surrounding whitespace from each line, then drop the lines left empty.
    docs = list(filter(None, map(str.strip, f)))

# A file made up only of blank lines yields no documents at all.
if len(docs) == 0:
    raise ValueError(f"No valid documents found in {filename}")

print(f"num docs: {len(docs)}")
print(docs[:5])

num docs: 19
['THE CREATIVE ACT', 'by Marcel Duchamp', 'Let us consider two important factors, the two poles of the creation of art: the artist on the one hand, and on the other the spectator who later becomes the posterity.', 'To all appearances, the artist acts like a mediumistic being who, from the labyrinth beyond time and space, seeks his way out to a clearing. If we give the attributes of a medium to the artist, we must then deny him the state of consciousness on the esthetic plane about what he is doing or why he is doing it. All his decisions in the artistic execution of the work rest with pure intuition and cannot be translated into a self-analysis, spoken or written, or even thought out.', 'T.S. Eliot, in his essay on "Tradition and Individual Talent", writes: "The more perfect the artist, the more completely separate in him will be the man who suffers and the mind which creates; the more perfectly will the mind digest and transmute the passions which are its material."']


In [7]:
# Character-level tokenizer: each distinct character found in the documents becomes
# a token; its id is its index (0..n-1) in this list, which is sorted by code point.
uchars = sorted({ch for doc in docs for ch in doc})
#BOS = len(uchars) # token id for the special Beginning of Sequence (BOS) token
vocab_size = 1 + len(uchars) # every distinct character, plus one extra id reserved for BOS
print(uchars)
print(f"vocab size: {vocab_size}")


[' ', '"', "'", ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'H', 'I', 'L', 'M', 'R', 'S', 'T', 'V', 'W', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'é', '–']
vocab size: 53


In [8]:
class Value:
    """A scalar node in a computation graph supporting reverse-mode autodiff.

    Each arithmetic operation returns a new ``Value`` that remembers its operands
    (``_children``) and the partial derivative of its result with respect to each
    operand (``_local_grads``). Calling ``backward()`` on a result node then
    applies the chain rule through the recorded graph.

    Attributes (declared via ``__slots__``):
        data: scalar value of this node, computed during the forward pass.
        grad: derivative of the node ``backward()`` was called on w.r.t. this
            node; starts at 0 and is accumulated into by ``backward()``.
        _children: tuple of operand nodes this node was computed from.
        _local_grads: tuple, parallel to ``_children``, holding d(self)/d(child).
    """
    __slots__ = ('data', 'grad', '_children', '_local_grads') # Python optimization for memory usage

    def __init__(self, data, children=(), local_grads=()):
        self.data = data                # scalar value of this node calculated during forward pass
        self.grad = 0                   # derivative of the loss w.r.t. this node, calculated in backward pass
        self._children = children       # children of this node in the computation graph
        self._local_grads = local_grads # local derivative of this node w.r.t. its children

    def __add__(self, other):
        """Return ``self + other``; a non-Value ``other`` is wrapped as a constant node."""
        other = other if isinstance(other, Value) else Value(other)
        return Value(self.data + other.data, (self, other), (1, 1))

    def __mul__(self, other):
        """Return ``self * other``; a non-Value ``other`` is wrapped as a constant node."""
        other = other if isinstance(other, Value) else Value(other)
        return Value(self.data * other.data, (self, other), (other.data, self.data))

    def __pow__(self, other):
        """Return ``self ** other`` where ``other`` is a plain number (not a Value).

        Only ``self`` is recorded as a child, so no gradient flows to the exponent.
        """
        return Value(self.data**other, (self,), (other * self.data**(other-1),))

    def log(self):
        """Return the natural logarithm of this node (``math.log`` rejects data <= 0)."""
        return Value(math.log(self.data), (self,), (1/self.data,))

    def exp(self):
        """Return e raised to this node's value."""
        result = math.exp(self.data)  # d/dx exp(x) == exp(x): one evaluation serves both
        return Value(result, (self,), (result,))

    def relu(self):
        """Return ``max(0, self)``; the local gradient is 1.0 where data > 0, else 0.0."""
        return Value(max(0, self.data), (self,), (float(self.data > 0),))

    # The remaining operators are all expressed through __add__, __mul__ and __pow__,
    # so they inherit graph recording from those three.
    def __neg__(self): return self * -1
    def __radd__(self, other): return self + other
    def __sub__(self, other): return self + (-other)
    def __rsub__(self, other): return other + (-self)
    def __rmul__(self, other): return self * other
    def __truediv__(self, other): return self * other**-1
    def __rtruediv__(self, other): return other * self**-1

    def backward(self):
        """Backpropagate from this node through every node it depends on.

        Sets ``self.grad`` to 1, then walks the graph in reverse topological
        order, adding ``local_grad * parent.grad`` into each child's ``grad``.
        Gradients are accumulated with ``+=`` and never reset here, so the caller
        must zero ``grad`` on nodes that are reused across several backward passes.

        The topological order is built with an explicit stack instead of
        recursion, so a long dependency chain (e.g. a running sum over thousands
        of terms) cannot exceed the interpreter's recursion limit. The traversal
        yields the same post-order a recursive depth-first search would, hence
        the same accumulation order.
        """
        topo = []
        visited = {self}
        # Each stack entry pairs a node with an iterator over its not-yet-explored children.
        stack = [(self, iter(self._children))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._children)))
                    break  # descend into this child first; resume `pending` afterwards
            else:
                # All children are finished: the node can be emitted after them.
                topo.append(node)
                stack.pop()
        self.grad = 1
        for v in reversed(topo):
            for child, local_grad in zip(v._children, v._local_grads):
                child.grad += local_grad * v.grad