In [167]:
# Dataset: source code for 'requests' python libraray
text = open("data/requests.txt").read()

In [177]:
import numpy as np

# sorted list of all unique characters in the text
chars = sorted(list(set(text)))
vocab_size = len(chars)

# string-to-integer mapping
stoi = { char:i for i,char in enumerate(chars) }

# integer-to-string mapping
itos = { i:char for i,char in enumerate(chars) }

# lookup functions for the mappings
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join([itos[i] for i in l])

# encode the entire text file and convert to a numpy array
data = np.array(encode(text))

In [178]:
import numpy as np

# Each character has weights of a 32 long vector, defined by n_embed (embedding dimension)
n_embd = 32

# Initialize embedding & unembedding matrix
embedding_matrix = np.random.randn(vocab_size, n_embd)
unembedding_matrix = np.random.randn(n_embd, vocab_size)



In [179]:
# Standard expansion factor of four
ffwd_expansion_factor = 4

# Initialize hidden layer and output layer
# Use Kaiming init to intelligently scale the layer's random weights
W1 = np.random.randn(n_embd, n_embd * ffwd_expansion_factor) * np.sqrt(2.0 / n_embd)
W2 = np.random.randn(n_embd * ffwd_expansion_factor, n_embd) * np.sqrt(2.0 / n_embd)
 

In [180]:
class Model:
    def __init__(self,embedding_matrix, unembedding_matrix, W1, W2):
        self.embedding_matrix = embedding_matrix
        self.unembedding_matrix = unembedding_matrix
        self.W1 = W1
        self.W2 = W2

    def forward(self, x): 

        embd_x = self.embedding_matrix[x]

        # Embedded data passes through the hidden layer of the FFN
        hidden = embd_x @ self.W1

        hidden_activated = np.maximum(0, hidden)

        proccessed_vector = hidden_activated @ self.W2 

        logits = proccessed_vector @ self.unembedding_matrix
        
        return logits

    def pred (self, x):

        logits = self.forward(x)

        ## Apply softmax function to logits
        stable_logits = logits - np.max(logits) # This ensures the largest logit is 0
        raw_preds = np.exp(stable_logits) / np.sum(np.exp(stable_logits))        
        preds = {}

        for idx, raw_pred in enumerate(raw_preds):

            preds[itos[idx]] = raw_pred
        
        return preds

        



In [181]:
model = Model(embedding_matrix, unembedding_matrix, W1, W2)
# Get next character predictions for 'd'
preds = model.pred(stoi['d'])

  hidden = embd_x @ self.W1
  hidden = embd_x @ self.W1
  hidden = embd_x @ self.W1
  proccessed_vector = hidden_activated @ self.W2
  proccessed_vector = hidden_activated @ self.W2
  proccessed_vector = hidden_activated @ self.W2
  logits = proccessed_vector @ self.unembedding_matrix
  logits = proccessed_vector @ self.unembedding_matrix
  logits = proccessed_vector @ self.unembedding_matrix


In [206]:
# Check if the list actually contains numbers
if encoded_list and isinstance(encoded_list[0], int):
    print(encoded_list)
    print("   - SUCCESS: The encoded list correctly contains integers.")
    print(f"   - First 20 encoded integers are: {encoded_list[:20]}")

if encoded_list:
    data = np.array(encoded_list)
    print("Step 4: Converted list to NumPy array successfully.")
    print(f"   - The final 'data' variable's dtype is: {data.dtype}")
    print(f"   - The final 'data' variable's shape is: {data.shape}")
    if 'int' in str(data.dtype):
         print("   - SUCCESS: The final NumPy array has an integer dtype.")
    else:
        print("   - ❗❗❗ ERROR: The final NumPy array does NOT have an integer dtype.")



[74, 78, 81, 80, 83, 85, 1, 68, 80, 81, 90, 0, 74, 78, 81, 80, 83, 85, 1, 71, 74, 77, 70, 68, 78, 81, 0, 74, 78, 81, 80, 83, 85, 1, 80, 84, 0, 74, 78, 81, 80, 83, 85, 1, 85, 66, 83, 71, 74, 77, 70, 0, 74, 78, 81, 80, 83, 85, 1, 91, 74, 81, 71, 74, 77, 70, 0, 71, 83, 80, 78, 1, 68, 80, 77, 77, 70, 68, 85, 74, 80, 79, 84, 1, 74, 78, 81, 80, 83, 85, 1, 69, 70, 82, 86, 70, 0, 71, 83, 80, 78, 1, 74, 80, 1, 74, 78, 81, 80, 83, 85, 1, 35, 90, 85, 70, 84, 42, 48, 0, 71, 83, 80, 78, 1, 86, 79, 74, 85, 85, 70, 84, 85, 1, 74, 78, 81, 80, 83, 85, 1, 78, 80, 68, 76, 0, 0, 74, 78, 81, 80, 83, 85, 1, 81, 90, 85, 70, 84, 85, 0, 0, 71, 83, 80, 78, 1, 83, 70, 82, 86, 70, 84, 85, 84, 1, 74, 78, 81, 80, 83, 85, 1, 68, 80, 78, 81, 66, 85, 0, 71, 83, 80, 78, 1, 83, 70, 82, 86, 70, 84, 85, 84, 15, 64, 74, 79, 85, 70, 83, 79, 66, 77, 64, 86, 85, 74, 77, 84, 1, 74, 78, 81, 80, 83, 85, 1, 86, 79, 74, 68, 80, 69, 70, 64, 74, 84, 64, 66, 84, 68, 74, 74, 0, 71, 83, 80, 78, 1, 83, 70, 82, 86, 70, 84, 85, 84, 15, 68

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [202]:
# Training Hyperparameters
max_iters = 5000
learning_rate = 1e-3 # A common starting point for learning rate
eval_interval = 500  # How often we'll print the loss
batch_size = 4  # How many independent sequences in a batch
block_size = 8  # The length of each sequence


# --- The Main Training Loop ---
for step in range(max_iters):
    
    # Get a mini-batch of data
    x_batch, y_batch = get_batch(dataset, block_size, batch_size)
    
    # Get predictions for x_batch 
    preds = model.pred(x_batch)

    # Calculate loss cross entropy
    loss = get_batch_loss(preds, y_batch)
    print(loss)
    
          
    # Backward Pass
    # model.backward()
    
    # Optimizer step (Updated weights with gradients)
    for param in model.parameters():
        param -= learning_rate * param.grad 
        
    # Print out the loss periodically
    if step % eval_interval == 0:
        print(f"Step {step}: Training Loss = {loss}")

  hidden = embd_x @ self.W1
  hidden = embd_x @ self.W1
  hidden = embd_x @ self.W1
  proccessed_vector = hidden_activated @ self.W2
  proccessed_vector = hidden_activated @ self.W2
  proccessed_vector = hidden_activated @ self.W2
  logits = proccessed_vector @ self.unembedding_matrix
  logits = proccessed_vector @ self.unembedding_matrix
  logits = proccessed_vector @ self.unembedding_matrix


TypeError: unhashable type: 'numpy.ndarray'