In [1]:
# PyTorch provides the tensors and device backends used in this notebook.
import torch

# Pick the compute device: 'mps' when the MPS backend is available, otherwise 'cpu'.
# 'mps' stands for Metal Performance Shaders, a framework for GPU-accelerated
# computation on Apple devices.
# The backend module is looked up defensively: on a PyTorch build that does not
# expose `torch.backends.mps`, fall back to 'cpu' instead of raising AttributeError.
_mps_backend = getattr(torch.backends, "mps", None)
device = "mps" if _mps_backend is not None and _mps_backend.is_available() else "cpu"

# Print the device that will be used for computations
print(device)

mps


In [2]:
# Batch size setting. It is not referenced in the nearby cells; presumably the
# number of sequences processed together per step -- confirm against its first use.
batch_size = 4

# Number of consecutive elements taken from the data at a time.
block_size = 8

In [3]:
# Load the whole corpus from 'wizard_of_oz.txt' as UTF-8 text into 'text'.
with open("wizard_of_oz.txt", 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# The vocabulary is every distinct character in 'text', kept in sorted order
# so that a character's position in the list is the same on every run.
chars = list(set(text))
chars.sort()

# Vocabulary size = number of distinct characters in 'text'.
vocabular_size = len(chars)

In [4]:
# Character-level lookup tables: each character in 'chars' is mapped to its
# index in that list, and the reverse table maps the index back to the character.
string_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_string = dict(enumerate(chars))


def encode(s):
    """Encode a string into a list of integers, one per character.

    Raises KeyError if `s` contains a character that is not in `string_to_int`.
    """
    return [string_to_int[c] for c in s]


def decode(l):  # parameter name kept as-is so existing keyword calls still work
    """Decode an iterable of integers back into a string.

    Raises KeyError if an element is not a key of `int_to_string`.
    """
    return ''.join(int_to_string[i] for i in l)


# Encode the entire text into a 1-D tensor of int64 values
data = torch.tensor(encode(text), dtype=torch.long)

# Show the first 100 elements of the encoded data (bare last expression -> cell output)
data[:100]

tensor([ 1,  1, 28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,  1,
        47, 33, 50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26, 49,
         0,  0,  1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,  0,
         0,  1,  1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1, 47,
        33, 50, 25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1, 36,
        25, 38, 28,  1, 39, 30,  1, 39, 50,  9])

In [5]:
# Index at which the data is cut: 80% of its length, truncated to an integer.
n = int(0.8 * len(data))

# Training set: everything before index 'n' (the first 80% of the data).
train_data = data[:n]

# Validation set: everything from index 'n' onwards (the remaining 20%).
val_data = data[n:]

In [6]:
# Show how one window of the training data yields 'block_size' (context, target) pairs.
# 'block_size' is deliberately not reassigned here: it is already set in the
# settings cell near the top of the notebook, and redefining it in this cell would
# silently override a changed setting for every cell that runs afterwards.

# 'x' is the first 'block_size' elements of the training data; 'y' is the same
# window shifted one position to the right, so y[t] is the element that follows x[t].
x, y = train_data[:block_size], train_data[1:block_size+1]

# Loop over every position in the window
for t in range(block_size):
    # The context is everything in 'x' up to and including position 't'
    context = x[:t+1]
    # The target is the element that comes right after that context
    target = y[t]
    # Print the context and target
    print('when input is', context, 'target is', target)

when input is tensor([1]) target is tensor(1)
when input is tensor([1, 1]) target is tensor(28)
when input is tensor([ 1,  1, 28]) target is tensor(39)
when input is tensor([ 1,  1, 28, 39]) target is tensor(42)
when input is tensor([ 1,  1, 28, 39, 42]) target is tensor(39)
when input is tensor([ 1,  1, 28, 39, 42, 39]) target is tensor(44)
when input is tensor([ 1,  1, 28, 39, 42, 39, 44]) target is tensor(32)
when input is tensor([ 1,  1, 28, 39, 42, 39, 44, 32]) target is tensor(49)
