In [48]:
# Import the PyTorch library
import torch
import torch_directml # This uses pytorch with DirectML (For AMD GPU)
import torch.nn as nn

from torch.nn import functional as F


# Print the version of CUDA (Compute Unified Device Architecture), which is a parallel computing platform and application programming interface model created by Nvidia
print(torch.version.cuda)

# Sets dml as the DirectML device
dml = torch_directml.device()
print(dml)

# 
print(torch_directml.device_count())

# Check if a GPU (cuda) is available, otherwise use CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Check if a GPU (cuda) is available, otherwise use Directl ML, or CPU
if torch.cuda.is_available():
    device = 'cuda'
elif torch_directml.is_available():
    device = torch_directml.device()
    print("Using Direct ML")
else:
    device = 'cpu'

# Print the selected device (either 'cuda' or 'cpu')
print(f"selected device: {device}")

block_size = 8
batch_size = 4


11.7
privateuseone:0
1
Using Direct ML
selected device: privateuseone:0


In [49]:
with open('pg420.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Checking file contents
print(len(text))
print(text[:200])
chars = sorted(set(text))
print(chars)
print(len(chars))
vocabulary_size = len(chars)

230469
﻿

Dorothy and the Wizard in Oz


  A Faithful Record of Their Amazing Adventures
    in an Underground World; and How with the
     Aid of Their Friends Zeb Hugson, Eureka
       the Kitten, and Jim 
['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\ufeff']
76


In [50]:
# Tokenizer

# Create a dictionary mapping characters to integers
string_to_int = {ch: i for i, ch in enumerate(chars)}

# Create a dictionary mapping integers back to characters
int_to_string = {i: ch for i, ch in enumerate(chars)}

# Define a function to encode a string into a list of corresponding integers
encode = lambda s: [string_to_int[c] for c in s]

# Define a function to decode a list of integers into the original string
decode = lambda l: ''.join([int_to_string[i] for i in l])

# Example: Encode the string 'hello'
print(encode('hello'))

# Example: Decode the encoded sequence back to the original string
encoded_hello = encode('hello')
decoded_hello = decode(encoded_hello)
print(decoded_hello)

# Convert the entire text into a tensor of encoded integers
data = torch.tensor(encode(text), dtype=torch.long)

# Print the first 100 elements of the encoded tensor
print(data[:100])


[56, 53, 60, 60, 63]
hello
tensor([75,  0,  0, 27, 63, 66, 63, 68, 56, 73,  1, 49, 62, 52,  1, 68, 56, 53,
         1, 46, 57, 74, 49, 66, 52,  1, 57, 62,  1, 38, 74,  0,  0,  0,  1,  1,
        24,  1, 29, 49, 57, 68, 56, 54, 69, 60,  1, 41, 53, 51, 63, 66, 52,  1,
        63, 54,  1, 43, 56, 53, 57, 66,  1, 24, 61, 49, 74, 57, 62, 55,  1, 24,
        52, 70, 53, 62, 68, 69, 66, 53, 67,  0,  1,  1,  1,  1, 57, 62,  1, 49,
        62,  1, 44, 62, 52, 53, 66, 55, 66, 63])


In [51]:
# Select data to Train/Validate

# Select 80% of the data to train
n = int(0.8*len(data))
train_data = data[:n]
# Validate the model with the remaining 20%
val_data = data[n:]


In [52]:
def get_batch(split):
    # Choose the appropriate dataset based on the split ('train' or 'val')
    data = train_data if split == 'train' else val_data
    
    # Generate random indices to sample a batch
    ix = torch.randint(len(data) - block_size, (batch_size,))
    print(ix)
    
    # Stack sequences of data to form the input batch x
    x = torch.stack([data[i:i+block_size] for i in ix])
    
    # Stack sequences shifted by one to form the target batch y
    y = torch.stack([data[i+1:i+block_size] for i in ix])
    
    x, y = x.to(device), y.to(device)
    
    return x, y
# Break the data in blocks
# x = train_data[:block_size]
# y = train_data[1:block_size+1]

# for t in range(block_size):
#     context = x[:t+1]
#     target = y[t]
#     print('when input is', context, 'target is', target)

# Train the model
x, y = get_batch('train')

print('inputs:')
# print(x.shape)
print(x)
decode_sequences = lambda sequences: [''.join([int_to_string[i] for i in seq]) for seq in sequences]
decoded_x = decode_sequences(x.tolist())
print(f"Training set: {decoded_x}")

print('targets:')
print(y)
decoded_x = decode_sequences(y.tolist())
print(f"Validation set: {decoded_x}")


tensor([ 35853, 148521, 151431, 142626])
inputs:
tensor([[ 1,  1,  3, 46, 53,  1, 36, 49],
        [ 0,  3, 31, 63, 71,  1, 63, 60],
        [ 1, 71, 57, 68, 56,  1, 56, 49],
        [53, 66,  1, 50, 53, 54, 63, 66]], device='privateuseone:0')
Training set: ['  "We Ma', '\n"How ol', ' with ha', 'er befor']
targets:
tensor([[ 1,  3, 46, 53,  1, 36, 49],
        [ 3, 31, 63, 71,  1, 63, 60],
        [71, 57, 68, 56,  1, 56, 49],
        [66,  1, 50, 53, 54, 63, 66]], device='privateuseone:0')
Validation set: [' "We Ma', '"How ol', 'with ha', 'r befor']


In [61]:
class BigramLanguageModel(nn.Module):
    def __init__(self, vocabulary_size):
        super().__init__()
        # Define an embedding layer with vocabulary_size as both input and output dimensions
        # Each token (word) will be represented as a vector of size vocabulary_size
        self.token_embedding_table = nn.Embedding(vocabulary_size, vocabulary_size)
    
    def forward(self, index, targets):
        # sourcery skip: inline-immediately-returned-variable
        # Perform an embedding lookup for the input indices (index)
        # The result is a tensor of shape (batch_size, sequence_length, embedding_dim)
        logits = self.token_embedding_table(index)
        
        # Return the resulting logits (embedding vectors)
        return logits




BigramLanguageModel(
  (token_embedding_table): Embedding(76, 76)
)


TypeError: embedding(): argument 'indices' (position 2) must be Tensor, not int