In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import time  # Importing time module for tracking

# Define the GPT model
class micro3GPT(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.transformer = nn.Transformer(hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        out = self.transformer(src, tgt)
        out = self.fc(out)
        return out

In [16]:
# Define the dataset
data = ["The quick brown fox jumps over the lazy dog.",
        "The quick brown fox jumps over the lazy dog again.",
        "The quick brown fox jumps over the lazy dog one more time.",
        "The quick brown fox jumps over the lazy dog once more time and .",]

In [17]:

# Convert text to numerical data
word_to_index = {}
index_to_word = {}
for sentence in data:
    for word in sentence.split():
        if word not in word_to_index:
            index = len(word_to_index)
            word_to_index[word] = index
            index_to_word[index] = word

X = torch.tensor([word_to_index[word] for sentence in data for word in sentence.split()[:-1]])
Y = torch.tensor([word_to_index[word] for sentence in data for word in sentence.split()[1:]])

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move tensors to the device
X = X.to(device)
Y = Y.to(device)

# Define model parameters
vocab_size = len(word_to_index)
embedding_size = 128
hidden_size = 128
num_layers = 1

# Define the model, loss function, and optimizer
model = micro3GPT(vocab_size, embedding_size, hidden_size, num_layers).to(device)  # Move model to device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model and measure the time
start_time = time.time()  # Start timer
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    outputs = model(X, Y)
    loss = criterion(outputs.view(-1, vocab_size), Y)
    loss.backward()
    optimizer.step()
    #print(f"Epoch {epoch}: loss={loss.item()}")
end_time = time.time()  # End timer
training_time = end_time - start_time
print(f"Training completed in {training_time:.2f} seconds")


Training completed in 3.73 seconds


In [18]:
# Generate text given a prompt and measure the time
prompt = "The quick"
start_time = time.time()  # Start timer for text generation
prompt_tensor = torch.tensor([word_to_index[word] for word in prompt.split()]).to(device)  # Move input to device
output = model(prompt_tensor.unsqueeze(0), prompt_tensor.unsqueeze(0))
next_word_index = torch.argmax(output[-1]).item()
next_word = index_to_word.get(next_word_index, "<unk>")
end_time = time.time()  # End timer for text generation
generation_time = end_time - start_time
print(f"Generated text: {prompt + ' ' + next_word}")
print(f"Text generation completed in {generation_time:.4f} seconds")

Generated text: The quick <unk>
Text generation completed in 0.0149 seconds
