In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import time
import random



# --- Environment report ------------------------------------------------
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA (GPU) available: {torch.cuda.is_available()}")

# Name CUDA device 0 when one is usable; otherwise say why we are on the CPU.
if not torch.cuda.is_available():
    print("⚠️ GPU not detected (CUDA missing or PyTorch installed without GPU support)")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Device string that later cells pass to `.to(device)`.
device = 'cpu' if not torch.cuda.is_available() else 'cuda'
print(device)

# --- Hyperparameters ---------------------------------------------------
# None of these are read by the cells visible here; presumably they are
# consumed by model/training cells elsewhere in the notebook (verify there).
block_size = 32
batch_size = 128
max_iters = 200
learning_rate = 3e-4
eval_iters = 100
n_embd = 384
n_head = 8
n_layer = 8
dropout = 0.2
    

In [None]:
%%time
# Hand-rolled timing of a trivial tensor allocation.
# perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short intervals; time.time() is the wall clock, which can be
# coarse and may jump if the system time is adjusted.
start_time = time.perf_counter()
# Matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.perf_counter()

# Elapsed seconds, printed with 8 decimal places.
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


In [None]:
# Rough timing comparison on 10000 x 10000 random matrices: one operation on
# `device` (the GPU when available) and one on the CPU.
torch_rand1 = torch.rand([10000, 10000]).to(device)
torch_rand2 = torch.rand(10000, 10000).to(device)
# Despite the `np_` prefix these are CPU torch tensors, not NumPy arrays.
np_rand1 = torch.rand(10000, 10000)
np_rand2 = torch.rand(10000, 10000)

# CUDA kernels are launched asynchronously: without a synchronize() the clock
# would be read as soon as the matmul is queued, not when it has finished.
# Drain pending work before starting the timer and again before stopping it.
if torch_rand1.is_cuda:
    torch.cuda.synchronize()
start_time = time.perf_counter()

rand = torch_rand1 @ torch_rand2

if torch_rand1.is_cuda:
    torch.cuda.synchronize()
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

# NOTE(review): this second measurement is an element-wise product (`*`)
# while the first is a matrix product (`@`), so the two printed numbers are
# not like-for-like -- confirm this is intended.
start_time = time.perf_counter()

rand = np_rand1 * np_rand2
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

In [None]:
# Weights of a two-outcome distribution: the value at index i is the
# probability of drawing index i (0.1 -> index 0, 0.9 -> index 1).
probabilities = torch.tensor([0.1, 0.9])
# Draw 10 indices from that distribution, with replacement.
samples = probabilities.multinomial(num_samples=10, replacement=True)
print(samples)

In [None]:
# Append one element to a 1-D tensor by concatenating along dim 0
# (dim 0 is torch.cat's default).
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat([tensor, torch.tensor([5])])
out

In [None]:
# Lower-triangular 5x5 matrix of ones (entries above the diagonal are zeroed).
out = torch.ones(5, 5).tril()
out

In [None]:
# Upper-triangular 5x5 matrix of ones (entries below the diagonal are zeroed).
out = torch.ones(5, 5).triu()
out

In [None]:
# Start from a 5x5 zero matrix and write -inf wherever the lower-triangular
# ones matrix is 0, i.e. strictly above the diagonal; other entries stay 0.
out = torch.zeros(5, 5).masked_fill(
    torch.ones(5, 5).tril() == 0,
    float('-inf'),
)
out

In [None]:
# Element-wise exponential of `out`. `out` is whatever the most recently run
# cell assigned; this presumably follows the masked_fill cell, where
# exp(0) = 1 and exp(-inf) = 0 -- run the cells in order to reproduce.
out.exp()

In [None]:
# Swap the first and last dimensions of a (2, 3, 4) tensor -> (4, 3, 2).
# Note: the name `input` shadows Python's builtin input() in this kernel.
input = torch.zeros((2, 3, 4))
out = torch.transpose(input, 0, 2)
out.shape

In [None]:
# Three independent 1-D tensors of length 3.
tensor1, tensor2, tensor3 = (
    torch.tensor(row) for row in ([1, 2, 3], [4, 5, 6], [7, 8, 9])
)

# Join them along a new leading dimension, giving a 3x3 tensor whose rows
# are the inputs in order.
stacked_tensor = torch.stack((tensor1, tensor2, tensor3), dim=0)
stacked_tensor

In [None]:
# Pass a length-3 vector through a bias-free 3 -> 3 linear layer.
# The layer's weights are freshly initialized here and no seed is set in
# this cell, so the printed values differ from run to run.
sample = torch.full((3,), 10.0)
linear = nn.Linear(in_features=3, out_features=3, bias=False)
print(linear(sample))

In [None]:
# Input vector for the softmax demo.
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Softmax over the only dimension: exponentiate each entry and normalize so
# the outputs sum to 1.
softmax_output = tensor1.softmax(dim=0)

print(softmax_output)

In [None]:
# torch.randint yields an integer tensor (int64 by default per the PyTorch
# docs); .float() casts it to float32 so both matmul operands share a dtype.
# Sample from [1, 10): the previous call, randint(1, (3, 2)), used 1 as the
# exclusive upper bound, so every entry was 0 and the product was always zero.
int_64 = torch.randint(1, 10, (3, 2)).float()
# Uniform random float32 values, shape (2, 3).
float_32 = torch.rand(2, 3)
# (3, 2) @ (2, 3) -> (3, 3)
result = torch.matmul(int_64, float_32)
print(result)

In [None]:
# Logistic sigmoid, 1 / (1 + exp(-x)), of a single slightly negative value;
# the result is just under 0.5.
x = torch.tensor([-0.05], dtype=torch.float32)
y = torch.sigmoid(x)
print(y)