# MAIN NOTEBOOK
Notebook untuk menjalankan program utama

### Cek CUDA
- Apakah sudah berjalan di perangkat yang benar
- Berapa alokasi memori GPU
- Menghapus cache GPU

In [2]:
import torch

# Report which device is active. torch.cuda.current_device() raises when no
# usable CUDA runtime is present, so check availability first and fall back to
# the CPU instead of crashing the diagnostic cell.
if torch.cuda.is_available():
    # Index of the currently selected (default) GPU
    current_device = torch.cuda.current_device()
    print(f"Current active GPU: {current_device} ({torch.cuda.get_device_name(current_device)})")
    target_device = torch.device(f"cuda:{current_device}")
else:
    # No GPU index exists in this case; keep the name defined for later cells.
    current_device = None
    print("CUDA is not available; falling back to CPU.")
    target_device = torch.device("cpu")

# Move a small tensor to the selected device to confirm placement works
tensor = torch.tensor([1, 2, 3])
gpu_tensor = tensor.to(target_device)
print(f"Tensor is on device: {gpu_tensor.device}")



Current active GPU: 0 (NVIDIA GeForce RTX 4050 Laptop GPU)
Tensor is on device: cuda:0


In [None]:
# GPU whose memory is inspected
device = torch.device("cuda:0")

# Divisor used to convert byte counts for display (the output labels it "GB")
BYTES_PER_GIB = 1024 ** 3

if torch.cuda.is_available():
    # Total memory of the GPU
    total_memory = torch.cuda.get_device_properties(device).total_memory / BYTES_PER_GIB
    print(f"Total GPU Memory: {total_memory:.2f} GB")

    # Memory currently allocated by PyTorch
    allocated_memory = torch.cuda.memory_allocated(device) / BYTES_PER_GIB
    print(f"Allocated GPU Memory: {allocated_memory:.2f} GB")

    # Peak amount of memory reserved by PyTorch
    max_reserved_memory = torch.cuda.max_memory_reserved(device) / BYTES_PER_GIB
    print(f"Max Reserved GPU Memory: {max_reserved_memory:.2f} GB")

    # GPU memory currently reserved by PyTorch
    reserved_memory = torch.cuda.memory_reserved(device) / BYTES_PER_GIB
    print(f"Reserved GPU Memory: {reserved_memory:.2f} GB")

    # Reserved minus allocated is only PyTorch's cached-but-unused pool; it is
    # not the free memory of the device, so report it under its own label.
    cached_unused_memory = reserved_memory - allocated_memory
    print(f"Cached (reserved, unallocated) GPU Memory: {cached_unused_memory:.2f} GB")

    # Free memory on the device, as reported by torch.cuda.mem_get_info,
    # which returns a (free_bytes, total_bytes) pair.
    free_bytes, _total_bytes = torch.cuda.mem_get_info(device)
    free_memory = free_bytes / BYTES_PER_GIB
    print(f"Free GPU Memory: {free_memory:.2f} GB")
else:
    # The queries above raise without a CUDA runtime; report instead of crashing.
    print("CUDA is not available; skipping GPU memory report.")

In [None]:
# Clear PyTorch's CUDA cache. Per the PyTorch documentation this releases only
# cached memory that is currently unoccupied; memory backing live tensors is
# not freed by this call.
torch.cuda.empty_cache()

## Meng-*generate* Jawaban

In [1]:
import time

from generate import generate_answer_with_logprobs
# Model identifier handed to generate_answer_with_logprobs
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Measure the elapsed time of the whole generation run. perf_counter() is a
# monotonic clock intended for interval timing, so the result is not skewed by
# system clock adjustments the way time.time() differences can be.
start = time.perf_counter()
generate_answer_with_logprobs(model_name, loop_range=50, max_new_tokens=75)
end = time.perf_counter()
duration = end - start

# Elapsed duration in seconds
print(f"Durasi: {duration} detik")


Loading model on cuda
Loading model on cuda with torch.float16


Processing dataset:   0%|          | 0/50 [00:00<?, ?it/s]From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.
Processing dataset: 100%|██████████| 50/50 [01:08<00:00,  1.37s/it]

Processing completed. Results saved to meta-llama_Llama-3.2-1B-Instruct-50_results.json
Durasi: 78.52929472923279 detik





## Membuat dataset

In [None]:
## 