# Try some LLM models

## TinyLlama/TinyLlama-1.1B-Chat-v1.0

In [5]:
import os, pathlib

Check that PyTorch can see a CUDA-capable GPU

In [6]:
import torch
# Report the installed PyTorch build, whether CUDA is usable, and the name of
# device 0 when it is.
cuda_ok = torch.cuda.is_available()
print(torch.__version__)
print(cuda_ok)
if cuda_ok:
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU found")

2.5.1+cu121
True
NVIDIA GeForce RTX 4070 SUPER


In [7]:
from huggingface_hub import login

# The access token is read from a file in the parent directory rather than
# being written into the notebook; surrounding whitespace (e.g. a trailing
# newline) is stripped before use.
token = pathlib.Path("../.huggingfacetoken.txt").read_text().strip()

login(token)

Load the model and tokenizer from the local model directory

In [30]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Device used for the model weights and, in later cells, for the input tensors
# ("cuda:0" is an equivalent spelling).
device = "cuda"

# Checkpoint directory on disk; a relative or an absolute path both work.
model_path = "../models/TinyLlama"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the weights as float16 with device_map set to `device`.
load_kwargs = {"torch_dtype": torch.float16, "device_map": device}
model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

In [35]:
# Sampling (do_sample=True) is stochastic; seed the generator so that re-running
# this cell on the same setup reproduces the same text.
SEED = 0
torch.manual_seed(SEED)

# NOTE(review): the TinyLlama chat checkpoint is usually prompted through
# tokenizer.apply_chat_template; the hand-written "### Human / ### Assistant"
# format is kept here as authored — confirm it is intended.
prompt = "### Human: Explain black holes in 2–3 sentences.\n### Assistant:"
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Upper bound on the number of generated tokens (the prompt is not counted).
max_new_tokens = 512

outputs = model.generate(
    **inputs,
    max_new_tokens=max_new_tokens,
    eos_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
)

# The returned sequence contains the prompt followed by the continuation, so
# the decoded text starts with the prompt.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

# Decide whether the answer was cut off from the tokens themselves rather than
# from trailing punctuation: it was truncated when the whole token budget was
# used and the last generated token is not the end-of-sequence token.
prompt_length = inputs["input_ids"].shape[-1]
new_tokens = outputs[0][prompt_length:]
stopped_on_eos = (
    new_tokens.numel() > 0
    and new_tokens[-1].item() == tokenizer.eos_token_id
)
if new_tokens.numel() >= max_new_tokens and not stopped_on_eos:
    print("⚠️ Likely cut off — consider increasing max_new_tokens")

### Human: Explain black holes in 2–3 sentences.
### Assistant: A black hole is a region of space where gravity has become so strong that nothing, not even light, can escape from its gravitational pull. As a result, it has an intense and singular shape, with a point at one end and a swirling mass of material at the other. Black holes are often associated with supermassive stars or neutron stars, which emit radiation and produce magnetic fields that make them attracted to each other. In this way, they form their own neighborhoods within the galaxy.


## Run on CPU

In [26]:
# Re-run the same kind of prompt entirely on the CPU. Note that this rebinds
# `device`, `model` and `tokenizer` from the GPU section above.
device = "cpu"
cpu_checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# float32 weights for CPU inference (the original author's note says fp16
# won't work on pure CPU — not verified here).
model = AutoModelForCausalLM.from_pretrained(
    cpu_checkpoint,
    torch_dtype=torch.float32,
    device_map=device,
)

tokenizer = AutoTokenizer.from_pretrained(cpu_checkpoint)

prompt = "### Human: What is quantum entanglement?\n### Assistant:"
# The input tensors go to the same device as the model.
inputs = tokenizer(prompt, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=80)
decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_text)

### Human: What is quantum entanglement?
### Assistant: Quantum entanglement is a phenomenon where two particles, such as photons or electrons, become connected in such a way that their states are correlated even when separated by great distances. This means that the state of one particle can be predicted with certainty from the state of the other particle, even if they are not directly connected. This is a fundamental property of quantum mechanics and has
