In [2]:
"""
This is THE model.

It combines improvements from the LLAMA3 series, with others from Gemma2.
It has:
- GQA
- GeGLU (thinking to maybe use SwiGLU or ReGLU)
- RoPE

I still would need to implement KV-caching to improve inference type.
"""
from dataclasses import dataclass
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.nn import functional as F
import math
import inspect

def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)


"""
class CausalSelfAttentionGQA(nn.Module):
    def __init__(self,config):
        super().__init__()
        assert config.n_embd % config.n_head == 0

        self.head_dim = config.n_embd // config.n_head
        self.n_kv_heads = config.n_kv_heads # Nombre de grups de query
        self.n_head = config.n_head

        shape = (config.n_head + 2 * config.n_kv_heads) * self.head_dim

        self.wq = nn.Linear(config.n_embd, config.n_head * self.head_dim, bias=False )
        self.wk = nn.Linear(config.n_embd, self.n_kv_heads * self.head_dim, bias=False)
        self.wv = nn.Linear(config.n_embd, self.n_kv_heads * self.head_dim, bias=False)
        self.wo = nn.Linear(config.n_head * self.head_dim, config.n_embd, bias=False)
        self.cache = None
        self.queries_per_kv = self.n_head // self.n_kv_heads

    def forward(self, x, freqs_cis, mask, return_attention=False):
        B, T, C = x.shape

        xq, xk, xv = self.wq(x), self.wk(x), self.wv(x)
        xq = xq.view(B, T, self.n_head, self.head_dim)
        xk = xk.view(B, T, self.n_kv_heads, self.head_dim)
        xv = xv.view(B, T, self.n_kv_heads, self.head_dim)

        xq = apply_rope(xq, freqs_cis)
        xk = apply_rope(xk, freqs_cis)

        xk = repeat_kv(xk, self.queries_per_kv)
        xv = repeat_kv(xv, self.queries_per_kv)
        print("aquí si")
        
        xq, xk, xv = (x.transpose(1, 2) for x in (xq, xk, xv))

        scores = torch.matmul(xq, xk.transpose(2, 3)) / math.sqrt(self.head_dim)
        if mask is not None:
            scores = scores + mask  # (bs, n_local_heads, seqlen, cache_len + seqlen)
        scores = F.softmax(scores.float(), dim=-1).type_as(xq)
        output = torch.matmul(scores, xv)

        output = output.transpose(1, 2).contiguous().view(B, T, -1)
        # output projection
        proj = self.wo(output)
        if return_attention:
            return proj, scores
        return proj
"""


def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)

def apply_rope(q, k, cos, sin, unsqueeze_dim=1):
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed


class CausalSelfAttentionGQA(nn.Module):
    def __init__(self,config):
        super().__init__()
        assert config.n_embd % config.n_head == 0

        self.config = config
        self.num_heads = config.n_head
        self.num_kv_heads = config.n_kv_heads
        self.head_dim = config.n_embd // self.num_heads
        self.num_kv_groups = self.num_heads // self.num_kv_heads
        

        self.q_proj = nn.Linear(config.n_embd, self.num_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(config.n_embd, self.num_kv_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(config.n_embd, self.num_kv_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, config.n_embd, bias=False)


        #self.sliding_window_size = config.sliding_window_size
        self.max_seq_len = config.block_size

    def forward(self, x, cos_sin: tuple, return_attention = None):
        B, T, C = x.size()

        q = self.q_proj(x)
        k = self.k_proj(x)
        v = self.v_proj(x)

        q = q.view(B, T, self.num_heads, self.head_dim).transpose(1,2)
        k = k.view(B, T, self.num_kv_heads, self.head_dim).transpose(1,2)
        v = v.view(B, T, self.num_kv_heads, self.head_dim).transpose(1,2)

        cos, sin = cos_sin
        q, k = apply_rope(q, k, cos, sin)

        k = repeat_kv(k, self.num_kv_groups)
        v = repeat_kv(v, self.num_kv_groups)

        scores = torch.matmul(q, k.transpose(2,3)) / math.sqrt(self.head_dim)

        mask = True
        if mask is not None:  # the mask is not correct, it needs to be [B, C, T, T]
            mask = torch.full((T, T), float("-inf"), device=x.device)
            mask = torch.triu(mask, diagonal=1)
            mask = mask.unsqueeze(0).unsqueeze(1)
            scores = scores + mask

        scores = nn.functional.softmax(scores, dim=-1, dtype=torch.float32).to(q.dtype)
        output = torch.matmul(scores, v)

        if output.size() != (B, self.num_heads, T, self.head_dim):
            raise ValueError(f"ALGO HA ANAT MALAMENT, output té dimensions {output.size()}")
        
        output = output.transpose(1,2).contiguous()
        output = output.reshape(B, T, -1)

        output = self.o_proj(output)

        if return_attention:
            return output, scores
        return output, None
        

class RMSNorm(nn.Module):
    def __init__(self, d, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(d)) # weight

    def forward(self, x):
        input_dtype = x.dtype
        x = x.to(torch.float32)
        variance = x.pow(2).mean(-1, keepdim=True)
        hidden_states = x * torch.rsqrt(variance + self.eps)
        return self.weight * hidden_states.to(input_dtype)


def compute_rope_default(config, device):
    base = config.rope_theta
    dim = config.n_embd // config.n_head

    inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64).float().to(device)/dim))
    return inv_freq, 1.0



class LlamaRotaryEmbedding(nn.Module):
    def __init__(self, config, device = None):
        super().__init__()
        self.max_position_embeddings = config.max_position_embeddings
        self.factor = config.scaling_factor
        self.base = config.rope_theta
        self.dim = config.n_embd // config.n_head
        self.config = config
        self.rope_init_fn = compute_rope_default

        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self.original_inv_freq = self.inv_freq
    
    @torch.no_grad()
    def forward(self, x, position_ids):
        inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
        position_ids_expanded = position_ids[:, None, :].float()

        freqs = (inv_freq_expanded @ position_ids_expanded).transpose(1, 2)
        emb = torch.cat((freqs, freqs), dim=-1)
        cos = emb.cos()
        sin = emb.sin()

        # Advanced RoPE types (e.g. yarn) apply a post-processing scaling factor, equivalent to scaling attention
        cos = cos * self.attention_scaling
        sin = sin * self.attention_scaling

        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)

        

class MLP(nn.Module):
    """
    class GeGLU(nn.Module):
    def forward(self, x):
        x, gate = x.chunk(2, dim=-1)
        # Silu és el mateix que swift function
        return F.gelu(gate) * x
    
    """
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.gate_proj   = nn.Linear(config.n_embd, config.intermediate_size, bias=False) # gate_proj
        self.down_proj = nn.Linear(config.intermediate_size, config.n_embd, bias=False) # down proj
        self.up_proj = nn.Linear(config.n_embd, config.intermediate_size, bias=False) # up proj
    
    def forward(self, x):
        return self.down_proj(F.gelu(self.gate_proj(x), approximate="tanh")* self.up_proj(x)) 


class Block(nn.Module):

    def __init__(self,config):
        super().__init__()
        self.input_layernorm = RMSNorm(config.n_embd, config.norm_eps) # input_layernorm
        self.self_attn = CausalSelfAttentionGQA(config)
        self.post_attention_layernorm = RMSNorm(config.n_embd, config.norm_eps) # post_attention_layernorm
        self.mlp = MLP(config)
    
    def forward(self, x, cos_sin):
        residual = x
        x = self.input_layernorm(x)
        x, attn_weights = self.self_attn(x, cos_sin)

        x = residual + x

        residual = x
        x = self.post_attention_layernorm(x)
        x = self.mlp(x)
        x = residual + x

        return x

class Aloja(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config


        self.padding_idx = config.pad_token_id
        self.embed_tokens =nn.Embedding(config.vocab_size, config.n_embd, self.padding_idx)

        self.layers = nn.ModuleList([Block(config) for i in range(config.n_layer)])

        self.norm =  RMSNorm(config.n_embd, config.norm_eps)
        self.rotary_emb = LlamaRotaryEmbedding(config=config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.embed_tokens.weight = self.lm_head.weight # Linkeddddd

        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            std = 0.02
            torch.nn.init.normal_(module.weight, mean = 0.0, std=std)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def forward(self, idx, targets=None):
        B, T = idx.size()
        assert T <= self.config.block_size, f"Cannot forward sequence of length {T}, block size is smaller"

        x = self.embed_tokens(idx)


        position_ids = torch.arange(T, device = x.device).unsqueeze(0)

        


        cos_sin = self.rotary_emb(x, position_ids)

        for block in self.layers:
            x = block(x, cos_sin)
        
        x = self.norm(x)

        logits = self.lm_head(x).float()

        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        
        return logits, loss
    
    def configure_optimizers(self, learning_rate, weight_decay=0.1, betas=(0.9, 0.95), device_type='cuda'):
        param_dict = {pn: p for pn, p in self.named_parameters()}
        param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad}
        # create optim groups. Any parameters that is 2D will be weight decayed, otherwise no.
        # i.e. all weight tensors in matmuls + embeddings decay, all biases and layernorms don't.
        decay_params = [p for n, p in param_dict.items() if p.dim() >= 2]
        nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2]
        optim_groups = [
            {'params': decay_params, 'weight_decay': weight_decay},
            {'params': nodecay_params, 'weight_decay': 0.0}
        ]
        num_decay_params = sum(p.numel() for p in decay_params)
        num_nodecay_params = sum(p.numel() for p in nodecay_params)
        print(f"num decayed parameter tensors: {len(decay_params)}, with {num_decay_params:,} parameters")
        print(f"num non-decayed parameter tensors: {len(nodecay_params)}, with {num_nodecay_params:,} parameters")
        # Create AdamW optimizer and use the fused version if it is available
        fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
        use_fused = fused_available and device_type == "cuda"
        print(f"using fused AdamW: {use_fused}")
        optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, eps=1e-8, fused=use_fused)
        return optimizer


@dataclass
class AlojaConfig:
    block_size: int = 2048
    vocab_size: int = 65536
    n_layer: int = 30
    n_head: int = 8
    n_embd: int = 768 # hidden_size
    intermediate_size = 2048 # 3072
    n_kv_heads: int = 4 # nombre de grups de query
    norm_eps: int = 1e-05
    rope_theta: float = 10000.0
    use_scaled_rope: bool = False
    scaling_factor: int = 1
    max_batch_size: int = 16
    max_seq_len:int = 2048
    max_position_embeddings:int = 2048
    pad_token_id:int = 3


In [None]:
def load_checkpoint(model, optimizer, dataloader, checkpoint_path, device):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.config(checkpoint['config'])
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    step = checkpoint['step']
    print(f"Checkpoint loaded from step {step}")
    return step

In [6]:
checkpoint = torch.load("./model_02500.pt", map_location="cpu")
model = Aloja(checkpoint['config'])

  checkpoint = torch.load("./model_02500.pt", map_location="cpu")


In [8]:
state_dict = checkpoint['model']
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace("_orig_mod.", "")
    new_state_dict[new_key] = value

model.load_state_dict(new_state_dict)

<All keys matched successfully>

In [15]:
import tiktoken
import torch
from torch.nn import functional as F
import numpy as np
from tokenizers import Tokenizer

device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"

enc = Tokenizer.from_file("./tokenizer/byte-level-bpe.tokenizer.json")


num_return_sequences = 10
max_lenght = 1024

tokens = np.array(enc.encode("Hola, sóc un model de llenguatge,").ids)
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0).repeat(num_return_sequences, 1)
#x = tokens.to('cuda')

model.to(device)

x = tokens.to(device)

model.eval()

torch.manual_seed(42)
torch.cuda.manual_seed(42)
while x.size(1) < max_lenght:
    with torch.no_grad():
        logits, _ = model(x)
        logits = logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
        ix = torch.multinomial(topk_probs, 1)
        xcol = torch.gather(topk_indices, -1, ix)
        x = torch.cat((x, xcol), dim=1)

for i in range(num_return_sequences):
    tokens = x[i, :max_lenght].tolist()
    decoded = enc.decode(tokens)
    print(">", decoded)

KeyboardInterrupt: 

In [19]:
tokens = x[2, :max_lenght].tolist()
decoded = enc.decode(tokens)
print(decoded)

Hola, sóc un model de llenguatge, i avui he donat voltes a les coses que vull expressar quan un nen diu que li encanten. No he tingut mai aquesta visió d'estar sol en un ambient familiar sense una bona disposició a l'aula, ni coneixements mínims per ensenyar-l i que sovint no se sol ni se sent gaire complicat trobar solucions a la seva pròpia situació.
Doncs això és exactament el que vull dir. Perquè és important fer reflexionar a l'infant, perquè, si el seu pensament no és coherent, l'ajuda a trobar ajuda per deixar-se endur, llavors és tan important. I aquesta implicació és una part important de la responsabilitat de la intel·ligència. Per això, he


In [21]:
checkpoint['config']

AlojaConfig(block_size=2048, vocab_size=65536, n_layer=30, n_head=8, n_embd=768, n_kv_heads=4, norm_eps=1e-05, rope_theta=10000.0, use_scaled_rope=False, scaling_factor=1, max_batch_size=16, max_seq_len=2048, max_position_embeddings=2048, pad_token_id=3)

In [29]:
from transformers import LlamaConfig,  LlamaForCausalLM

configuration = LlamaConfig(
    vocab_size = 65536,
    hidden_size = 768,
    intermediate_size = 2048,
    num_hidden_layers = 30,
    num_attention_heads= 8,
    num_key_value_heads= 4,
    hidden_act = "gelu",
    max_position_embeddings = 2048,
    initializer_range = 0.02,
    rms_norm_eps = 1e-05,
    tie_word_embeddings= True,
    rope_theta = 10000.0
)

In [30]:
model = LlamaForCausalLM(configuration)

In [32]:
state_dict = checkpoint['model']
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace("_orig_mod.", "model.")
    if new_key == "model.lm_head.weight":
        new_key = "lm_head.weight"
    new_state_dict[new_key] = value

model.load_state_dict(new_state_dict)

<All keys matched successfully>

In [33]:
tokenizer = Tokenizer.from_file("./tokenizer/byte-level-bpe.tokenizer.json")

In [137]:
from transformers import GenerationConfig
from transformers import EosTokenCriteria, StoppingCriteriaList
genconf = GenerationConfig(
    do_sample = True,
    max_length = 500,
    top_k = 50,
    num_return_sequences = 1,
    eos_token_id = 1,
    temperature = 0.8,
    top_p = 0.9,
    pad_token_id = 3,
    repetition_penalty = 1.5,

)

In [145]:
tokens = np.array(enc.encode("Aquest matí he anat").ids)
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0)

a = model.generate(tokens, genconf)

In [146]:
for response in range(len(a)):
    print("> ", tokenizer.decode(a[response].tolist()))

>  Hi havia una vegada un noi anomenat Roger Baiges. Un bon dia, en Roger osset va veure el seu avi al bosc i li digué: "Per què has vingut aquí?"
"Aquest és l'avi de la iaia", deia Pau amb veu tremolosa mentre intentava parlar-li a ell mateix per telèfon des d'un racó del jardí on es trobava amagat sota terra. En Pere no sabia com arribar allà! Va mirar cap amunt pel camí que conduïa fins dalt dels arbres alts i frondosos. La seva mirada era baixa però ferma alhora. El riu brillava més fort quan els ocells cantaven melodies estranyes durant hores sense parar atenció als seus moviments. No podia deixar escapar aquesta sensació tan trista ni màgica perquè semblava impossible trobar algú desconegut dins d'una caixa misteriosa amagada entre les arrels d'aquest arbre màgic.  

>  Hi havia una vegada un noi anomenat Roger Baiges. Un bon dia, en Roger  va veure a l'home assegut al seu costat amb el dit índex i li deia: "El meu nom és Oliver".
En Jordi es feia passar per home d'una altra èpoc

In [294]:
checkpoint_4 = torch.load("./model_11000.pt", map_location="cpu")
state_dict_4 = checkpoint_4['model']
new_state_dict_4 = {}
for key, value in state_dict_4.items():
    new_key = key.replace("_orig_mod.", "model.")
    if new_key == "model.lm_head.weight":
        new_key = "lm_head.weight"
    new_state_dict_4[new_key] = value


from transformers import LlamaConfig,  LlamaForCausalLM

configuration = LlamaConfig(
    vocab_size = 65536,
    hidden_size = 768,
    intermediate_size = 2048,
    num_hidden_layers = 30,
    num_attention_heads= 8,
    num_key_value_heads= 4,
    hidden_act = "gelu",
    max_position_embeddings = 2048,
    initializer_range = 0.02,
    rms_norm_eps = 1e-05,
    tie_word_embeddings= True,
    rope_theta = 10000.0
)
model_4 = LlamaForCausalLM(configuration)

model_4.load_state_dict(new_state_dict_4)

  checkpoint_4 = torch.load("./model_11000.pt", map_location="cpu")


<All keys matched successfully>

In [337]:
genconf = GenerationConfig(
    do_sample = True,
    max_length = 350,
    top_k = 50,
    num_return_sequences = 1,
    eos_token_id = 1,
    temperature = 0.6,
    top_p = 0.9,
    pad_token_id = 3,
    repetition_penalty = 1.2,

)
tokens = np.array(enc.encode("Hi havia una vegada").ids)
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0)
a = model_4.generate(tokens, genconf)
for response in range(len(a)):
    print("> ", tokenizer.decode(a[response].tolist()))

>  Hi havia una vegada un gat que volia ser cuiner. Un dia, va veure un ratolí petit i gris amb els ulls brillants a la seva cua. El ratolí tenia molta gana! 

"Hola gatet!" Va dir el ratolí amb veu dolça. "M'agradaria ajudar-te a trobar menjar."

El ratolí es va posar molt content de saber què era el peixet blau. Va sortir corrent cap al bosc encantat on vivia la Bruixa Lila. Ella li va explicar tot sobre les galetes màgiques del llac. La bruixa va escoltar atentament, però no va poder evitar preguntar: "Què són aquestes galetes?"

La Marta, sorpresa, va mirar el ratolí blau. Era tan gran com ella! I quin sabor feia?

"Aquestes són galetes per als humans," va continuar el ratolí. "Són com petites notes que diuen 'Benvingut al Bosc Encantat'. Són com petits missatges secrets que ens ajuden a descobrir coses noves!"

La Marta va quedar fascinada. No sabia que hi hagués tants tipus diferents d'aliments! Però també estava emocionada per aprendre més sobre aquest món màgic.  



In [327]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [328]:
tokenizer = AutoTokenizer.from_pretrained("./tokenizer/byte-level-bpe.tokenizer.json")

OSError: Incorrect path_or_model_id: './tokenizer/byte-level-bpe.tokenizer.json'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

In [330]:
from transformers import PreTrainedTokenizerFast

wrapped_tokenizer = PreTrainedTokenizerFast(
    tokenizer_file="./tokenizer/byte-level-bpe.tokenizer.json"
)


In [324]:
from huggingface_hub import HfApi, HfFolder, Repository, notebook_login

notebook_login()

repo_name = "pauhidalgoo/cucafera"
model.push_to_hub(repo_name)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

model.safetensors:   0%|          | 0.00/980M [00:00<?, ?B/s]

AttributeError: 'tokenizers.Tokenizer' object has no attribute 'push_to_hub'

In [333]:
from huggingface_hub import HfApi, HfFolder, Repository, notebook_login

notebook_login()
wrapped_tokenizer.push_to_hub(repo_name)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


CommitInfo(commit_url='https://huggingface.co/pauhidalgoo/cucafera/commit/ba154d6a0028da9fc30eace50bb0fe9be49e7e47', commit_message='Upload tokenizer', commit_description='', oid='ba154d6a0028da9fc30eace50bb0fe9be49e7e47', pr_url=None, pr_revision=None, pr_num=None)

In [339]:
from transformers import AutoTokenizer, AutoModelForCausalLM

moodel = AutoModelForCausalLM.from_pretrained("./acontinua")

In [350]:
genconf = GenerationConfig(
    max_length = 350,
    repetition_penalty = 1.2,

)
tokens = np.array(enc.encode("La intel·ligència artificial tindrà la capacitat de").ids)
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0)
a = moodel.generate(tokens, genconf)
for response in range(len(a)):
    print("> ", tokenizer.decode(a[response].tolist()))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:1 for open-end generation.


>  La intel·ligència artificial tindrà la capacitat de generar noves idees i solucions a partir d'un conjunt complex de dades. Aquesta capacitat obre un ventall de possibilitats per a l'aprenentatge automàtic, permetent-nos explorar patrons complexos en el món real sense necessitat de ser programats explícitament.
Un dels aspectes més importants del potencial de la IA per al desenvolupament de productes és la seva capacitat per a generar noves idees. La IA pot analitzar grans quantitats de dades, identificar patrons i tendències que no es poden capturar amb precisió, com ara els models de predicció de mercat o les xarxes socials. Això permet als desenvolupadors crear productes que siguin fàcils d'entendre i aplicar, cosa que pot conduir a una major eficiència i rendibilitat. 
Per exemple, imagineu un robot que ha de navegar pel bosc. El seu objectiu principal seria trobar un equilibri entre la seguretat i la satisfacció del client. En lloc de buscar informació sobre arbres, podria util

In [120]:
from eval.hellaswag import iterate_examples, render_example

In [121]:
def get_most_likely_row(tokens, mask, logits):
    """
    Funció per triar quina de les opcions múltiples escull el model
    (es fa servir per avaluar-lo utilitzant hellaswag)
    """
    # evaluate the autoregressive loss at all positions
    shift_logits = (logits[..., :-1, :]).contiguous()
    shift_tokens = (tokens[..., 1:]).contiguous()
    flat_shift_logits = shift_logits.view(-1, shift_logits.size(-1))
    flat_shift_tokens = shift_tokens.view(-1)
    shift_losses = F.cross_entropy(flat_shift_logits, flat_shift_tokens, reduction='none')
    shift_losses = shift_losses.view(tokens.size(0), -1)
    # now get the average loss just for the completion region (where mask == 1), in each row
    shift_mask = (mask[..., 1:]).contiguous().to(logits.device) # we must shift mask, so we start at the last prompt token
    masked_shift_losses = shift_losses * shift_mask
    # sum and divide by the number of 1s in the mask
    sum_loss = masked_shift_losses.sum(dim=1)
    avg_loss = sum_loss / shift_mask.sum(dim=1)
    # now we have a loss for each of the 4 completions
    # the one with the lowest loss should be the most likely
    pred_norm = avg_loss.argmin().item()
    return pred_norm

In [123]:
num_correct_norm = 0
num_total = 0
for i, example in enumerate(iterate_examples("val")):
    _, tokens, mask, label = render_example(example)
    tokens = tokens.to(device)
    maks = mask.to(device)

    with torch.no_grad():
        outputs = model(tokens)
        logits = outputs.logits
        pred_norm = get_most_likely_row(tokens, mask, logits)
    num_total += 1
    num_correct_norm += int(pred_norm == label)
acc_norm = num_correct_norm / num_total
print(f"HellaSwag accuracy: {num_correct_norm}/{num_total} = {acc_norm:.4f}")


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 883: character maps to <undefined>

In [102]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

tokenizer = AutoTokenizer.from_pretrained("pauhidalgoo/cucafera-chat")
model = AutoModelForCausalLM.from_pretrained("pauhidalgoo/cucafera-chat")

tokenizer_config.json:   0%|          | 0.00/2.81k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/2.88M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/980M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/144 [00:00<?, ?B/s]

In [103]:
import numpy as np
import torch

In [184]:
from transformers import GenerationConfig
genconf = GenerationConfig(
    max_length = 150,
    repetition_penalty = 1.2,
    temperature = 0.6,
    top_k = 50,
    top_p = 0.9,
    do_sample = True,

)
tokens = np.array(tokenizer.encode("""<|im_start|>user
Hola!<|im_end|>
<|im_start|>assistant
Bon dia!<|im_end|>
<|im_start|>user
Què és un pingüí?<|im_end|>
<|im_start|>assistant"""))
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0)
a = model.generate(tokens, genconf)
for response in range(len(a)):
    print("> ", tokenizer.decode(a[response].tolist()))

>  <|im_start|>user
Hola!<|im_end|>
<|im_start|>assistant
Bon dia!<|im_end|>
<|im_start|>user
Què és un pingüí?<|im_end|>
<|im_start|>assistant
Un pingüí és una figura molt peculiar, però no és un animal.  És un mamífer?<|im_end|>


In [105]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

tokenizer_instruct = AutoTokenizer.from_pretrained("pauhidalgoo/cucafera-instruct")
model_instruct = AutoModelForCausalLM.from_pretrained("pauhidalgoo/cucafera-instruct")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [174]:
from transformers import GenerationConfig
genconf = GenerationConfig(
    max_length = 150,
    repetition_penalty = 1.2,
    temperature = 0.6,
    top_k = 50,
    top_p = 0.9,
    do_sample = True,

)
tokens = np.array(tokenizer_instruct.encode("<|im_start|>user\n Fes una endevinalla<|im_end|>\n<|im_start|>assistant\n"))
tokens = torch.tensor(tokens, dtype=torch.long)
tokens = tokens.unsqueeze(0)
a = model_instruct.generate(tokens, genconf)
for response in range(len(a)):
    print("> ", tokenizer_instruct.decode(a[response].tolist()))

>  <|im_start|>user
 Fes una endevinalla<|im_end|>
<|im_start|>assistant
No hi ha cap dia més a la vida. Hi havia un noi anomenat Jordi que vivia sol al poble d'Elvis, mentre feia el seu esmorzar de pa amb tomàquet i suc de taronja.<|im_end|>


In [59]:
import torch

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {"role": "user", "content": " Quines són les lleis de Newton "},
]

# prepare the messages for the model
input_ids = tokenizer.apply_chat_template(messages, truncation=True, add_generation_prompt=True, return_tensors="pt")

# inference
outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.6,
        top_k=50,
        top_p=0.9,
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

system

user
 Quines són les lleis de Newton 
assistant
Les lleis de Newton són:

1. La Llei de Newton: Aquesta llei estableix que el moviment d'un objecte és proporcional al quadrat de la seva massa.
2. La Llei de Newton: Aquesta llei estableix que la força de la gravetat és directament proporcional al quadrat de la massa.
3. La Llei de Newton: Aquesta llei estableix que la força de la gravetat és directament proporcional al quadrat de la massa.
