In [1]:
from src.database import MongoDB
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch
import os

In [2]:
cache_dir = "/home/hillary_kipkemoi/cache_dir" # adding a cache dir is optional

Fetch data from mongodb

In [3]:
from sklearn.model_selection import train_test_split
from src.database import MongoDB

In [4]:
db_name = 'clean_data'
collection_name = 'alain_news_clean'
connection_string = 'mongodb://localhost:27017/'
amharic_db = MongoDB(db_name=db_name, collection_name=collection_name, connection_string=connection_string)

In [5]:
data = list(amharic_db.collection.find({}))

In [6]:
texts = [item['content'] for item in data]
labels = [item['category'] for item in data]

In [7]:
# Split the data into training and test sets
train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

### Load the Garri pretrained model

In [8]:
from transformers import LlamaTokenizer

checkpoint = "iocuydi/llama-2-amharic-3784m"
commit_hash = "04fcac974701f1dab0b8e39af9d3ecfce07b3773"
# The commit hash is needed, because the model repo was rearranged after this commit (files -> finetuned/files),
# and I couldn't load the model from the new structure

tokenizer = LlamaTokenizer.from_pretrained(checkpoint, revision =commit_hash, cache_dir= cache_dir)

In [23]:
print(tokenizer.encode("ሰላም፣ አንዴት ነሽ?"))
print(tokenizer.tokenize("ሰላም፣ አንዴት ነሽ?"))

[1, 46702, 37562, 50737, 42367, 29973]
['▁ሰላም፣', '▁አንዴ', 'ት', '▁ነሽ', '?']


In [24]:
tokenizer.vocab_size

51008

In [25]:
import torch

torch.cuda.is_available() # Need GPU to use load_in_8bit

True

In [12]:
from peft import PeftModel
from transformers import LlamaForCausalLM, GenerationConfig

llama_model = LlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    load_in_8bit=True,
    device_map="auto",
    cache_dir= cache_dir, # optional
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
llama_model.resize_token_embeddings(len(tokenizer)) # needed because the fine-tuned model extended the tokenizer

Embedding(51008, 4096)

In [14]:
# this is the model we want:
model = PeftModel.from_pretrained(llama_model, "iocuydi/llama-2-amharic-3784m",revision =commit_hash, cache_dir= cache_dir)

In [15]:
model.is_quantized

True

In [17]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): ModulesToSaveWrapper(
          (original_module): Embedding(51008, 4096)
          (modules_to_save): ModuleDict(
            (default): Embedding(51008, 4096)
          )
        )
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): Para

In [18]:
layers = model.base_model.model.model.layers

In [20]:
layers

ModuleList(
  (0-31): 32 x LlamaDecoderLayer(
    (self_attn): LlamaAttention(
      (q_proj): lora.Linear8bitLt(
        (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
        (lora_dropout): ModuleDict(
          (default): Dropout(p=0.05, inplace=False)
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=4096, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default): Linear(in_features=8, out_features=4096, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (k_proj): lora.Linear8bitLt(
        (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
        (lora_dropout): ModuleDict(
          (default): Dropout(p=0.05, inplace=False)
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=4096, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default)

In [21]:
q_proj = model.base_model.model.model.layers[0].self_attn.q_proj

In [22]:
q_proj

lora.Linear8bitLt(
  (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
  (lora_dropout): ModuleDict(
    (default): Dropout(p=0.05, inplace=False)
  )
  (lora_A): ModuleDict(
    (default): Linear(in_features=4096, out_features=8, bias=False)
  )
  (lora_B): ModuleDict(
    (default): Linear(in_features=8, out_features=4096, bias=False)
  )
  (lora_embedding_A): ParameterDict()
  (lora_embedding_B): ParameterDict()
)