Info om modell:

https://huggingface.co/ltg/norbert3-base


Installations:
Using Python 3.11.6

python3.11 -m pip install torch
python3.11 -m pip install transformers
python3.11 -m pip install sentencepiece

In [10]:
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModelForQuestionAnswering





In [6]:
tokenizer = AutoTokenizer.from_pretrained("ltg/norbert3-xs")
model = AutoModelForMaskedLM.from_pretrained("ltg/norbert3-xs", trust_remote_code=True)

mask_id = tokenizer.convert_tokens_to_ids("[MASK]")
input_text = tokenizer("Nå ønsker de seg en[MASK] bolig.", return_tensors="pt")
output_p = model(**input_text)
output_text = torch.where(input_text.input_ids == mask_id, output_p.logits.argmax(-1), input_text.input_ids)

# should output: '[CLS] Nå ønsker de seg en ny bolig.[SEP]'
print(tokenizer.decode(output_text[0].tolist()))

[CLS] Nå ønsker de seg en ny bolig.[SEP]


In [14]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("ltg/norbert3-xs")
model = AutoModelForQuestionAnswering.from_pretrained("ltg/norbert3-xs", trust_remote_code=True)

print(model)

# Konteksttekst og spørsmål
context = "Nå ønsker de seg en moderne bolig. Den bør være romslig og godt isolert."
question = "Hva slags bolig ønsker de seg?"

# Kode for å behandle og kjøre modellen
input_dict = tokenizer.encode_plus(question, context, return_tensors='pt')
input_ids = input_dict["input_ids"].tolist()[0]

outputs = model(**input_dict)
answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits

# Finner start- og sluttposisjonen for svaret i kontekstteksten
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1

# Konverterer ID-ene tilbake til tekst
# answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids))

print(answer)


[CLS] Hva slags bolig ønsker de seg?[SEP] Nå ønsker de seg en moderne bolig. Den bør være romslig og godt isolert.[SEP]


In [41]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("ltg/norbert3-large")
model = AutoModelForQuestionAnswering.from_pretrained("ltg/norbert3-large", trust_remote_code=True)

In [45]:


# Konteksttekst og spørsmål
# context = "Værest er bra, og det er sol"
# question = "Hvordan er været?"
context = """
Stig er sjef i Aboveit. Han er en veldig flink sjef,  noen ganger ler han av vitser.
"""
question = "Hei, hvem er sjeg i aboveit?"


# Kode for å behandle og kjøre modellen
input_dict = tokenizer.encode_plus(question, context, return_tensors='pt')
input_ids = input_dict["input_ids"].tolist()[0]

outputs = model(**input_dict)
answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits

# Finner start- og sluttposisjonen for svaret i kontekstteksten
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1

# Konverterer ID-ene tilbake til tekst for bare svaret
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids))
# answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

print(answer)



[CLS] Hei, hvem er sjeg i aboveit?[SEP] [UNK]Stig er sjef i Aboveit. Han er en veldig flink sjef,  noen ganger ler han av vitser.[UNK][SEP]


In [26]:

# Kode for å behandle og kjøre modellen
input_dict = tokenizer.encode_plus(question, context, return_tensors='pt')
input_ids = input_dict["input_ids"].tolist()[0]

outputs = model(**input_dict)
answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits

# Skriver ut de 5 beste start- og sluttscorene
print("Top 5 start scores:", torch.topk(answer_start_scores, 5))
print("Top 5 end scores:", torch.topk(answer_end_scores, 5))

# Finner start- og sluttposisjonen for svaret i kontekstteksten
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1

# Konverterer ID-ene tilbake til tekst for bare svaret
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

print("Svar:", answer)


Top 5 start scores: torch.return_types.topk(
values=tensor([[0., 0., 0., 0., 0.]], grad_fn=<TopkBackward0>),
indices=tensor([[0, 1, 2, 3, 4]]))
Top 5 end scores: torch.return_types.topk(
values=tensor([[0., 0., 0., 0., 0.]], grad_fn=<TopkBackward0>),
indices=tensor([[0, 1, 2, 3, 4]]))
Svar: [CLS]


In [21]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from transformers import AlbertTokenizer, AlbertForQuestionAnswering

# Load the pre-trained model and tokenizer
# model = AutoTokenizer.from_pretrained('WizardLM/WizardLM-70B-V1.0')
# tokenizer = AutoModelForQuestionAnswering.from_pretrained('WizardLM/WizardLM-70B-V1.0')

# model = AutoTokenizer.from_pretrained('WizardLM/WizardCoder-Python-34B-V1.0')
# tokenizer = AutoModelForQuestionAnswering.from_pretrained('WizardLM/WizardCoder-Python-34B-V1.0')
# https://huggingface.co/WizardLM/WizardLM-70B-V1.0

# model_name = "albert-large-v2"  # You can replace this with 'albert-large-v2', 'albert-xlarge-v2', etc.
# model_name = "WizardLM/WizardLM-70B-V1.0"  

# tokenizer = AlbertTokenizer.from_pretrained(model_name)
# model = AlbertForQuestionAnswering.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
tokenizer = AutoTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')


# tokenizer = AutoTokenizer.from_pretrained("ltg/norbert3-large")
# model = AutoModelForQuestionAnswering.from_pretrained("ltg/norbert3-large", trust_remote_code=True)
# Display the model's architecture
print(model)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,), ep

In [18]:
def get_answer(text, question):
    # Tokenize the input text and question
    inputs = tokenizer(question, text, return_tensors='pt', max_length=512, truncation=True)
    outputs = model(**inputs)
    # Get the start and end scores for the answer
    answer_start = torch.argmax(outputs.start_logits)
    answer_end = torch.argmax(outputs.end_logits) + 1
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))
    return answer

In [19]:
# text = """
# The Eiffel Tower, located in Paris, France, is one of the most iconic landmarks in the world. It was designed by Gustave Eiffel and completed in 1889. The tower stands at a height of 324 meters and was the tallest man-made structure in the world at the time of its completion.
# """
context = """
Stig is the boss at Aboveit. He is a very capable boss, and sometimes he laughs at jokes. Always use a capital letter at the beginning when referring to people..
"""
question = "Hello, who is the boss at Aboveit?"

# Get the answer to the question
answer = get_answer(context, question)
print(f"The answer to the question is: {answer}")
# Output: The answer to the question is: Gustave Eiffel

The answer to the question is: stig


In [25]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "mistralai/Mixtral-8x7B-v0.1"
# To use a different branch, change revision
# For example: revision="gptq-4bit-128g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                            #  device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

prompt = "Write a story about llamas"
system_message = "You are a story writing assistant"
prompt_template=f'''{prompt}
'''

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])


config.json: 100%|██████████| 720/720 [00:00<00:00, 8.30MB/s]
model.safetensors.index.json: 100%|██████████| 92.7k/92.7k [00:00<00:00, 862kB/s]
model-00001-of-00019.safetensors: 100%|██████████| 4.89G/4.89G [05:10<00:00, 15.7MB/s]
model-00002-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [04:30<00:00, 18.4MB/s]
model-00003-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [04:59<00:00, 16.6MB/s]
model-00004-of-00019.safetensors: 100%|██████████| 4.90G/4.90G [07:43<00:00, 10.6MB/s]
model-00005-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [05:29<00:00, 15.1MB/s]
model-00006-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [07:02<00:00, 11.8MB/s]
model-00007-of-00019.safetensors: 100%|██████████| 4.90G/4.90G [06:31<00:00, 12.5MB/s]
model-00008-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [05:53<00:00, 14.1MB/s]
Downloading shards:  42%|████▏     | 8/19 [47:23<1:08:08, 371.69s/it]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name_or_path = "mistralai/Mixtral-8x7B-v0.1"
# To use a different branch, change revision
# For example: revision="gptq-4bit-128g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                            #  device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

prompt = "Write a story about llamas"
system_message = "You are a story writing assistant"
prompt_template=f'''{prompt}
'''

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

# Inference can also be done using transformers' pipeline

print("*** Pipeline:")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])


config.json: 100%|██████████| 720/720 [00:00<00:00, 8.30MB/s]
model.safetensors.index.json: 100%|██████████| 92.7k/92.7k [00:00<00:00, 862kB/s]
model-00001-of-00019.safetensors: 100%|██████████| 4.89G/4.89G [05:10<00:00, 15.7MB/s]
model-00002-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [04:30<00:00, 18.4MB/s]
model-00003-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [04:59<00:00, 16.6MB/s]
model-00004-of-00019.safetensors: 100%|██████████| 4.90G/4.90G [07:43<00:00, 10.6MB/s]
model-00005-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [05:29<00:00, 15.1MB/s]
model-00006-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [07:02<00:00, 11.8MB/s]
model-00007-of-00019.safetensors: 100%|██████████| 4.90G/4.90G [06:31<00:00, 12.5MB/s]
model-00008-of-00019.safetensors: 100%|██████████| 4.98G/4.98G [05:53<00:00, 14.1MB/s]
Downloading shards:  42%|████▏     | 8/19 [47:23<1:08:08, 371.69s/it]