In [5]:
from transformers import MambaConfig, MambaForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from evaluate import load
from trl import SFTTrainer
from peft import LoraConfig
import numpy as np
from transformers import logging
logging.set_verbosity_error()

In [3]:
# checkpoint_path = '/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/retrain_checkpoints/checkpoint-62046'
checkpoint_path = '/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/retrain_checkpoints/checkpoint-517075'
config_path = "/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/checkpoints/checkpoint-3236/config.json"

config = MambaConfig.from_pretrained(config_path)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = MambaForCausalLM.from_pretrained(checkpoint_path, config=config)

In [4]:
dataset_dir = '/scratch/vetgpt/repo/MedVetGPT/qa_generate/0508_short2_nodigit/'

datasets = load_dataset('json', data_files={
    'train': dataset_dir + 'train.json',
    'test': dataset_dir + 'test.json'
})

In [5]:
# Define the tokenization function
def tokenize_function(examples):
    inputs = [q for q in examples["Question"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = tokenizer(examples["Answer"], max_length=128, truncation=True, padding="max_length")["input_ids"]
    return model_inputs

# Apply tokenization to both train and test datasets
tokenized_datasets = datasets.map(tokenize_function, batched=True)

Map: 100%|██████████| 558894/558894 [01:03<00:00, 8860.91 examples/s] 
Map: 100%|██████████| 62100/62100 [00:07<00:00, 8643.23 examples/s] 


In [6]:
tokenized_datasets["train"].num_rows

558894

In [7]:
rouge_metric = load("rouge")

In [8]:
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {key: value.mid.fmeasure * 100 for key, value in result.items()}

    result["avg_rouge"] = np.mean(list(result.values()))
    return result

In [9]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

In [10]:
from datasets import load_dataset

training_args = TrainingArguments(
    output_dir="/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/mamba_finetune_results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    logging_dir='/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/mamba_finetune_results/mamba_logs',
    logging_steps=100,
    learning_rate=2e-3,
    disable_tqdm=False,
    save_steps=500, 
    save_total_limit=3      
)

lora_config =  LoraConfig(
        r=8,
        target_modules=["x_proj", "embeddings", "in_proj", "out_proj"],
        task_type="CAUSAL_LM",
        bias="none"
)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=lora_config,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"]
)




[2024-11-11 15:14:35,556] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status


In [None]:
import torch
from torch.cuda.amp import autocast

checkpoint_path = "/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/mamba_finetune_results/checkpoint-100"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = MambaForCausalLM.from_pretrained(checkpoint_path)
model.to("cuda")

from datasets import load_dataset

dataset_dir = '/scratch/vetgpt/repo/MedVetGPT/qa_generate/0508_short2_nodigit/'
test_dataset = load_dataset('json', data_files={'test': dataset_dir + 'test.json'})['test']

def preprocess_function(examples):
    inputs = [q for q in examples["Question"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    return model_inputs

tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

def generate_predictions(batch):
    inputs = tokenizer(batch["Question"], return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids
    inputs = inputs.to("cuda")
    
    with torch.no_grad(), autocast():
        outputs = model.generate(inputs, max_new_tokens=30, num_beams=1)
    
    batch["predictions"] = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return batch

predicted_test_dataset = test_dataset.map(generate_predictions, batched=True, batch_size=128)


  with torch.no_grad(), autocast():
Map: 100%|██████████| 62100/62100 [23:30<00:00, 44.03 examples/s]


In [None]:
from rouge_score import rouge_scorer
import pandas as pd

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

rouge1_scores = {'r': [], 'p': [], 'f': []}
rouge2_scores = {'r': [], 'p': [], 'f': []}
rougeL_scores = {'r': [], 'p': [], 'f': []}

for pred, ref in zip(predicted_test_dataset["predictions"], predicted_test_dataset["Answer"]):
    scores = scorer.score(ref, pred)
    rouge1_scores['r'].append(scores['rouge1'].recall)
    rouge1_scores['p'].append(scores['rouge1'].precision)
    rouge1_scores['f'].append(scores['rouge1'].fmeasure)
    
    rouge2_scores['r'].append(scores['rouge2'].recall)
    rouge2_scores['p'].append(scores['rouge2'].precision)
    rouge2_scores['f'].append(scores['rouge2'].fmeasure)
    
    rougeL_scores['r'].append(scores['rougeL'].recall)
    rougeL_scores['p'].append(scores['rougeL'].precision)
    rougeL_scores['f'].append(scores['rougeL'].fmeasure)

rouge1_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge1_scores.items()}
rouge2_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge2_scores.items()}
rougeL_avg = {k: round(sum(v)/len(v), 4) for k, v in rougeL_scores.items()}

data = {
    "Model": ["MambaLMHead"],
    "ROUGE-1 r": [rouge1_avg['r']],
    "ROUGE-1 p": [rouge1_avg['p']],
    "ROUGE-1 f": [rouge1_avg['f']],
    "ROUGE-2 r": [rouge2_avg['r']],
    "ROUGE-2 p": [rouge2_avg['p']],
    "ROUGE-2 f": [rouge2_avg['f']],
    "ROUGE-L r": [rougeL_avg['r']],
    "ROUGE-L p": [rougeL_avg['p']],
    "ROUGE-L f": [rougeL_avg['f']]
}

df = pd.DataFrame(data)

print(df)

df.to_csv("/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/rouge_scores.csv", index=False, float_format="%.4f")

         Model ROUGE-1 r ROUGE-1 p ROUGE-1 f ROUGE-2 r ROUGE-2 p ROUGE-2 f  \
0  MambaLMHead    0.2735    0.0701    0.1118    0.0369    0.0072    0.0122   

  ROUGE-L r ROUGE-L p ROUGE-L f  
0    0.2456    0.0614    0.0954  


In [9]:
# 1.4b checkpoint
import torch
from torch.cuda.amp import autocast

checkpoint_path = "/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/mamba_finetune_results/checkpoint-600"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = MambaForCausalLM.from_pretrained(checkpoint_path)
model.to("cuda")

from datasets import load_dataset

dataset_dir = '/scratch/vetgpt/repo/MedVetGPT/qa_generate/0508_short2_nodigit/'
test_dataset = load_dataset('json', data_files={'test': dataset_dir + 'test.json'})['test']

def preprocess_function(examples):
    inputs = [q for q in examples["Question"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    return model_inputs

tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

def generate_predictions(batch):
    inputs = tokenizer(batch["Question"], return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids
    inputs = inputs.to("cuda")
    
    with torch.no_grad(), autocast():
        outputs = model.generate(inputs, max_new_tokens=30, num_beams=1)
    
    batch["predictions"] = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return batch

predicted_test_dataset = test_dataset.map(generate_predictions, batched=True, batch_size=128)

Generating test split: 62100 examples [00:00, 456686.77 examples/s]
Map: 100%|██████████| 62100/62100 [00:03<00:00, 18697.87 examples/s]
  with torch.no_grad(), autocast():
Map: 100%|██████████| 62100/62100 [11:58<00:00, 86.38 examples/s] 


In [None]:
from rouge_score import rouge_scorer
import pandas as pd

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

rouge1_scores = {'r': [], 'p': [], 'f': []}
rouge2_scores = {'r': [], 'p': [], 'f': []}
rougeL_scores = {'r': [], 'p': [], 'f': []}

for pred, ref in zip(predicted_test_dataset["predictions"], predicted_test_dataset["Answer"]):
    scores = scorer.score(ref, pred)
    rouge1_scores['r'].append(scores['rouge1'].recall)
    rouge1_scores['p'].append(scores['rouge1'].precision)
    rouge1_scores['f'].append(scores['rouge1'].fmeasure)
    
    rouge2_scores['r'].append(scores['rouge2'].recall)
    rouge2_scores['p'].append(scores['rouge2'].precision)
    rouge2_scores['f'].append(scores['rouge2'].fmeasure)
    
    rougeL_scores['r'].append(scores['rougeL'].recall)
    rougeL_scores['p'].append(scores['rougeL'].precision)
    rougeL_scores['f'].append(scores['rougeL'].fmeasure)

rouge1_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge1_scores.items()}
rouge2_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge2_scores.items()}
rougeL_avg = {k: round(sum(v)/len(v), 4) for k, v in rougeL_scores.items()}

data = {
    "Model": ["MambaCausalLM"],
    "ROUGE-1 r": [rouge1_avg['r']],
    "ROUGE-1 p": [rouge1_avg['p']],
    "ROUGE-1 f": [rouge1_avg['f']],
    "ROUGE-2 r": [rouge2_avg['r']],
    "ROUGE-2 p": [rouge2_avg['p']],
    "ROUGE-2 f": [rouge2_avg['f']],
    "ROUGE-L r": [rougeL_avg['r']],
    "ROUGE-L p": [rougeL_avg['p']],
    "ROUGE-L f": [rougeL_avg['f']]
}

df = pd.DataFrame(data)

print(df)

df.to_csv("/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/rouge_scores_2.csv", index=False, float_format="%.4f")

           Model ROUGE-1 r ROUGE-1 p ROUGE-1 f ROUGE-2 r ROUGE-2 p ROUGE-2 f  \
0  MambaCausalLM    0.3127    0.0863    0.1353    0.0489    0.0107    0.0176   

  ROUGE-L r ROUGE-L p ROUGE-L f  
0    0.2783    0.0732    0.1159  


In [11]:
# 1.4b checkpoint
import torch
from torch.cuda.amp import autocast

checkpoint_path = "/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/mamba_finetune_results/checkpoint-600"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = MambaForCausalLM.from_pretrained(checkpoint_path)
model.to("cuda")

from datasets import load_dataset

dataset_dir = '/scratch/vetgpt/repo/MedVetGPT/qa_generate/0508_short2_nodigit/'
test_dataset = load_dataset('json', data_files={'test': dataset_dir + 'test.json'})['test']

def preprocess_function(examples):
    inputs = [q for q in examples["Question"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    return model_inputs

tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

def generate_predictions(batch):
    inputs = tokenizer(batch["Question"], return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids
    inputs = inputs.to("cuda")
    
    with torch.no_grad(), autocast():
        outputs = model.generate(inputs, max_new_tokens=30, num_beams=1)
    
    batch["predictions"] = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return batch

predicted_test_dataset = test_dataset.map(generate_predictions, batched=True, batch_size=128)

  with torch.no_grad(), autocast():
Map: 100%|██████████| 62100/62100 [10:37<00:00, 97.45 examples/s] 


In [None]:
def test_on_text(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids
    inputs = inputs.to("cuda")
    
    with torch.no_grad(), autocast():
        outputs = model.generate(inputs, max_new_tokens=30, num_beams=1)
    
    predictions = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return predictions

example_question = "What are dogs usually capable of resolving quickly with toys?"
prediction = test_on_text(example_question)
print(f"Question: {example_question}")
print(f"Prediction: {prediction}")

  with torch.no_grad(), autocast():


Question: What are dogs usually capable of resolving quickly with toys?
Prediction: What are dogs usually capable of resolving quickly with toys? What are they not? What are the most common toys that dogs like to play with? What are the most common toys that dogs like to play with


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("azsxscdvfb/VetMedGPT-1B-chat-V0.2")
model = AutoModelForCausalLM.from_pretrained("azsxscdvfb/VetMedGPT-1B-chat-V0.2")

prompt = "What is contaminated with rodent droppings?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=24, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.
Both `max_new_tokens` (=512) and `max_length`(=24) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


What is contaminated with rodent droppings? The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rodent. The most common rodent that can be found in the home is the most common rode

In [3]:
prompt = "What are dogs usually capable of resolving quickly with toys?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=50, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Both `max_new_tokens` (=512) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


What are dogs usually capable of resolving quickly with toys? The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat is a dog's inability to eat. The most common cause of a dog's inability to eat. The most common ca

In [None]:
# 1.4b checkpoint
import torch
from torch.cuda.amp import autocast

checkpoint_path = '/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/retrain_checkpoints/checkpoint-517075'
# from transformers import AutoConfig

# config = AutoConfig.from_pretrained(checkpoint_path)
config_path = "/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/checkpoints/checkpoint-3236/config.json"

config = MambaConfig.from_pretrained(config_path)

tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = MambaForCausalLM.from_pretrained(checkpoint_path, config=config, ignore_mismatched_sizes=True)

model.to("cuda")

In [None]:
# 1.4b checkpoint
from datasets import load_dataset

dataset_dir = '/scratch/vetgpt/repo/MedVetGPT/qa_generate/0508_short2_nodigit/'
test_dataset = load_dataset('json', data_files={'test': dataset_dir + 'test.json'})['test']

def preprocess_function(examples):
    inputs = [q for q in examples["Question"]]
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    return model_inputs

tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

def generate_predictions(batch):
    inputs = tokenizer(batch["Question"], return_tensors="pt", padding=True, truncation=True, max_length=128).input_ids
    inputs = inputs.to("cuda")
    
    with torch.no_grad(), autocast():
        outputs = model.generate(inputs, max_new_tokens=30, num_beams=1)
    
    batch["predictions"] = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return batch

predicted_test_dataset = test_dataset.map(generate_predictions, batched=True, batch_size=128)

Map: 100%|██████████| 62100/62100 [00:04<00:00, 13841.50 examples/s]
  with torch.no_grad(), autocast():
Map: 100%|██████████| 62100/62100 [11:35<00:00, 89.29 examples/s] 


In [22]:
from rouge_score import rouge_scorer
import pandas as pd

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

rouge1_scores = {'r': [], 'p': [], 'f': []}
rouge2_scores = {'r': [], 'p': [], 'f': []}
rougeL_scores = {'r': [], 'p': [], 'f': []}

for pred, ref in zip(predicted_test_dataset["predictions"], predicted_test_dataset["Answer"]):
    scores = scorer.score(ref, pred)
    rouge1_scores['r'].append(scores['rouge1'].recall)
    rouge1_scores['p'].append(scores['rouge1'].precision)
    rouge1_scores['f'].append(scores['rouge1'].fmeasure)
    
    rouge2_scores['r'].append(scores['rouge2'].recall)
    rouge2_scores['p'].append(scores['rouge2'].precision)
    rouge2_scores['f'].append(scores['rouge2'].fmeasure)
    
    rougeL_scores['r'].append(scores['rougeL'].recall)
    rougeL_scores['p'].append(scores['rougeL'].precision)
    rougeL_scores['f'].append(scores['rougeL'].fmeasure)

rouge1_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge1_scores.items()}
rouge2_avg = {k: round(sum(v)/len(v), 4) for k, v in rouge2_scores.items()}
rougeL_avg = {k: round(sum(v)/len(v), 4) for k, v in rougeL_scores.items()}

data = {
    "Model": ["MambaCausalLM"],
    "ROUGE-1 r": [rouge1_avg['r']],
    "ROUGE-1 p": [rouge1_avg['p']],
    "ROUGE-1 f": [rouge1_avg['f']],
    "ROUGE-2 r": [rouge2_avg['r']],
    "ROUGE-2 p": [rouge2_avg['p']],
    "ROUGE-2 f": [rouge2_avg['f']],
    "ROUGE-L r": [rougeL_avg['r']],
    "ROUGE-L p": [rougeL_avg['p']],
    "ROUGE-L f": [rougeL_avg['f']]
}

df = pd.DataFrame(data)

print(df)

df.to_csv("/scratch/vetgpt/vetgpt-rlp/mamba/mamba_ssm/rouge_scores_4.csv", index=False, float_format="%.4f")

           Model  ROUGE-1 r  ROUGE-1 p  ROUGE-1 f  ROUGE-2 r  ROUGE-2 p  \
0  MambaCausalLM     0.3836     0.1706     0.2216     0.2134     0.0868   

   ROUGE-2 f  ROUGE-L r  ROUGE-L p  ROUGE-L f  
0     0.1169     0.3314     0.1428     0.1875  


In [3]:
input_ids = tokenizer("What is the most common type of human dendritic cell?", return_tensors="pt")["input_ids"]

out = model.generate(input_ids, max_new_tokens=14)

print(tokenizer.batch_decode(out, skip_special_tokens=True))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


['What is the most common type of human dendritic cell?\n\nDendritic cells are the most common type of immune cell']


In [4]:
prompt = "What is contaminated with rodent droppings?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=24, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What is contaminated with rodent droppings?

The answer is that it is contaminated with rodent droppings.




In [30]:
prompt = "What is the name of the indigenous Cerrado?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=30, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What is the name of the indigenous Cerrado?

The Cerrado is a large area of tropical rainforest in the south of Brazil


In [31]:
prompt = "What is the most infected animal in the UK?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=30, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What is the most infected animal in the UK?

The most infected animal in the UK is the dog.

What is the most infected


In [36]:
prompt = "What is the area of the mouth that your dog has bad breath?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=44, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What is the area of the mouth that your dog has bad breath?

A:

The area of the mouth that your dog has bad breath is the area of the mouth that is covered by the tongue.


In [3]:
prompt = "My dog cannot stand, what is the reason?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=44, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

My dog cannot stand, what is the reason?

A:

The reason is that your dog is not a dog.

A:

The reason is that your dog is not a dog.


In [4]:
prompt = "My cat is not standing"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=44, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

My cat is not standing still.

I am not sure if I am being a little bit too harsh on the cat, but I am not sure if I am being a little bit too harsh on the cat.


In [5]:
prompt = "Explain why my cat cannot stand"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=44, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Explain why my cat cannot stand the smell of the food.

Answer:

The cat is not able to smell the food because it is not in the same room as the food. The cat is not


In [None]:
# Fine-tuning with High-Quality Data
# Instruction tuning - fine-tune the model on datasets where instructions or questions are mapped to relevant responses, helping the model generate focused answers.
# Guiding examples in the prompt to get desired behaviour
# Incorporate RAG on some faq's, context and use my model as generator?
# Feedback loops - regularly retrain the model using real-world interactions or feedback to improve the accuracy of the responses.
# Mamba struggles with coherence if it lacks prior context. Use longer sequences during fine-tuning to capture context across multiple turns of dialogue.

In [18]:
from datasets import load_dataset
test_dir = '/scratch/vetgpt/vetgpt-rlp/mamba/qa-pairs/train.json'

test = load_dataset('json', data_files={
    'train': test_dir
})

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 4044 examples [00:00, 182559.28 examples/s]


In [19]:
test

DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer'],
        num_rows: 4044
    })
})

In [2]:
model

MambaForCausalLM(
  (backbone): MambaModel(
    (embeddings): Embedding(50280, 2048)
    (layers): ModuleList(
      (0-47): 48 x MambaBlock(
        (norm): MambaRMSNorm(2048, eps=1e-05)
        (mixer): MambaMixer(
          (conv1d): Conv1d(4096, 4096, kernel_size=(4,), stride=(1,), padding=(3,), groups=4096)
          (act): SiLU()
          (in_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (x_proj): Linear(in_features=4096, out_features=160, bias=False)
          (dt_proj): Linear(in_features=128, out_features=4096, bias=True)
          (out_proj): Linear(in_features=4096, out_features=2048, bias=False)
        )
      )
    )
    (norm_f): MambaRMSNorm(2048, eps=1e-05)
  )
  (lm_head): Linear(in_features=2048, out_features=50280, bias=False)
)

In [10]:
prompt = "How often should I vaccinate my cat?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

How often should I vaccinate my cat?

The American Kennel Club (AKC) recommends vaccinating your cat every six months. The AKC recommends vaccinating your cat every three months.

What is the best way to vaccinate my cat?

The best way to vaccinate your cat


In [11]:
prompt = "Can pets transmit Lyme disease to humans?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Can pets transmit Lyme disease to humans?

Lyme disease is a tick-borne disease that can be spread by ticks. It is transmitted by the bite of a tick that carries the spirochete Borrelia burgdorferi.

The tick is a member of the family Ix


In [12]:
prompt = "What is the best diet for a growing puppy?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What is the best diet for a growing puppy?

A:

I would recommend a low-carb diet.  This is because the puppy is growing at a very rapid rate and the body needs to be able to use the energy it is getting from the food.  The puppy will need to eat a


In [13]:
prompt = "What are the common complications during animal labor?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

What are the common complications during animal labor?

The most common complications during labor are as follows:

• Fetal distress

• Fetal distress

• Fetal distress

• Fetal distress

• Fetal distress

• Fetal distress

• Fetal distress



In [14]:
prompt = "Can I give my dog homemade food instead of kibble?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Can I give my dog homemade food instead of kibble?

Yes, but you have to be careful. If you give your dog homemade food, you have to make sure that it is safe for your dog to eat. If you give your dog homemade food, you have to make sure that it is safe


In [15]:
prompt = "Can turtles get respiratory infections?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Can turtles get respiratory infections?

Turtles can get respiratory infections.

Turtles can get respiratory infections.

Turtles can get respiratory infections.

Turtles can get respiratory infections.

Turtles can get respiratory infections.

Turtles can get


In [16]:
prompt = "When is vomiting in cats considered an emergency?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

When is vomiting in cats considered an emergency?

Vomiting is a common sign of an emergency in cats. It is a sign of dehydration, which is a common cause of vomiting in cats.

What is the difference between vomiting and diarrhea?

Vomiting is the loss of fluid from the


In [17]:
prompt = "How do I stop bleeding from a pet's wound?"
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(inputs["input_ids"], max_length=64, temperature=1)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

How do I stop bleeding from a pet's wound?

A:

The best way to stop bleeding is to stop the bleeding.
If you have a pet that is bleeding from a wound, you should immediately stop the bleeding.
If you have a pet that is bleeding from a wound, you should
