In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

In [2]:
from transformers.pipelines import pipeline

In [3]:
# Checkpoint id used below to load both the tokenizer and the base model.
pretrained_model = "mistralai/Mistral-7B-Instruct-v0.1"
# Location of the fine-tuned adapter that is later passed to PeftModel.from_pretrained.
adapter_path = "mlops-ft"

In [4]:
# Load the tokenizer from the same checkpoint id that the base model uses.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [5]:
# Load the base causal-LM checkpoint with float16 weights; device placement is
# delegated to the loader through device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=pretrained_model,
    torch_dtype=torch.float16,
    device_map="auto",
    return_dict=True,
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/5.06G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [6]:
# Attach the adapter stored at `adapter_path` to the base model, then merge it
# and drop the PEFT wrapper so `model` is what merge_and_unload() returns.
model = PeftModel.from_pretrained(base_model, adapter_path).merge_and_unload()

In [7]:
# Directory that receives the merged model weights and the tokenizer files.
save_to = "mlops-ft-merged"
model.save_pretrained(
    save_to,
    max_shard_size="4GB",
    safe_serialization=True,
)
# Kept as the last expression so the returned tuple of written tokenizer file
# paths is displayed as the cell output.
tokenizer.save_pretrained(save_to)

('mlops-ft-merged/tokenizer_config.json',
 'mlops-ft-merged/special_tokens_map.json',
 'mlops-ft-merged/tokenizer.model',
 'mlops-ft-merged/added_tokens.json',
 'mlops-ft-merged/tokenizer.json')

In [8]:
# Text-generation pipeline built around the merged model and its tokenizer.
llm_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling configuration: sampling is enabled with a low temperature and a
    # top-k of 10, and a single sequence is returned per prompt.
    do_sample=True,
    temperature=0.1,
    top_k=10,
    num_return_sequences=1,
    # Upper bound on newly generated tokens per completion (1024).
    max_new_tokens=1024,
    # Special-token ids taken from the tokenizer; its pad token was set to the
    # EOS token when the tokenizer was loaded.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

In [9]:

def get_prompt(question):
    """Build the healthcare question-answering prompt for a single question.

    The result is the fixed instruction text, a whitespace-only line, a
    ``### Question:`` section containing ``question``, a blank line, and a
    trailing ``### Answer:`` header followed by a newline.

    Args:
        question: The user question; it is interpolated into the prompt with
            normal f-string formatting.

    Returns:
        The assembled prompt string.

    NOTE(review): the template opens an ``[INST]`` tag but never emits a
    closing ``[/INST]``; confirm this matches the prompt format the adapter
    was fine-tuned on before changing it.
    """
    instruction = (
        "[INST]Act as a Multiple Answer Spans Healthcare Question Answering "
        "helpful assistant and answer the user's questions in details with "
        "reasoning. Do not give any false information. In case you don't have "
        "answer, specify why the question can't be answered."
    )
    # The four-space line after the instruction is part of the original
    # template and is reproduced verbatim.
    return f"{instruction}\n    \n### Question:\n{question}\n\n### Answer:\n"


In [11]:
# Run one question through the pipeline; return_full_text=False is passed so
# the prompt is not echoed back in the generated text.
result = llm_pipeline(
    get_prompt(question='What types of exercise are best for people with asthma?'),
    return_full_text=False,
)
# Print the text of every returned sequence.
for generated in result:
    print(generated['generated_text'])

Aerobic exercise, such as swimming, cycling, or jogging, can help you get your heart pumping and your lungs working harder. That can make asthma symptoms worse. But if you take steps to control your asthma, you can still exercise. You just need to know what to do. Here are some tips: Take your asthma medicines as prescribed. Use an inhaler or nebulizer as directed. Take a few deep breaths before you start to exercise. If you have a peak flow meter, use it to check your lung function before and after exercise. If your symptoms get worse, stop exercising and take your asthma medicines. If you have asthma, you don't have to sit on the sidelines. You can still be active and enjoy all the benefits of exercise. Just be smart about it.

### Reasoning:
Asthma can make it hard to breathe, especially when you're exercising. But with the right plan, you can still get your heart pumping and your lungs working harder. Here are some tips to help you exercise with asthma: Take your asthma medicines a