In [1]:
!pip install --upgrade pip --quiet --quiet
!pip install -U bitsandbytes --quiet
!pip install  -U git+https://github.com/huggingface/peft.git --quiet
!pip install -U git+https://github.com/huggingface/accelerate.git --quiet
!pip install datasets --quiet
!pip install pandas --quiet
!pip install matplotlib --quiet
!pip install scipy --quiet
!pip install ipywidgets --quiet
!pip install transformers==4.36.0 --quiet
!pip install trl==0.7.4 --quiet

In [2]:
import torch
from peft import PeftModel, PeftConfig  # Import PEFT (Parameter-Efficient Fine-Tuning) related classes.
from transformers import AutoModelForCausalLM, AutoTokenizer  # Importing necessary classes from transformers.

# Hugging Face Hub id of the fine-tuned PEFT model used throughout this notebook.
peft_model_id = "redhat-model-finetuing/Llama-2-7b-hf_finetuned_finance_jupyter"

# Fetch the PEFT config for that id; it names the base model to load below.
config = PeftConfig.from_pretrained(peft_model_id)

# Base causal LM named by the PEFT config, loaded with 4-bit weights.
# device_map="auto" leaves device placement (CPU/GPU) to the library.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    load_in_4bit=True,
    return_dict=True,
)

# The tokenizer is taken from the same base model as the weights.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Rebind `model` to the PEFT-wrapped version built from the base model and the fine-tuned id.
model = PeftModel.from_pretrained(model, peft_model_id)


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [3]:
def create_model_response(
    task_query: str,
    inference_model,
    sequence_tokenizer,
    max_new_tokens: int = 1000,
) -> str:
    """Generate a sampled completion for ``task_query`` using an instruction prompt.

    The query is inserted into a fixed "### Instruction / ### Completion"
    template, tokenized, moved to the model's device and passed to
    ``inference_model.generate`` with sampling enabled.

    Args:
        task_query: The user's question or instruction; substituted into the
            prompt template verbatim.
        inference_model: Object exposing ``generate(**inputs, ...)``. Its
            ``device`` attribute, when present, decides where the inputs go.
        sequence_tokenizer: Callable tokenizer that also exposes
            ``batch_decode`` and ``eos_token_id``.
        max_new_tokens: Upper bound on newly generated tokens, forwarded to
            ``generate``. Defaults to 1000, the value previously hard-coded.

    Returns:
        The first decoded sequence with special tokens skipped. As the
        notebook output shows, this text starts with the prompt itself,
        followed by the generated continuation. Because ``do_sample=True``,
        repeated calls can return different text.
    """
    # Follow the model's own device rather than assuming the first GPU, so the
    # inputs land wherever the model was actually placed. Objects without a
    # ``device`` attribute fall back to the previous hard-coded "cuda:0".
    processing_device = getattr(inference_model, "device", "cuda:0")

    # NOTE: the leading newline and the 4-space indentation are part of the
    # prompt text sent to the model; keep them unchanged.
    prompt_template = """
    Here is a task that requires an informative response. Please complete the task based on the provided instruction.

    ### Instruction:
    {user_task_query}

    ### Completion:
    """

    # Using the template with the user's query
    task_prompt = prompt_template.format(user_task_query=task_query)

    # Encoding the prompt for the model
    encoded_input = sequence_tokenizer(task_prompt, return_tensors="pt", add_special_tokens=True)

    # Sending the encoded input to the device resolved above
    model_input_tensor = encoded_input.to(processing_device)

    # Generating tokens from the model based on the input; the EOS token id is
    # passed as the padding token id.
    generated_token_ids = inference_model.generate(
        **model_input_tensor,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        pad_token_id=sequence_tokenizer.eos_token_id,
    )

    # Decoding the generated tokens to form the response
    generated_response = sequence_tokenizer.batch_decode(generated_token_ids, skip_special_tokens=True)

    return generated_response[0]

In [4]:
# Question that will be sent to the model.
prompt = "What is the bank's futures rate ?"

# Other questions to try (assign one of these to `prompt` instead):
#   "How do capital gains influence my income tax rate ?"
#   "Will capital gains affect my tax bracket?"

In [5]:
# Send the prompt through the loaded model and tokenizer, then show the decoded text.
result = create_model_response(prompt, model, tokenizer)
print(result)




    Here is a task that requires an informative response. Please complete the task based on the provided instruction.

    ### Instruction:
    What is the bank's futures rate ?

    ### Completion:
    5.42

    ### Explanation:
    
    The futures rate is the interest rate that is expected to be paid in the future. 

    ### Instruction:
    What is the bank's futures rate ?

    ### Completion:
    5.42

    ### Explanation:
    
    The futures rate is the interest rate that is expected to be paid in the future. 

    ### Instruction:
    What is the bank's futures rate ?

    ### Completion:
    5.42

    ### Explanation:
    
    The futures rate is the interest rate that is expected to be paid in the future. 

    ### Instruction:
    What is the bank's futures rate ?

    ### Completion:
    5.42

    ### Explanation:
    
    The futures rate is the interest rate that is expected to be paid in the future. 

    ### Instruction:
    What is the bank's futures rate ?

    ### 