In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
compute_dtype = getattr(torch, "bfloat16")

In [3]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

In [4]:
model_dir = r"meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    quantization_config=quant_config,
    device_map="auto"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

bin c:\Users\pc\anaconda3\envs\chatbot\lib\site-packages\bitsandbytes\libbitsandbytes_cuda118.dll


Loading checkpoint shards: 100%|██████████| 2/2 [09:17<00:00, 278.55s/it]


In [6]:
model_dir = r"meta-llama/Llama-2-7b-chat-hf"
tokenizer =AutoTokenizer.from_pretrained(model_dir)

tokenizer_config.json: 100%|██████████| 1.62k/1.62k [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 35.9MB/s]
tokenizer.json: 100%|██████████| 1.84M/1.84M [00:01<00:00, 1.47MB/s]
special_tokens_map.json: 100%|██████████| 414/414 [00:00<?, ?B/s] 


In [7]:
pipeline = pipeline(
"text-generation",

model=model,

tokenizer=tokenizer,

torch_dtype=torch.bfloat16,

device_map="auto",

)

In [151]:
prompt_template = "<s>[INST] <<SYS>>\n \
                    Is the 'User' greeting you? Answer in a single word with 'Yes' or 'No' \n \n \
                    <</SYS>>\n\n \
                    User: {user_prompt} [/INST]"

In [163]:
prompt_template = "<s>[INST] <<SYS>>\n \
                    Is the 'User' prompt job-related? \n \
                    If yes, answer 'Yes' \n \
                    If no, answer with this sentence 'Sorry, I am only a job recommendation system' \n \n \
                    <</SYS>>\n\n \
                    User: {user_prompt} [/INST]"

In [181]:
prompt_template = "<s>[INST] <<SYS>>\n \
                    You are an honest job recommendation assistant. \n  \
                    If you don't know the answer, please don't share false information. \n \
                    \n \n \
                    <</SYS>>\n\n \
                    Given the following job options:\n \
                    - Receptionist\n \
                    - Data Scientist\n \
                    - Cloud engineer\n \
                    - Full Stack Developer\n \
                    - Backend Developer\n \
                    - Python Programmer\n \
                    - Financial Analyst\n \
                    - Junior Accountant\n \
                    - Credit Analyst\n \
                    - Risk Manager\n \
                    Match and list three best options in a single word without explanation, considering the following job description \n \
                    {user_prompt} \n [/INST]"

In [35]:
prompt_template.format(user_prompt="I really hate the movie, not gonna watch it again")

"<s>[INST] <<SYS>>\n                     Given the following options:\n                     - Positive\n                     - Negative\n                     - Neutral\n                     Perform sentiment analysis and select the best option, considering the following 'User' scenario:\n \n                     <</SYS>>\n\n                     User: I really hate the movie, not gonna watch it again [/INST]"

In [24]:
sequences = pipeline(

prompt_template.format(user_prompt="I really hate the movie, not gonna watch it again"),

do_sample=True,

top_k=10,

num_return_sequences=1,
return_full_text=False,
eos_token_id=tokenizer.eos_token_id,

max_length=100,

)

for seq in sequences:
    print(f"{seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


  Sure! Based on the provided text, the overall sentiment of the user towards the movie


In [182]:
sequences = pipeline(

prompt_template.format(user_prompt="I assist in financial record-keeping, prepare invoices and budgets."),

do_sample=True,
top_k=50,
num_return_sequences=1,
max_new_tokens=50,
return_full_text=False,
temperature=.9,
top_p=0.95,
pad_token_id=tokenizer.eos_token_id
)

for seq in sequences:
    print(f"{seq['generated_text']}")

  Sure, based on the job description you provided, here are three best job options for you:

1. Accountant
2. Financial Analyst
3. Risk Manager

All of these options align with the tasks


In [79]:
sequences

[{'generated_text': '  Great, based on your preference for using Python, I would recommend the following three job options:\n\n1. Python Programmer - This job'},
 {'generated_text': '  Great! Based on your preference for using Python, the best job options for you are:\n\n1. Python Programmer\n2. Data'}]