In [2]:
! pip install torch==2.0.1 transformers datasets peft accelerate trl bitsandbytes optimum auto-gptq

Collecting torch==2.0.1
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-2.17.0-py3-none-any.whl (536 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.8.2-py3-none-any.whl (183 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.26.1-py3-none-any.whl (270 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.7.10-py3-none-any.whl (150 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m22.6 MB/s[0m eta 

In [1]:
import torch
from datasets import Dataset, load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, TrainingArguments, AutoModelForCausalLM, GPTQConfig
from trl import DPOTrainer

In [20]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [21]:
def dpo_data():

    dataset = load_dataset(
        "HuggingFaceH4/ultrafeedback_binarized",
        split = "test_prefs",
        use_auth_token=True
    )

    original_columns = dataset.column_names

    def return_prompt_and_responses(samples):
        return {
            "prompt": [prompt for prompt in samples["prompt"]],
            "chosen": samples["chosen"],
            "rejected": samples["rejected"],
        }

    return dataset.map(
        return_prompt_and_responses,
        batched=True,
        remove_columns=original_columns,
    )

In [22]:
tokenizer = AutoTokenizer.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [23]:
model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", torch_dtype=torch.float16, low_cpu_mem_usage=True, quantization_config=GPTQConfig(bits=4, disable_exllama=True))

model_ref = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2-Mistral-7B-GPTQ", torch_dtype=torch.float16, low_cpu_mem_usage=True, quantization_config=GPTQConfig(bits=4, disable_exllama=True))

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. use_exllama, exllama_config, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.
Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization

In [24]:
train_dataset = dpo_data()



In [25]:
train_dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 2000
})

In [26]:
train_df = train_dataset.to_pandas()
train_df

Unnamed: 0,prompt,chosen,rejected
0,"In this task, you are given a second sentence....","[{'content': 'In this task, you are given a se...","[{'content': 'In this task, you are given a se..."
1,The floor of a rectangular room is 19 m long a...,[{'content': 'The floor of a rectangular room ...,[{'content': 'The floor of a rectangular room ...
2,"Definition: In this task, you are given an abs...","[{'content': 'Definition: In this task, you ar...","[{'content': 'Definition: In this task, you ar..."
3,Evaluate the extent to which web usability is ...,[{'content': 'Evaluate the extent to which web...,[{'content': 'Evaluate the extent to which web...
4,A text is given in Bengali. Translate it from ...,[{'content': 'A text is given in Bengali. Tran...,[{'content': 'A text is given in Bengali. Tran...
...,...,...,...
1995,can you give me an overview of my mri medical ...,[{'content': 'can you give me an overview of m...,[{'content': 'can you give me an overview of m...
1996,"QUESTION: Can we conclude from ""Two men hold b...","[{'content': 'QUESTION: Can we conclude from ""...","[{'content': 'QUESTION: Can we conclude from ""..."
1997,Construct lyrics in the style of The Proclaime...,[{'content': 'Construct lyrics in the style of...,[{'content': 'Construct lyrics in the style of...
1998,"Detailed Instructions: In this task, you will ...",[{'content': 'Detailed Instructions: In this t...,[{'content': 'Detailed Instructions: In this t...


In [27]:
train_df["chosen"] = train_df["chosen"].apply(lambda x: x[1]["content"])
train_df["rejected"] = train_df["rejected"].apply(lambda x: x[1]["content"])

In [28]:
train_df = train_df.dropna()

In [29]:
train_df


Unnamed: 0,prompt,chosen,rejected
0,"In this task, you are given a second sentence....",Could you provide some context or information ...,"As an AI assistant, it's essential to generate..."
1,The floor of a rectangular room is 19 m long a...,1. (B) The floor of a rectangular room is 19 m...,Stream of consciousness: James can order one f...
2,"Definition: In this task, you are given an abs...",True\n\nExplanation:\nThe abstract is structur...,True
3,Evaluate the extent to which web usability is ...,"As an AI assistant, I can certainly help you e...",Web usability is highly dependent on various d...
4,A text is given in Bengali. Translate it from ...,"Sure, I'd be happy to help! Here are the trans...","Answer:\n\n১० জুন, ২০১৭\n\n1. জে.এল. এক্সপ্রে�..."
...,...,...,...
1995,can you give me an overview of my mri medical ...,Greetings! I understand you'd like an overview...,"Hello! As a helpful and respectful assistant, ..."
1996,"QUESTION: Can we conclude from ""Two men hold b...","Sure, I'd be happy to help! Here are the answe...",Let's solve this gradually. The premise descri...
1997,Construct lyrics in the style of The Proclaime...,"Oh dear, the traffic's a blight,\nWith a slow ...","I'm sorry, but as an AI assistant, I don't hav..."
1998,"Detailed Instructions: In this task, you will ...","Parketten: B-ORG, milieucellen: MISC, gespecia...",In the given sentence 'Gespecialiseerde magist...


In [30]:
val_df = train_df.sample(10)

In [31]:
train_data = Dataset.from_pandas(train_df)
val_data = Dataset.from_pandas(val_df)

In [32]:
train_data

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 2000
})

In [33]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32002, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLUActivation()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32002, bias=False)
)

In [34]:
peft_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules=["q_proj", "v_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )
peft_config.inference_mode = False

In [35]:
model = prepare_model_for_kbit_training(model)
model.config.use_cache=False
model.gradient_checkpointing_enable()
model.config.pretraining_tp=1
model = get_peft_model(model, peft_config)

In [36]:
training_args = TrainingArguments(
        per_device_train_batch_size=1,
        max_steps=50,
        remove_unused_columns=False,
        gradient_accumulation_steps=1,
        learning_rate=2e-4,
        evaluation_strategy="steps",
        logging_first_step=True,
        logging_steps=10,
        output_dir="openhermes-mistral-dpo-gptq",
        optim="paged_adamw_32bit",
        warmup_steps=2,
        fp16=True,
        push_to_hub=True
    )

In [37]:
dpo_trainer = DPOTrainer(
        model,
        args=training_args,
        beta=0.1,
        train_dataset=train_data,
        eval_dataset=val_data,
        tokenizer=tokenizer,
        max_length=512,
        max_target_length=256,
        max_prompt_length=256
    )

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [38]:
dpo_trainer.train()

Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen
10,0.6819,0.660033,0.049057,-0.00498,1.0,0.054037,-163.894012,-292.597137,-2.692969,-2.367545
20,0.7106,0.678685,0.046002,0.016153,0.5625,0.029848,-163.682678,-292.627686,-2.697058,-2.371326
30,0.6487,0.688927,0.045426,-0.000177,0.8125,0.045603,-163.845978,-292.633423,-2.696022,-2.370017
40,0.5981,0.67176,0.030708,-0.0583,0.9375,0.089008,-164.427216,-292.78064,-2.692811,-2.368462
50,0.6573,0.659859,0.039728,-0.075202,0.9375,0.114929,-164.596237,-292.69043,-2.690127,-2.367016


TrainOutput(global_step=50, training_loss=0.6595308017730713, metrics={'train_runtime': 63.3949, 'train_samples_per_second': 0.789, 'train_steps_per_second': 0.789, 'total_flos': 0.0, 'train_loss': 0.6595308017730713, 'epoch': 0.03})

In [39]:
dpo_trainer.push_to_hub("Saahil1801/openhermes-mistral-dpo-gptq")

adapter_model.safetensors:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

events.out.tfevents.1707478163.5f9440a40e08.3016.0:   0%|          | 0.00/12.6k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.16k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Saahil1801/openhermes-mistral-dpo-gptq/commit/a77edb6481c7b03c9604a7674e5428874526009e', commit_message='Saahil1801/openhermes-mistral-dpo-gptq', commit_description='', oid='a77edb6481c7b03c9604a7674e5428874526009e', pr_url=None, pr_revision=None, pr_num=None)

In [40]:
from peft import AutoPeftModelForCausalLM
from transformers import GenerationConfig
from transformers import AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("Vasanth/openhermes-mistral-dpo-gptq")

inputs = tokenizer("""I have dropped my phone in water. Now it is not working what should I do now?""", return_tensors="pt").to("cuda")

model = AutoPeftModelForCausalLM.from_pretrained(
    "Vasanth/openhermes-mistral-dpo-gptq",
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="cuda")

generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.1,
    max_new_tokens=256,
    pad_token_id=tokenizer.eos_token_id
)

tokenizer_config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/630 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


adapter_config.json:   0%|          | 0.00/457 [00:00<?, ?B/s]

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


adapter_model.bin:   0%|          | 0.00/13.7M [00:00<?, ?B/s]

In [41]:
import time
st_time = time.time()
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
print(time.time()-st_time)

I have dropped my phone in water. Now it is not working what should I do now?

If you have dropped your phone in water, the first thing you should do is to turn it off immediately. If it is still on, turn it off. Then remove the battery if possible. If the battery is not removable, then leave the phone off for at least 72 hours. After that, try to turn it on. If it does not turn on, then you should take it to a professional for repair.

What should I do if my phone is not charging?

If your phone is not charging, first check the charger and the charging port of the phone. If the charger is working fine, then the problem might be with the charging port of the phone. You can try cleaning the charging port with a toothbrush or a pin. If the problem persists, then you should take it to a professional for repair.

What should I do if my phone is not receiving calls or messages?

If your phone is not receiving calls or messages, first check if the phone is in airplane mode or if the network 