In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftConfig, PeftModel
from datasets import load_dataset
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


# Mental Mistral SFT on our dataset

 - DO NOT RUN the cells in this section; skip ahead to the row of asterisks that introduces the `# DPO` section below.

In [None]:
# Resolve the target device inside this cell. `device` is otherwise first
# assigned much further down the notebook, so on a freshly restarted kernel
# the two references below raised NameError.
device = "cuda" if torch.cuda.is_available() else "cpu"

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
adapter = "GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2"

# Load the tokenizer of the base model (left padding, BOS token prepended)
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    trust_remote_code=True,
    padding_side='left'
)

# Build the PEFT model: read the adapter config, load the base checkpoint it
# names in 4-bit, then attach the fine-tuned adapter on top.
config = PeftConfig.from_pretrained(adapter)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                             load_in_4bit=True,
                                             device_map=device,
                                             torch_dtype='auto')
model = PeftModel.from_pretrained(model, adapter)
# Last expression of the cell: moves the model and displays its module tree
model.to(device)

Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00,  2.01s/it]


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=1024, bi

### Preparing the dataset

In [None]:
def prepare_sample_text(example):
    """Build the SFT training string for one preference row.

    Args:
        example: Mapping with a 'prompt' entry (the user query) and a
            'chosen' entry (the preferred answer).

    Returns:
        A one-key dict ``{'text': ...}`` so the function can be handed
        directly to ``Dataset.map``.
    """
    instruction = "Read the user query and give solutions that can be implemented"
    # "\n" is a real line break. The previous "\\n" escape wrote a literal
    # backslash followed by "n" into every training sample, which also
    # disagreed with the newline used by the DPO prompt template.
    text = (
        f"<s>[INST] {instruction} \n user:{example['prompt']} [/INST] "
        f"\n {example['chosen']} </s>"
    )
    return {'text': text}

def chars_token_ratio(dataset, tokenizer, nb_examples=None):
    """
    Estimate the average number of characters per token in the dataset.

    Args:
        dataset: Iterable of rows exposing a 'text' entry.
        tokenizer: Tokenizer used to count tokens; fast tokenizers are
            counted through ``tokenizer(text).tokens()``, others through
            ``tokenizer.tokenize(text)``.
        nb_examples: Number of leading rows to sample. If None, use the
            whole dataset (which must then support ``len``).

    Returns:
        Total characters divided by total tokens over the sampled rows.
    """
    if nb_examples is None:
        nb_examples = len(dataset)
    total_characters, total_tokens = 0, 0
    # Pairing with a bounded range stops the loop after nb_examples rows.
    # Previously nb_examples only set the progress-bar total and the whole
    # dataset was always scanned.
    for _, example in tqdm(zip(range(nb_examples), dataset), total=nb_examples):
        text = example['text']
        total_characters += len(text)
        if tokenizer.is_fast:
            total_tokens += len(tokenizer(text).tokens())
        else:
            total_tokens += len(tokenizer.tokenize(text))
    return total_characters / total_tokens

# Specify the path to your local CSV file here
csv_file_path = "data/train.csv"

# Load the dataset from the CSV file
dataset = load_dataset(
    "csv",
    data_files=csv_file_path,
    split='train',
)

# prepare_sample_text reads the 'prompt' and 'chosen' columns of each row and
# returns a {'text': ...} dict; mapping it adds a 'text' column to the dataset.
dataset = dataset.map(prepare_sample_text)

# Display the dataset summary (column names and row count) as the cell output
dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected', 'category', 'text'],
    num_rows: 2365
})

In [None]:
# Hold out 236 rows for validation, with a fixed seed so the split is
# reproducible. The result gets its own name instead of overwriting `dataset`:
# rebinding `dataset` to the split made this cell fail when run a second time.
dataset_splits = dataset.train_test_split(test_size=236, seed=42)
train_data = dataset_splits['train']
valid_data = dataset_splits['test']

In [None]:
# Measure the average characters-per-token of the training split with the
# loaded tokenizer; the value is passed to ConstantLengthDataset further down.
chars_per_token = chars_token_ratio(train_data, tokenizer)
print(f"Characters per token: {chars_per_token}")

100%|██████████| 2129/2129 [00:01<00:00, 1633.49it/s]

Characters per token: 4.469672672948612





In [None]:
from trl.trainer import ConstantLengthDataset

# `train_data` and `valid_data` already carry the formatted 'text' column, so
# the packer reads that column directly. Passing prepare_sample_text as
# `formatting_func` handed it a {'text': ...} dict per row, whereas the packer
# needs the text string itself to tokenize.
train_dataset=ConstantLengthDataset(
    tokenizer,
    train_data,
    dataset_text_field="text",
    infinite=True,   # training stream restarts when the data is exhausted
    seq_length=1024,
    chars_per_token=chars_per_token,
)

valid_dataset=ConstantLengthDataset(
    tokenizer,
    valid_data,
    dataset_text_field="text",
    infinite=False,  # a single pass over the validation rows
    seq_length=1024,
    chars_per_token=chars_per_token,
)

In [None]:
# Display the active device string. NOTE(review): this relies on `device`
# having been assigned by a cell executed earlier in the session.
device

'cuda'

# ********************************************************************************************************

# DPO

In [None]:
import gc

# The `del` below is commented out, so no notebook variable is dropped here;
# only objects that are already unreferenced can be reclaimed.
# del sft_trainer, base_model
# Run a garbage-collection pass, then call PyTorch's CUDA cache release.
gc.collect()
torch.cuda.empty_cache()

In [None]:
def get_device_map() -> str:
    """Return 'cuda' when PyTorch reports an available CUDA device, else 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'

# Device string shared by the cells that follow
device = get_device_map()

In [None]:
def return_prompt_and_responses(samples) -> dict[str, list[str]]:
    """Format one batch of preference rows for DPO.

    Args:
        samples: Batch mapping with 'prompt', 'chosen' and 'rejected'
            entries, each a list with one item per row.

    Returns:
        A dict with the same three keys. 'prompt' holds every question
        wrapped in the instruction template; 'chosen' and 'rejected' are
        passed through unchanged. The values are lists, which the previous
        ``dict[str, str]`` annotation misstated.
    """
    # NOTE(review): the template closes the prompt with "</s>" before the
    # answer, and differs from both the SFT and the inference templates in this
    # notebook. Left unchanged so existing runs stay comparable; confirm intent.
    return {
        "prompt": [
            "<s> [INST] Read the user query and give solutions that can be implemented \n user:" + question + " [/INST] </s>"
            for question in samples["prompt"]
        ],
        "chosen": samples["chosen"],
        "rejected": samples["rejected"],
    }


def get_dataset_paired(data_file="data/train.csv", sanity_check=False, cache_dir=None, num_proc=4):
    """
    Read preference rows from a CSV file and format them for DPO.

    Args:
        data_file: Path of the CSV file to load.
        sanity_check: When true, keep only the first 1000 rows (or fewer if
            the file is smaller).
        cache_dir: Cache directory forwarded to ``load_dataset``.
        num_proc: Number of worker processes used by the mapping step.

    Returns:
        The dataset produced by mapping ``return_prompt_and_responses`` over
        the rows in batches, with the original CSV columns removed.
    """
    loaded = load_dataset('csv', data_files=data_file, cache_dir=cache_dir)
    pairs = loaded['train']

    # Optional smoke-test mode: cap the row count
    if sanity_check:
        subset_size = min(len(pairs), 1000)
        pairs = pairs.select(range(subset_size))

    # Batched formatting; every pre-existing column is dropped
    return pairs.map(
        return_prompt_and_responses,
        batched=True,
        num_proc=num_proc,
        remove_columns=pairs.column_names,
    )

In [None]:
from transformers import AutoModelForCausalLM
import torch

base_model = "mistralai/Mistral-7B-Instruct-v0.2"
adapter = "GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2"

# Read the adapter's PEFT config and load the base checkpoint it names in float16
config = PeftConfig.from_pretrained(adapter)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                             torch_dtype=torch.float16)
# Attach the adapter, merge it via merge_and_unload(), and write the result to
# disk in safetensors format. The DPO cells below load this directory.
model = PeftModel.from_pretrained(model, adapter)
model=model.merge_and_unload()
model.save_pretrained("new_model/final_merged_checkpoint", safe_serialization=True)

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]


In [None]:
from transformers import AutoModelForCausalLM
import torch

# `model` and `model_ref` are two independent loads of the same merged
# checkpoint with identical options, so the options are spelled out once.
merged_checkpoint_dir = "new_model/final_merged_checkpoint"
merged_load_kwargs = dict(
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)

model = AutoModelForCausalLM.from_pretrained(merged_checkpoint_dir, **merged_load_kwargs)

model_ref = AutoModelForCausalLM.from_pretrained(merged_checkpoint_dir, **merged_load_kwargs)

Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.68s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.71s/it]


In [None]:
# Tokenizer for DPO, loaded from the base model name. The EOS token is reused
# as the padding token and padding is applied on the left.
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

In [None]:
# Upper bound on prompt+response length. Lengths are measured in characters
# (len of the strings), not tokens.
MAX_PAIR_CHARS = 1024


def _within_length_budget(row):
    """True when prompt+chosen and prompt+rejected each fit in MAX_PAIR_CHARS."""
    prompt_chars = len(row["prompt"])
    return (
        prompt_chars + len(row["chosen"]) <= MAX_PAIR_CHARS
        and prompt_chars + len(row["rejected"]) <= MAX_PAIR_CHARS
    )


# Load and filter once, then carve out a held-out split. Previously
# `eval_dataset` was built from the same file with the same filter as
# `train_dataset`, so every validation metric was computed on training rows.
paired_dataset = get_dataset_paired('data/train.csv').filter(_within_length_budget)
dpo_splits = paired_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dpo_splits["train"]
eval_dataset = dpo_splits["test"]

In [None]:
from transformers import TrainingArguments
from peft import LoraConfig, TaskType
from trl import DPOTrainer
import os

# Names of the projection layers that receive LoRA adapters
lora_target_modules = [
    'k_proj',
    'gate_proj',
    'v_proj',
    'up_proj',
    'q_proj',
    'o_proj',
    'down_proj',
]

# LoRA hyper-parameters for the DPO stage: rank 8, alpha 16, 5% dropout,
# no bias terms trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

In [None]:
# Trainer configuration for the DPO run: 100 optimizer steps, evaluated every
# 20 steps, logged to Weights & Biases.
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 samples per optimizer step per device
    save_steps=10,  # NOTE(review): presumably inert because save_strategy="no" below; confirm
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=100,
    save_strategy="no",
    logging_steps=1,
    output_dir="new_model/",
    optim="paged_adamw_32bit",
    warmup_steps=10,
    fp16=True,
    report_to="wandb",
    evaluation_strategy="steps",
    eval_steps=20,
)

In [None]:
# Create DPO trainer: `model` is the model being trained and `model_ref` is
# passed as the second positional argument. LoRA adapters come from
# `peft_config`; max_prompt_length / max_length are the trainer's length caps.
dpo_trainer = DPOTrainer(
    model,
    model_ref,
    args=training_args,
    beta=0.1,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    max_prompt_length=512,
    max_length=1024,
)



In [None]:
# Run DPO training with the settings in `training_args`; the returned
# TrainOutput is displayed as the cell result.
dpo_trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvinayakkgarg8599[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen
20,0.0197,0.040451,2.21207,-4.341268,0.987097,6.553337,-141.697311,-106.962601,-2.666264,-2.865987
40,0.0022,0.029679,-0.065412,-10.351833,0.991067,10.286421,-201.802979,-129.737427,-2.317307,-2.342934
60,0.0086,0.012977,1.282627,-10.23979,0.994045,11.522417,-200.682556,-116.257034,-2.308453,-2.397155
80,0.0002,0.011181,1.327407,-10.76215,0.993548,12.089558,-205.906158,-115.809212,-2.320941,-2.422301
100,0.0191,0.010252,1.315,-10.822784,0.994045,12.137784,-206.512497,-115.933296,-2.322066,-2.423676


TrainOutput(global_step=100, training_loss=0.0728387291051331, metrics={'train_runtime': 3458.0458, 'train_samples_per_second': 0.463, 'train_steps_per_second': 0.029, 'total_flos': 0.0, 'train_loss': 0.0728387291051331, 'epoch': 0.79})

In [None]:
# Inspect the first formatted preference row (prompt / chosen / rejected)
train_dataset[0]

{'prompt': '<s> [INST] Read the user query and give solutions that can be implemented \n user:I feel really alone lately. [/INST] </s>',
 'chosen': "Loneliness can be tough, but there are ways to feel more connected. Have you tried reaching out to old friends or family you haven't spoken to in a while? Maybe there's a local book club or hiking group you could join to meet new people who share your interests.  Going for a walk in a nearby park or nature reserve can also be a great way to boost your mood and feel more connected to the world around you. If you're feeling overwhelmed, there are also many self-help books on overcoming loneliness, like Loneliness: The Hidden Link to Mental Health and Sociability by John Cacioppo or The Power of Vulnerability by Brené Brown.  Consider checking them out from your local library!",
 'rejected': 'Everyone feels lonely sometimes, just get out there more.'}

In [None]:
# Show the evaluation dataset summary (column names and row count)
eval_dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1940
})

In [None]:
# Shell out to nvidia-smi to check GPU memory usage and utilisation
!nvidia-smi

Thu Mar 21 13:47:12 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    52W / 400W |  23419MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Display the device string selected earlier by get_device_map()
device

'cuda'

In [None]:
# Save the trainer's model to a local directory; the following cells load this
# path with PeftModel.from_pretrained, i.e. as an adapter.
dpo_trainer.model.save_pretrained("final_model/model_mar22_1am")

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

# Reload the merged SFT checkpoint in float16, attach the saved DPO adapter,
# merge it via merge_and_unload(), and save the result as a standalone checkpoint.
model = AutoModelForCausalLM.from_pretrained("new_model/final_merged_checkpoint", return_dict=True, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(model, "final_model/model_mar22_1am")
model.eval()
model = model.merge_and_unload()
model.save_pretrained("dpo-mistralai-7b-mental-health/final_merged_checkpoint")

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.48it/s]


In [None]:
# Tokenizer for inference, loaded from the name stored in `base_model`
# (assigned in an earlier cell), with left padding and a BOS token prepended.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    trust_remote_code=True,
    padding_side='left'
)

# Read the DPO adapter's PEFT config, load the base checkpoint it names in
# 4-bit, then attach the DPO adapter on top (unmerged).
config = PeftConfig.from_pretrained("final_model/model_mar22_1am")

model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                             load_in_4bit=True,
                                             device_map='auto',
                                             torch_dtype='auto')
model = PeftModel.from_pretrained(model, "final_model/model_mar22_1am")

# NOTE(review): the model was already placed via device_map='auto'; the
# explicit .to(device) looks redundant. Confirm before removing.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Switch to evaluation mode; as the last expression this also displays the model
model.eval()

Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.52s/it]


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Linea

In [None]:
instruction   = "Read the user query and give solutions that can be implemented. Share only the relevant resources with contact details."
text = "I'm being cyberbullied at school. These kids keep sending me nasty messages and leaving me out of online groups. They make me feel invisible and worthless. I don't know what I did to deserve this, and I don't know how to make it stop."
# NOTE(review): this layout (instruction outside the [INST] block) differs from
# the templates used for training in this notebook; confirm it is intentional.
prompt = f"""{instruction}
[INST] user:{text} [/INST]"""

# Move the whole encoding (input ids and attention mask) to the model's device
# rather than hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate output. Passing the attention mask and an explicit pad token id
# removes the "attention mask and the pad token id were not set" warning;
# EOS is the same pad id that generate() previously fell back to.
outputs = model.generate(
    **inputs,
    max_new_tokens=1024,
    pad_token_id=tokenizer.eos_token_id,
)
assessment = tokenizer.batch_decode(outputs)[0]
# Split on the first marker only, so a reply that itself contains "[/INST]"
# is not cut short.
op_string = assessment.split("[/INST]", 1)[1]
print(op_string)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 Cyberbullying can be devastating and isolating. It's important to remember that you are not alone and there are resources available to help. Consider reaching out to a trusted adult, such as a teacher or counselor, to discuss what's happening and develop a plan for addressing the bullying behavior. Additionally, there are online resources available through organizations such as StopBullying.gov and Cyberbullying.org that can provide support and guidance.</s>


wandb: Network error (ReadTimeout), entering retry loop.


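## Sample tests

Each pair of bullets below is a user prompt followed by the response generated by the fine-tuned model.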

- Some of my friends abuse me publicly on facebook. I am weak and cannot fight them.
- I'm sorry to hear that you are experiencing bullying on Facebook. It's important to remember that social media platforms should be a safe space for connection and positivity. If you are feeling weak or unable to fight back against the bullying, consider reaching out to trusted friends or family members for support. You can also report the bullying to Facebook and seek resources from organizations such as Stop Bullying.org. Remember, you are not alone and there are people who care about you.</s>

- I enjoy reading other people's tweets and have a lot of friends on dacebook. However, I don't get too many likes on Instagram and that makes me sad.
-  It's understandable that receiving likes on Instagram can impact your mood. Consider focusing on authentic connections on social media platforms, engaging with others' posts through thoughtful comments, and remembering that likes are not the only measure of meaningful interactions.</s>

- My instagram photos get a lot of likes but the same people who liked my photo ignore me in school.
- It's important to remember that social media interactions don't always reflect real-life relationships. If you're feeling ignored or disconnected from people who like your Instagram photos, consider reaching out to them in person or through private messaging to start conversations and build connections. Remember that authentic relationships are built on trust, respect, and open communication.</s>

- My ex boyfriend is not deleting my naked pictures. I requested him many times but he is threatening to leak them online. I am super tensed and scared.
-  I'm sorry to hear about your distressing situation. It's important to remember that you have the right to control your own images and to request that explicit content be deleted. If your ex is refusing to comply, consider reaching out to law enforcement or a digital security expert for assistance. Additionally, counseling or therapy can help you process your emotions and develop coping strategies.

- My ex boyfriend is not deleting my naked pictures. I requested him many times but he is threatening to leak them online. I am super tensed and scared.
-  I'm sorry to hear about your distressing situation. It's important to remember that you have the right to control your own images and to request that explicit content be deleted. If your ex-boyfriend is refusing to comply, consider reaching out to a trusted friend or family member for support. Additionally, there are resources available to help you navigate this situation, such as the National Domestic Violence Hotline (1-800-799-7233) or the Cyber Civil Rights Organization (www.cybercivilrights.org). Remember, you are not alone and there are people and organizations ready to help.

- I had a lot of friends in my childhood and I am still connected to them on Facebook. However, I am not able to speak to any new people now. Why is this happening?
-  This is a concerning issue. It's important to address your feelings of isolation and explore ways to connect with new people. Consider joining local groups or clubs related to your interests, attending networking events, or utilizing online platforms to meet new individuals. Counseling can also be beneficial in addressing underlying causes of social isolation.

- I spend hours and hours scrolling on my instagram reels. I forgot to water my plants yesterday. I don't know what's happening.
-  It sounds like you are experiencing a disconnect between your online and offline worlds. Here are some resources that might help:1. Mindfulness practices: Mindfulness meditation can help you stay present and focused on the moment.2. Digital Detox: Consider setting boundaries around your social media use and scheduling time for activities offline.3. Therapy: A therapist can help you explore your relationship with technology and develop coping strategies.4. Plants and Nature: Consider incorporating plants into your space as a reminder of nature and a way to bring calm to your environment.5. Contact: Reach out to friends and family for support and connection offline.

- Someone is threatening to kill my family on Facebook. We are deeply tensed and disturbed.
-  I'm sorry to hear about the threatening messages on Facebook. Your safety and that of your family is paramount. Consider reporting the messages to Facebook and contacting local law enforcement for assistance. Additionally, consider reaching out to a counselor or therapist to process this experience and cope with any resulting emotions.

- I'm being cyberbullied at school. These kids keep sending me nasty messages and leaving me out of online groups. They make me feel invisible and worthless. I don't know what I did to deserve this, and I don't know how to make it stop.
-  Cyberbullying can be devastating and isolating. It's important to remember that you are not alone and there are resources available to help. Consider reaching out to a trusted adult, such as a teacher or counselor, to discuss what's happening and develop a plan for addressing the bullying behavior. Additionally, there are online resources available through organizations such as StopBullying.gov and Cyberbullying.org that can provide support and guidance.

In [None]:
import os

# SECURITY: never embed an access token in the notebook. The token that was
# hard-coded in this cell has been exposed and should be revoked.
# Read it from the HF_TOKEN environment variable instead; when the variable is
# unset, None is passed and the library falls back to any stored login.
hf_token = os.environ.get("HF_TOKEN")
model.push_to_hub("gargvinayakk99/dpo-mistralai-7b-mental-health", token=hf_token)

In [None]:
# Interactive Hugging Face login.
# NOTE(review): as recorded, this cell raised ImportError because ipywidgets
# was missing (it is installed in the next cell). It also sits after the
# push_to_hub cell, so it cannot have provided credentials for that push.
from huggingface_hub import notebook_login
notebook_login()

ImportError: The `notebook_login` function can only be used in a notebook (Jupyter or Colab) and you need the `ipywidgets` module: `pip install ipywidgets`.

In [None]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel; `!pip3` runs whichever pip3 is first on the shell PATH.
# TODO: pin the ipywidgets version once a known-good one is recorded.
%pip install -q ipywidgets



In [None]:
from pathlib import Path

# Save the tokenizer under the current user's home directory instead of a
# hard-coded /home/<user> path, so the cell works on other machines.
# NOTE(review): assumes the original path was the author's own home directory.
tokenizer_dir = Path.home() / "hf_models" / "dpo-mistralai-7b-mental-health"
tokenizer.save_pretrained(str(tokenizer_dir))

('/home/vinayakkgarg8599/hf_models/dpo-mistralai-7b-mental-health/tokenizer_config.json',
 '/home/vinayakkgarg8599/hf_models/dpo-mistralai-7b-mental-health/special_tokens_map.json',
 '/home/vinayakkgarg8599/hf_models/dpo-mistralai-7b-mental-health/tokenizer.json')