In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install trl transformers accelerate git+https://github.com/huggingface/peft.git -Uqqq
!pip install datasets bitsandbytes einops wandb -Uqqq

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer



In [4]:
import os
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

In [5]:
model_name = "ybelkada/falcon-7b-sharded-bf16" # falcon-7b model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model =AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/581 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00008.bin:   0%|          | 0.00/1.92G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00008.bin:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00004-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00005-of-00008.bin:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Downloading (…)l-00006-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00007-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00008-of-00008.bin:   0%|          | 0.00/921M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token # Setting pad_token same as eos_token

Downloading (…)okenizer_config.json:   0%|          | 0.00/180 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [None]:
# model.gradient_checkpointing_enable()

In [7]:
model = prepare_model_for_kbit_training(model)

In [8]:
lora_alpha = 32 # scaling factor for the weight matrices
lora_dropout = 0.05 # dropout probability of the LoRA layers
lora_rank = 32 # dimension of the low-rank matrices

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_rank,
    bias="none",  # setting to 'none' for only training weight params instead of biases
    task_type="CAUSAL_LM",
    target_modules=[         # Setting names of modules in falcon-7b model that we want to apply LoRA to
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

peft_model = get_peft_model(model, peft_config)

In [9]:
from datasets import load_dataset

data = load_dataset("heliosbrahma/mental_health_conversational_dataset")

Downloading readme:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/60.9k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/154 [00:00<?, ? examples/s]

In [10]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [11]:
output_dir = "./falcon-7b-sharded-bf16-finetuned-mental-health-conversational"
per_device_train_batch_size = 4 # reduce batch size by 2x if out-of-memory error
gradient_accumulation_steps = 4  # increase gradient accumulation steps by 2x if batch size is reduced
optim = "paged_adamw_8bit" # activates the paging for better memory management
save_strategy="steps" # checkpoint save strategy to adopt during training
save_steps = 10 # number of updates steps before two checkpoint saves
logging_steps = 10  # number of update steps between two logs if logging_strategy="steps"
learning_rate = 2e-4  # learning rate for AdamW optimizer
max_grad_norm = 0.3 # maximum gradient norm (for gradient clipping)
max_steps = 100        # training will happen for 320 steps
warmup_ratio = 0.03 # number of steps used for a linear warmup from 0 to learning_rate
lr_scheduler_type = "cosine"  # learning rate scheduler

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    bf16=False,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    push_to_hub=True,
)

trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data['train'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/154 [00:00<?, ? examples/s]

In [13]:
peft_model.config.use_cache = False
trainer.train()

Step,Training Loss
10,1.5402
20,1.3457
30,1.0539
40,0.7297
50,0.4876
60,0.3129
70,0.193
80,0.1466
90,0.1124
100,0.1031


TrainOutput(global_step=100, training_loss=0.6025098860263824, metrics={'train_runtime': 1244.4007, 'train_samples_per_second': 1.286, 'train_steps_per_second': 0.08, 'total_flos': 1.0701194650404864e+16, 'train_loss': 0.6025098860263824, 'epoch': 10.26})

In [14]:
trainer.save_model('./chatbot-falcon7b-100eps')

In [15]:
from transformers import GenerationConfig
import warnings
warnings.filterwarnings("ignore")

In [22]:
def generate_answer(query):
    system_prompt = """Answer the following question truthfully.
      If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
      If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

    user_prompt = f"""<HUMAN>: {query}
      <ASSISTANT>: """

    final_prompt = system_prompt + "\n" + user_prompt

    device = "cuda:0"
    dashline = "-".join("" for i in range(50))

    encoding = tokenizer(final_prompt, return_tensors="pt").to(device)
    outputs = model.generate(input_ids=encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=256, pad_token_id = tokenizer.eos_token_id, \
                                                                                                                     eos_token_id = tokenizer.eos_token_id, attention_mask = encoding.attention_mask, \
                                                                                                                     temperature=0.4, top_p=0.6, repetition_penalty=1.3, num_return_sequences=1,))
    text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(dashline)
    print(f'ORIGINAL MODEL RESPONSE:\n{text_output}')
    print(dashline)

    peft_encoding = tokenizer(final_prompt, return_tensors="pt").to(device)
    peft_outputs = peft_model.generate(input_ids=peft_encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=256, pad_token_id = tokenizer.eos_token_id, \
                                                                                                                     eos_token_id = tokenizer.eos_token_id, attention_mask = peft_encoding.attention_mask, \
                                                                                                                     temperature=0.4, top_p=0.6, repetition_penalty=1.3, num_return_sequences=1,))
    peft_text_output = tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

    print(f'PEFT MODEL RESPONSE:\n{peft_text_output}')
    print(dashline)

In [None]:
trainer.model.save_pretrained('.')
trainer.tokenizer.save_pretrained('.')

In [17]:


query = "How can I prevent anxiety and depression?"
generate_answer(query)
     


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

-------------------------------------------------
ORIGINAL MODEL RESPONSE:
Answer the following question truthfully.
      If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
      If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
<HUMAN>: How can I prevent anxiety and depression?
      <ASSISTANT>: 1. Regular exercise and meditation helps in preventing anxiety and depression.
2. Keep yourself busy with work or any activity that interests you.
3. Connect with others regularly. Social isolation leads to anxiety and depression.
4. Get enough sleep every night. Lack of sleep causes problems with thinking and concentration during the day.
5. Take care of your physical health. A diet high in fat and low in vegetables contributes to mental illnesses like Depression.
6. Avoid alcohol and illicit drugs. Substances like cocaine and methamphetamine are stimulants and depressants at the same time. They significantl

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

PEFT MODEL RESPONSE:
Answer the following question truthfully.
      If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
      If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
<HUMAN>: How can I prevent anxiety and depression?
      <ASSISTANT>: 1. Regular exercise and meditation helps in preventing anxiety and depression.
2. Keep yourself busy with work or any activity that interests you.
3. Connect with others regularly. Social isolation leads to anxiety and depression.
4. Get enough sleep and rest. Avoid caffeine, alcohol and other stimulants.
5. Take medications as prescribed by your doctor. Do not stop abruptly.
6. Attend support groups for patients with similar conditions. Participate actively in their activities. Listening to fellow sufferers could help dispel many myths associated with these illnesses.
7. Learn relaxation techniques like yoga and/or mindfulness-based stress reduction.
8. Join an

In [23]:
query = "I am sufferring from depression from last 2 years. I don't enjoy socialising. I feel lonely most of the times. What should I do to get better.?"
generate_answer(query)

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


-------------------------------------------------


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

PEFT MODEL RESPONSE:
Answer the following question truthfully.
      If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
      If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
<HUMAN>: I am sufferring from depression from last 2 years. I don't enjoy socialising. I feel lonely most of the times. What should I do to get better.?
      <ASSISTANT>: 1. Consult a psychiatrist and find out what type of depressive disorder you are suffering from. There are different types of depressve disorders such as major depressive disorder, bipolar disorder etc. depending on the severity of symptoms. Your doctor will devise a treatment plan based on the type of depressive disorder you are suffering from. Treatment may include medication, psychotherapy or both. In rare cases when therapeutic response remains unmanageable, psychiatrists might consider electroconvulsive therapy (ECT). ECT has been found to be safe and effect