In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/244.2 kB[0m [31m942.3 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/244.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m235.5/244.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m58.5 MB/s[0m eta 

In [2]:
import pandas as pd
from datasets import Dataset
from datasets import load_dataset
import json
# import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

Data preprocessing

In [3]:

import json
def read_json_file(file_name):
    """Load a JSON Lines file into a list of decoded records.

    Every non-blank line of the file is parsed as one standalone JSON document.

    Args:
        file_name: Path of the JSON Lines file to read.

    Returns:
        list: The decoded JSON values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    dataset = []
    # Explicit UTF-8 so the result does not depend on the platform default encoding.
    with open(file_name, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank lines (e.g. trailing newlines at the end of the file) are not
            # records; json.loads("") would raise on them.
            if not line:
                continue
            dataset.append(json.loads(line))
    return dataset
# Location of the raw intents file (one JSON object per line).
DATASET_PATH = '/content/Mental_Health_Dataset.jsonl'
data = read_json_file(DATASET_PATH)

In [4]:


# Turn every intent into Llama-2 chat-formatted training strings, pairing each
# pattern with the response at the same position (zip stops at the shorter list).
d = []
# The loop variable is deliberately not called `dataset`: that name is the
# training Dataset consumed by the trainer, and a leftover dict under the same
# name would be picked up instead if cells are run out of order.
for intent in data:
  for question,answer in zip(intent["patterns"],intent["responses"]):
    d.append("<s>[INST]{}[/INST]{}</s>".format(question,answer))
# Persist as JSON Lines: one JSON-encoded string per line.
with open('llamaformat.jsonl', 'w', encoding='utf-8') as f:
    for obj in d:
      f.write(json.dumps(obj)+"\n")





In [6]:

# Reload the formatted prompts and expose them as a Dataset with a single
# "text" column.
jsonl_data = read_json_file('llamaformat.jsonl')
df = pd.DataFrame(columns=['text']).assign(text=jsonl_data)
dataset = Dataset.from_pandas(df)

In [8]:
# Preview a handful of formatted samples instead of dumping the whole training set.
print(dataset[:5]["text"])



In [9]:


class llamafinetune:
  """4-bit quantized LoRA fine-tuning helper for a Llama-2 chat checkpoint.

  The constructor only records settings; the base model is loaded and trained
  when ``model_train`` is called.
  """

  def __init__(self):
    # Base checkpoint to load and the directory name used by save_model().
    self.name_model = "NousResearch/Llama-2-7b-chat-hf"
    self.tuned_model = "Llama-2-7b-chat-finetune"
    # LoRA settings (rank, alpha, dropout) forwarded to LoraConfig.
    self.r = 64
    self.alpha=16
    self.dropout = 0.1
    # Quantization settings forwarded to BitsAndBytesConfig.
    self.enable_4_bit = True
    self.bnb_dtype = "float16"
    self.quant_type = "nf4"
    self.use_nested_quant = False
    # Trainer output directory and the device_map handed to from_pretrained().
    self.output_directory = "./results"
    self.device_map={'': 0}

  def _quantization_kwargs(self, compute_dtype):
    """Return the BitsAndBytesConfig keyword arguments described by this instance."""
    return {
        "load_in_4bit": self.enable_4_bit,
        "bnb_4bit_quant_type": self.quant_type,
        "bnb_4bit_compute_dtype": compute_dtype,
        "bnb_4bit_use_double_quant": self.use_nested_quant,
    }

  def model_train(self, train_dataset=None):
    """Load the quantized base model, attach the LoRA config and run training.

    Args:
        train_dataset: Dataset with a "text" column to train on. When omitted,
            the notebook-level ``dataset`` variable is used.

    Returns:
        tuple: ``(trainer, model, tokenizer)``; the same objects are also kept
        on the instance as ``self.trainer``, ``self.model``, ``self.tokenizer``.
    """
    if train_dataset is None:
      # Backward-compatible default: fall back to the notebook global.
      train_dataset = dataset
    self.compute_dtype = getattr(torch, self.bnb_dtype)
    # Use the settings stored on the instance rather than repeating literals,
    # so changing an attribute actually changes the run.
    self.bits_configuration = BitsAndBytesConfig(
        **self._quantization_kwargs(self.compute_dtype)
    )
    self.model = AutoModelForCausalLM.from_pretrained(
        self.name_model,
        quantization_config=self.bits_configuration,
        device_map=self.device_map
    )
    self.model.config.use_cache = False
    self.model.config.pretraining_tp = 1
    self.tokenizer = AutoTokenizer.from_pretrained(self.name_model, trust_remote_code=True)
    # Reuse the end-of-sequence token for padding, and pad on the right.
    self.tokenizer.pad_token = self.tokenizer.eos_token
    self.tokenizer.padding_side = "right"
    self.peft_config = LoraConfig(
        lora_alpha=self.alpha,
        lora_dropout=self.dropout,
        r=self.r,
        bias="none",
        task_type="CAUSAL_LM",
    )
    self.training_arguments = TrainingArguments(
        output_dir=self.output_directory,
        num_train_epochs=1,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=1,
        optim="paged_adamw_32bit",
        save_steps=0,
        logging_steps=25,
        learning_rate=2e-4,
        weight_decay=0.001,
        fp16=False,
        bf16=False,
        max_grad_norm=0.3,
        max_steps=-1,
        warmup_ratio=0.03,
        group_by_length=True,
        lr_scheduler_type="cosine",
        report_to="tensorboard"
    )
    # max_seq_length is not passed, so SFTTrainer applies its own default.
    self.trainer = SFTTrainer(
        model=self.model,
        train_dataset=train_dataset,
        peft_config=self.peft_config,
        dataset_text_field="text",
        tokenizer=self.tokenizer,
        args=self.training_arguments,
        packing=False,
    )
    self.trainer.train()
    return self.trainer,self.model,self.tokenizer

  def save_model(self,trainer=None):
    """Save the trained model under ``self.tuned_model``.

    Args:
        trainer: Trainer whose ``model`` is saved. Defaults to the trainer
            created by ``model_train``.

    Raises:
        AttributeError: If no trainer is given and ``model_train`` has not run.
    """
    if trainer is None:
      trainer = self.trainer
    trainer.model.save_pretrained(self.tuned_model)

  def testing(self, text="What is anxiety"):
    """Print the model's answer to ``text``; requires ``model_train`` to have run.

    Args:
        text: Question inserted into the ``[INST] ... [/INST]`` prompt template.
    """
    logging.set_verbosity(logging.CRITICAL)
    pipe = pipeline(task="text-generation", model=self.model, tokenizer=self.tokenizer, max_length=200)
    result = pipe(f"<s>[INST] {text} [/INST]")
    # The generated text contains the prompt; keep what follows the first [/INST].
    r = result[0]['generated_text'].split("[/INST]")
    print(r[1])

# Create the fine-tuning helper, run training, then save the trained model.
# `model` and `tokenizer` stay at notebook level for the chat UI cells below.
t = llamafinetune()
training_outputs = t.model_train()
trainer, model, tokenizer = training_outputs
t.save_model(trainer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]



Map:   0%|          | 0/148 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,3.6284


In [10]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.25.0-py3-none-any.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.1-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.15.0 (from gradio)
  Downloading gradio_client-0.15.0-py3-none-any.whl (313 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.4/313.4 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━

In [12]:
import gradio as g
def generate_response(text):
    """Return the fine-tuned model's reply to ``text``.

    Runs a text-generation pipeline (``max_length=200``) over the notebook-level
    ``model`` and ``tokenizer`` with ``text`` wrapped in the
    ``<s>[INST] ... [/INST]`` template, and returns the segment of the
    generated text that follows the first ``[/INST]`` marker.
    """
    # Raise the transformers logging threshold to CRITICAL.
    logging.set_verbosity(logging.CRITICAL)
    generator = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=200,
    )
    prompt = f"<s>[INST] {text} [/INST]"
    generated_text = generator(prompt)[0]["generated_text"]
    # The generated text contains the prompt, so index 1 is the part after it.
    segments = generated_text.split("[/INST]")
    return segments[1]
def chatbot(input, prev):
    """Gradio callback: answer ``input`` and extend the conversation history.

    Args:
        input: The user's new message.
        prev: Conversation so far as a list of ``(user_message, bot_reply)``
            pairs, or ``None`` on the first turn.

    Returns:
        tuple: ``(history, history)`` - the updated history list twice, one for
        each declared output of the click handler.
    """
    if prev is None:
        prev = []
    # Send the earlier turns as context followed by the new message. The former
    # loop overwrote the prompt on every iteration and, on the first turn, never
    # included the user's message at all.
    context = [message for turn in prev for message in turn if message is not None]
    # NOTE(review): generate_response caps generation with max_length=200, so a
    # long history may need truncating - confirm the desired context window.
    prompt = ' '.join(context + [input])
    output = generate_response(prompt)
    prev.append((input, output))
    return prev, prev

gradio_interface = g.Blocks()


# Layout: title, chat window, input box, per-session history and a send button.
with gradio_interface:
    g.Markdown("""<h1>Mental Health Chatbot</h1>
    """)
    chats = g.Chatbot()
    text = g.Textbox(placeholder="")
    chatbot_state = g.State()
    submit = g.Button("SEND")
    # The handler must accept (message, history) and return two values to match
    # the two inputs and two outputs declared here; that is `chatbot`, not
    # `generate_response` (which takes one argument and returns one string).
    submit.click(chatbot, inputs=[text,chatbot_state], outputs=[chats,chatbot_state])

gradio_interface.launch(debug = True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://12e6f4e0dca6d4aa58.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://12e6f4e0dca6d4aa58.gradio.live


