<a href="https://colab.research.google.com/github/kkartikya/Mental-Health-Assistant/blob/main/mha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PRE REQ**

In [1]:
# @title
!pip install -q -U transformers datasets bitsandbytes huggingface_hub peft

In [2]:
# @title
import torch
from datasets import load_dataset, Dataset
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, get_peft_model_state_dict
from google.colab import userdata
import os

In [3]:
# @title
# 0. Setup & Authentication
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
dataset_name = "Amod/mental_health_counseling_conversations"
output_dir = "/content/drive/MyDrive/llama-finetuned-chatbot/"
num_train_epochs = 1
per_device_train_batch_size = 1
gradient_accumulation_steps = 8
learning_rate = 2e-5
fp16 = True
save_steps = 100
logging_steps = 10

# Authenticate with Hugging Face
login(token = userdata.get('HF_TOKEN'))

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# **Training**

In [5]:
# @title
# Load and Preprocess Data
dataset = load_dataset(dataset_name)

for example in dataset['train']:
    print("Context:", example['Context'])
    print("Response:", example['Response'])
    print("----")
    break

# Tokenize the dataset
tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)

# Use eos_token as pad_token if pad_token is not defined
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_data(examples):
    inputs = tokenizer(examples['Context'], truncation=True, padding="max_length", return_tensors="pt", max_length=512)
    labels = tokenizer(examples['Response'], truncation=True, padding="max_length", return_tensors="pt", max_length=512)
    return {
        'input_ids': inputs['input_ids'].squeeze(),
        'attention_mask': inputs['attention_mask'].squeeze(),
        'labels': labels['input_ids'].squeeze()
    }

# Apply preprocessing to the dataset
dataset = dataset.map(preprocess_data, batched=True, remove_columns=['Context', 'Response'])

for i in dataset['train']:
    print(i)
    break

# Split the dataset
train_test_split = dataset['train'].train_test_split(test_size=0.1)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

README.md: 0.00B [00:00, ?B/s]

combined_dataset.json: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/3512 [00:00<?, ? examples/s]

Context: I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.
   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.
   How can I change my feeling of being worthless to everyone?
Response: If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd and be knocked down again.There are many inspirational messages you can find in social media.  Maybe read some of the ones which state that no person is worthless, and that everyone has a good purpose to their life.Also, since our culture is so saturated with the belief that if someone doesn't feel good about themselves that this is somehow terrible.Bad

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Map:   0%|          | 0/3512 [00:00<?, ? examples/s]

{'input_ids': [40, 2846, 2133, 1555, 1063, 2574, 449, 856, 16024, 323, 7182, 13, 358, 20025, 6212, 323, 358, 656, 4400, 719, 1781, 922, 1268, 358, 2846, 66121, 323, 1268, 358, 13434, 956, 387, 1618, 627, 256, 358, 3077, 2646, 6818, 477, 93439, 18639, 13, 358, 3077, 2744, 4934, 311, 5155, 856, 4819, 11, 719, 358, 2646, 636, 2212, 311, 433, 627, 256, 2650, 649, 358, 2349, 856, 8430, 315, 1694, 66121, 311, 5127, 30, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001,

In [None]:
# @title
#  Load LLaMA Model and Configure Quantization
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

if not os.path.exists(f'{output_dir}/pre'):
    os.makedirs(f'{output_dir}/pre')

if len(os.listdir(f'{output_dir}/pre')) == 0:
    # Model does not exist, download and save it
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=True,
        quantization_config=bnb_config,
        device_map="auto",  # Use multiple GPUs if available
        offload_folder="offload_dir"  # Offload to disk if needed
    )
    model.save_pretrained(f'{output_dir}/pre')
else:
    # Model already saved, load it from Google Drive
    model = AutoModelForCausalLM.from_pretrained(
        f'{output_dir}/pre',
        quantization_config=bnb_config,
        device_map="auto",  # Use multiple GPUs if available
        offload_folder="offload_dir"  # Offload to disk if needed
    )

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# @title
# Configure and attach adapters
lora_config = LoraConfig(
    r=8,  # Dimension of the adapters
    lora_alpha=16,  # Scaling factor for LoRA
    lora_dropout=0.1,  # Dropout rate for LoRA
    target_modules=["q_proj", "v_proj"],  # Specify modules to apply adapters
)

# Wrap model with PEFT
model = get_peft_model(model, lora_config)

In [None]:
# @title
# Define Training Arguments
training_args = TrainingArguments(
    output_dir= output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    learning_rate=learning_rate,
    fp16=fp16,
    save_steps=save_steps,
    logging_steps=logging_steps,
)

In [None]:
# @title
#  Create Trainer and Fine-tune
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss
10,5.6577
20,5.212
30,5.3991
40,4.8304
50,3.9987
60,4.7984
70,4.4999
80,4.1463
90,4.0796
100,3.8551


TrainOutput(global_step=395, training_loss=3.7981371626069276, metrics={'train_runtime': 5414.7802, 'train_samples_per_second': 0.584, 'train_steps_per_second': 0.073, 'total_flos': 7.288728833753088e+16, 'train_loss': 3.7981371626069276, 'epoch': 1.0})

In [None]:
# @title
#  Save the Fine-tuned Model
trainer.save_model(output_dir)

Completed!!

# **Test**

In [None]:
# @title
final_checkpoint = f"{output_dir}/checkpoint-395"  # Change to your desired checkpoint

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Load the base model from the checkpoint
model = AutoModelForCausalLM.from_pretrained(final_checkpoint)

# Load the LoRA-adapted model
# Configure and attach adapters
lora_config = LoraConfig(
    r=8,  # Dimension of the adapters
    lora_alpha=16,  # Scaling factor for LoRA
    lora_dropout=0.1,  # Dropout rate for LoRA
    target_modules=["q_proj", "v_proj"],  # Specify modules to apply adapters
)

# Wrap model with PEFT
model = get_peft_model(model, lora_config)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Contextual prompt similar to training data
context = "hi, i'm krish.today i'm feeling very anxious, this has been a problem for a while now and i don't know how to cope with this. what should i do?."
input_text = f"Context: {context}\nResponse:"

input_ids = tokenizer.encode(input_text, return_tensors='pt').to('cuda')

# Generate attention mask
#attention_mask = (input_ids != tokenizer.pad_token_id).long().to('cuda')

# Set parameters for generation
outputs = model.generate(
    input_ids,
    #attention_mask=attention_mask,
    max_length=2000,  # Adjust length based on response needs
    num_return_sequences=1,
    do_sample=True,
    temperature=0.5,  # Higher temperature for varied responses
    top_k=50,
    top_p=0.9,
    pad_token_id=tokenizer.pad_token_id
)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response.split("Response:")[1].strip().split("\n")[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


hi krish, i'm sorry to hear that you're feeling anxious. it's important to take some time for yourself and do things that make you feel calm and relaxed. it could be something as simple as taking a warm bath, going for a walk, or listening to your favorite music. it's also important to talk to someone you trust about how you're feeling. a friend, family member, or therapist can help you work through your feelings of anxiety and develop coping mechanisms.


# **UI**

In [None]:
!pip install flask-ngrok
!pip install flask_cors
!pip install pyngrok

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25
Collecting flask_cors
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl (14 kB)
Installing collected packages: flask_cors
Successfully installed flask_cors-5.0.0
Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS
from pyngrok import ngrok
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

app = Flask(__name__)
CORS(app)

# HTML template for the frontend with embedded CSS
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Login Page</title>
    <style>
        body {
            font-family: 'Arial', sans-serif;
            background: linear-gradient(135deg, #6b18ff, #a47fff);
            height: 100vh;
            display: flex;
            justify-content: center;
            align-items: center;
            margin: 0;
        }

        .login-container {
            background-color: white;
            padding: 30px;
            border-radius: 15px;
            box-shadow: 0px 4px 15px rgba(0, 0, 0, 0.1);
            max-width: 350px;
            text-align: center;
        }

        h2 {
            font-size: 24px;
            margin-bottom: 20px;
            color: #ffffff;
            background: linear-gradient(135deg, #6b18ff, #a47fff);
            padding: 10px;
            border-radius: 10px;
        }

        input {
            padding: 12px 20px;
            margin: 10px 0;
            border-radius: 30px;
            border: 1px solid #ccc;
            outline: none;
            background-color: #f9f9f9;
            padding-left: 40px;
            font-size: 16px;
        }

        .input-icon {
            position: relative;
        }

        .input-icon input {
            padding-left: 40px;
        }

        .input-icon i {
            position: absolute;
            left: 10px;
            top: 50%;
            transform: translateY(-50%);
            color: #888;
            font-size: 18px;
        }

        .remember-me {
            display: flex;
            justify-content: space-between;
            font-size: 14px;
            margin: 10px 0;
            color: #888;
        }

        button {
            width: 100%;
            background-color: #6200ea;
            color: white;
            border: none;
            padding: 12px;
            border-radius: 30px;
            cursor: pointer;
            font-size: 16px;
            margin-top: 10px;
        }

        button:hover {
            background-color: #4500b5;
        }

        .register {
            font-size: 14px;
            margin-top: 20px;
        }

        .register a {
            color: #6200ea;
            text-decoration: none;
        }

        .register a:hover {
            text-decoration: underline;
        }

        .chatbot-container {
            background-color: #fff;
            padding: 20px;
            margin: 50px auto;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            max-width: 400px;
            text-align: center;
        }

        textarea {
            width: 90%;
            height: 100px;
            padding: 10px;
            margin: 10px 0;
            border-radius: 5px;
            border: 1px solid #ccc;
        }

        .response {
            margin-top: 20px;
            padding: 10px;
            background-color: #f8f9fa;
            border-radius: 5px;
            text-align: left;
            white-space: pre-wrap;
        }

    </style>
</head>
<body>

    <div class="login-container">
        <h2>Login</h2>
        <form id="login-form">
            <div class="input-icon">
                <input type="text" id="username" placeholder="Username" required>
                <i class="fa fa-user"></i>
            </div>
            <div class="input-icon">
                <input type="password" id="password" placeholder="Password" required>
                <i class="fa fa-lock"></i>
            </div>
            <div class="remember-me">
                <label><input type="checkbox"> Remember ME</label>
                <a href="#">Forgot Password?</a>
            </div>
            <button type="submit">Login</button>
            <div class="register">Don't have an account? <a href="#">Register</a></div>
        </form>
    </div>

    <div id="chatbot-container" style="display:none;" class="chatbot-container">
        <h2>Mental Health Assistant</h2>
        <form id="chatbot-form">
            <textarea id="prompt" placeholder="Enter your message..."></textarea>
            <button type="submit">Get Response</button>
        </form>
        <div class="response" id="response"></div>
    </div>

    <script>
        document.getElementById('login-form').onsubmit = function(event) {
            event.preventDefault();
            const username = document.getElementById('username').value;
            const password = document.getElementById('password').value;
            if (username === 'admin' && password === 'password') {
                document.querySelector('.login-container').style.display = 'none';
                document.querySelector('#chatbot-container').style.display = 'block';
            } else {
                alert('Invalid credentials!');
            }
        };

        document.getElementById('chatbot-form').onsubmit = async function(event) {
            event.preventDefault();
            const prompt = document.getElementById('prompt').value;
            const res = await fetch('/predict', {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({ prompt: prompt })
            });
            const data = await res.json();
            document.getElementById('response').innerText = data.response;
        };
    </script>
</body>
</html>
"""

@app.route('/')
def home():
    return render_template_string(HTML_TEMPLATE)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    prompt = data['prompt']
    input_text = f"Context: {prompt}\nResponse:"
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to('cuda')

    with torch.no_grad():  # Disable gradient calculation
        outputs = model.generate(input_ids,
                                 max_length=200,  # Adjust length based on response needs
                                 num_return_sequences=1,
                                 do_sample=True,
                                 temperature=0.7,  # Adjust temperature for varied responses
                                 top_k=50,
                                 top_p=0.9,
                                 pad_token_id=tokenizer.pad_token_id)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_response = response.split("Response:")[1].strip() if "Response:" in response else response
    return jsonify({'response': generated_response})

# Run ngrok to expose the Flask app
ngrok.set_auth_token("2nDZlRYOiwO45PZhIjhGboGQ3S5_83H8WeoL776bx32s9Vukj")  # Optional: Set your ngrok auth token if needed
public_url = ngrok.connect(addr="http://localhost:5000", proto="http")
print(f"Public URL: {public_url}")

app.run(port=5000)


Public URL: NgrokTunnel: "https://24ad-34-125-11-58.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [28/Oct/2024 18:21:28] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [28/Oct/2024 18:21:28] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
INFO:werkzeug:127.0.0.1 - - [28/Oct/2024 18:22:49] "POST /predict HTTP/1.1" 200 -
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
INFO:werkzeug:127.0.0.1 - - [28/Oct/2024 18:27:24] "POST /predict HTTP/1.1" 200 -
The attention mask and the pad token id were not set. As a consequence, you may observe 