### Model 1: BlenderBot model on the Empathetic Dialogues

In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import (
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    BlenderbotTokenizer,
    BlenderbotForConditionalGeneration,
    DataCollatorForSeq2Seq,
    AutoModelForSequenceClassification,
    AutoTokenizer
)
from datasets import DatasetDict, Dataset, load_dataset
import numpy as np
import pandas as pd
import random

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


### Load Dataset

In [3]:
dataset = load_dataset("facebook/empathetic_dialogues")

In [3]:
# dataset = load_dataset("empathetic_dialogues")
train_dataset = Dataset.from_file("./empathetic_dialogues-train.arrow")

val_dataset = Dataset.from_file("./empathetic_dialogues-validation.arrow")

test_dataset = Dataset.from_file("./empathetic_dialogues-test.arrow")

In [4]:
dataset = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

In [5]:
train_dataset

Dataset({
    features: ['conv_id', 'utterance_idx', 'context', 'prompt', 'speaker_idx', 'utterance', 'selfeval', 'tags'],
    num_rows: 76673
})

In [6]:
train_dataset[0:5]

{'conv_id': ['hit:0_conv:1',
  'hit:0_conv:1',
  'hit:0_conv:1',
  'hit:0_conv:1',
  'hit:0_conv:1'],
 'utterance_idx': [1, 2, 3, 4, 5],
 'context': ['sentimental',
  'sentimental',
  'sentimental',
  'sentimental',
  'sentimental'],
 'prompt': ['I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.',
  'I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.',
  'I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.',
  'I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.',
  'I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.'],
 'speaker_idx': [1, 0, 1, 0, 1],
 'utterance': ['I remember going to see the fireworks with my 

### Preprocess dataset

Here, we process the dialogue dataset by cleaning the text and constructing contextual input-response pairs (tagged with emotions) based on speaker turns. This helps create a realistic conversational setup for training our model.

In [7]:
def process_empathetic_dataset(dataset_split, max_turns=4):
    def clean_text(text):
        replacements = {'_comma_': ',', '_period_': '.', '_exclamation_': '!'}
        for k, v in replacements.items():
            text = text.replace(k, v)
        return text.strip()

    df = pd.DataFrame(dataset_split)
    df = df.sort_values(by=['conv_id', 'utterance_idx']).reset_index(drop=True)

    pairs = []
    for conv_id, conv in df.groupby('conv_id'):
        history = []  # store (speaker, utterance)
        for i, row in conv.iterrows():
            utterance = clean_text(row['utterance'])
            speaker = row['speaker_idx']
            emotion = row['context']

            # Only create a pair if there's history and speaker has changed
            if history and speaker != history[-1][0]:
                # Extract only the utterances from history
                context_utts = [utt for _, utt in history[-max_turns:]]
                context = " [SEP] ".join(context_utts)
                input_text = f"<emotion={emotion}> [CONTEXT] {context} [USER]"
                
                pairs.append({
                    "input_text": input_text,
                    "response": utterance,
                    "emotion": emotion
                })

            history.append((speaker, utterance))

    return Dataset.from_pandas(pd.DataFrame(pairs))


In [8]:
# Apply to each split
dataset_processed = DatasetDict({
    "train": process_empathetic_dataset(dataset['train']),
    "validation": process_empathetic_dataset(dataset['validation']),
    "test": process_empathetic_dataset(dataset['test'])
})

In [9]:
dataset_processed['train'][0:5]

{'input_text': ['<emotion=sentimental> [CONTEXT] I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. [USER]',
  '<emotion=sentimental> [CONTEXT] I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. [SEP] Was this a friend you were in love with, or just a best friend? [USER]',
  '<emotion=sentimental> [CONTEXT] I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. [SEP] Was this a friend you were in love with, or just a best friend? [SEP] This was a best friend. I miss her. [USER]',
  '<emotion=sentimental> [CONTEXT] I remember going to see the fireworks with my best f

### Loading the model
- model - Blenderbot
- input tokenize max len - 128
- response tokenize max len - 64

In [33]:
model = BlenderbotForConditionalGeneration.from_pretrained("./blenderbot_local")
tokenizer = BlenderbotTokenizer.from_pretrained("./blenderbot_local")

loading configuration file ./blenderbot_local/config.json
Model config BlenderbotConfig {
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": true,
  "architectures": [
    "BlenderbotForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1280,
  "decoder_attention_heads": 32,
  "decoder_ffn_dim": 5120,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": true,
  "dropout": 0.1,
  "encoder_attention_heads": 32,
  "encoder_ffn_dim": 5120,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 2,
  "encoder_no_repeat_ngram_size": 3,
  "eos_token_id": 2,
  "extra_layer_norm": false,
  "extra_pos_embeddings": 0,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": 

In [34]:
emotions = list(set(dataset_processed["train"]["emotion"]))
special_tokens = [f"<emotion={e}>" for e in emotions]
tokenizer.add_tokens(special_tokens)
model.resize_token_embeddings(len(tokenizer))

Adding <emotion=sad> to the vocabulary
Adding <emotion=jealous> to the vocabulary
Adding <emotion=devastated> to the vocabulary
Adding <emotion=ashamed> to the vocabulary
Adding <emotion=confident> to the vocabulary
Adding <emotion=embarrassed> to the vocabulary
Adding <emotion=content> to the vocabulary
Adding <emotion=hopeful> to the vocabulary
Adding <emotion=anticipating> to the vocabulary
Adding <emotion=furious> to the vocabulary
Adding <emotion=sentimental> to the vocabulary
Adding <emotion=annoyed> to the vocabulary
Adding <emotion=proud> to the vocabulary
Adding <emotion=surprised> to the vocabulary
Adding <emotion=trusting> to the vocabulary
Adding <emotion=grateful> to the vocabulary
Adding <emotion=disgusted> to the vocabulary
Adding <emotion=afraid> to the vocabulary
Adding <emotion=lonely> to the vocabulary
Adding <emotion=faithful> to the vocabulary
Adding <emotion=angry> to the vocabulary
Adding <emotion=nostalgic> to the vocabulary
Adding <emotion=joyful> to the vocabu

Embedding(8041, 1280)

In [None]:
# Tokenizing the dataset
def tokenize_fn(examples):
    inputs = tokenizer(
        examples["input_text"],
        max_length=128,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )
    
    targets = tokenizer(
        examples["response"],
        max_length=64,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )
    
    inputs["labels"] = targets["input_ids"]
    return inputs

In [None]:
tokenized_dataset['train'][0]

In [None]:
tokenized_dataset

In [92]:
tokenized_dataset = tokenized_dataset.remove_columns(["input_text", "response", "emotion"])

#### Loading from saved tokenizer on the disk

In [4]:
from datasets import load_from_disk

In [5]:
tokenized_dataset = load_from_disk("./Chatbot Training/blender_tokenized_dataset")

In [38]:
# model.to(device)

### Training the model

In [39]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./blenderbot_empathetic",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    learning_rate=2e-5,
    warmup_steps=300,
    gradient_accumulation_steps=2,
    predict_with_generate=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=100,
    fp16=torch.cuda.is_available(),
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True,
)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using cuda_amp half precision backend


In [None]:
print(next(model.parameters()).device)

cuda:0


In [41]:
trainer.train()

***** Running training *****
  Num examples = 58829
  Num Epochs = 5
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 2
  Total optimization steps = 36770


Epoch,Training Loss,Validation Loss
1,0.4182,0.776847
2,0.3028,0.827595
3,0.2511,0.875128
4,0.2041,0.90935
5,0.1783,0.929904


***** Running Evaluation *****
  Num examples = 9266
  Batch size = 4
Saving model checkpoint to ./blenderbot_empathetic/checkpoint-7354
Configuration saved in ./blenderbot_empathetic/checkpoint-7354/config.json
Model weights saved in ./blenderbot_empathetic/checkpoint-7354/pytorch_model.bin
tokenizer config file saved in ./blenderbot_empathetic/checkpoint-7354/tokenizer_config.json
Special tokens file saved in ./blenderbot_empathetic/checkpoint-7354/special_tokens_map.json
added tokens file saved in ./blenderbot_empathetic/checkpoint-7354/added_tokens.json
***** Running Evaluation *****
  Num examples = 9266
  Batch size = 4
Saving model checkpoint to ./blenderbot_empathetic/checkpoint-14708
Configuration saved in ./blenderbot_empathetic/checkpoint-14708/config.json
Model weights saved in ./blenderbot_empathetic/checkpoint-14708/pytorch_model.bin
tokenizer config file saved in ./blenderbot_empathetic/checkpoint-14708/tokenizer_config.json
Special tokens file saved in ./blenderbot_empa

TrainOutput(global_step=36770, training_loss=0.30277411930581166, metrics={'train_runtime': 5194.0889, 'train_samples_per_second': 56.631, 'train_steps_per_second': 7.079, 'total_flos': 8.00205363707904e+16, 'train_loss': 0.30277411930581166, 'epoch': 5.0})

In [42]:
# trainer.train(resume_from_checkpoint=True)

The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.


### Saving the model

In [46]:
trainer.save_model("./blender_empathetic_final")
tokenizer.save_pretrained("./blender_empathetic_final")

Saving model checkpoint to ./blender_empathetic_final
Configuration saved in ./blender_empathetic_final/config.json
Model weights saved in ./blender_empathetic_final/pytorch_model.bin
tokenizer config file saved in ./blender_empathetic_final/tokenizer_config.json
Special tokens file saved in ./blender_empathetic_final/special_tokens_map.json
added tokens file saved in ./blender_empathetic_final/added_tokens.json
tokenizer config file saved in ./blender_empathetic_final/tokenizer_config.json
Special tokens file saved in ./blender_empathetic_final/special_tokens_map.json
added tokens file saved in ./blender_empathetic_final/added_tokens.json


('./blender_empathetic_final/tokenizer_config.json',
 './blender_empathetic_final/special_tokens_map.json',
 './blender_empathetic_final/vocab.json',
 './blender_empathetic_final/merges.txt',
 './blender_empathetic_final/added_tokens.json')

In [6]:
model_path = "./Chatbot Training/blender_empathetic_final/"
tokenizer_final = BlenderbotTokenizer.from_pretrained(model_path)
model_final = BlenderbotForConditionalGeneration.from_pretrained(model_path)

In [17]:
device

device(type='cpu')

In [18]:
model_final.to(device)

BlenderbotForConditionalGeneration(
  (model): BlenderbotModel(
    (shared): BlenderbotScaledWordEmbedding(8041, 1280, padding_idx=0)
    (encoder): BlenderbotEncoder(
      (embed_tokens): BlenderbotScaledWordEmbedding(8041, 1280, padding_idx=0)
      (embed_positions): BlenderbotLearnedPositionalEmbedding(128, 1280)
      (layers): ModuleList(
        (0-1): 2 x BlenderbotEncoderLayer(
          (self_attn): BlenderbotAttention(
            (k_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (v_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (q_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1280, out_features=5120, bias=True)
          (fc2): Linear(in_features=5

### Loading our pre-trained Roberta large model for emotion detection

In [19]:
# Load emotion classifier for inference only
from transformers import RobertaForSequenceClassification,RobertaTokenizer
import torch.nn.functional as F

# 1a) Load a pretrained emotion detector
emo_tokenizer = RobertaTokenizer.from_pretrained("./rob-large-emotion-detector_dedupe/")
emo_model     = RobertaForSequenceClassification.from_pretrained("./rob-large-emotion-detector_dedupe/")
emo_model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=Tru

In [20]:
def detect_emotion(text: str) -> str:
    inputs = emo_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=128
    ).to(device)
    
    logits = emo_model(**inputs).logits
    probs  = F.softmax(logits, dim=-1)
    idx    = probs.argmax(dim=-1).item()
    
    return id_to_emotion[idx]

In [21]:
emotions = [
    'jealous', 'furious', 'disgusted', 'nostalgic', 'impressed', 'faithful',
    'caring', 'confident', 'guilty', 'angry', 'disappointed', 'sentimental',
    'anxious', 'annoyed', 'embarrassed', 'terrified', 'apprehensive', 'grateful',
    'sad', 'afraid', 'ashamed', 'devastated', 'joyful', 'hopeful', 'lonely',
    'prepared', 'trusting', 'anticipating', 'excited', 'surprised', 'content', 'proud'
]

emotion_to_id = {emotion: idx for idx, emotion in enumerate(emotions)}
id_to_emotion = {idx: emotion for emotion, idx in emotion_to_id.items()}


#### Using a pretrained model to detect distress

In [22]:
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
  import torch

  tox_tokenizer = AutoTokenizer.from_pretrained("sentinet/suicidality")
  tox_model = AutoModelForSequenceClassification.from_pretrained("sentinet/suicidality")

In [23]:
import torch
import random

# Responses
MILD_RESPONSES = [
    "💙 It sounds like you're going through a tough time. You're not alone.",
    "🫶 I'm really sorry you're feeling this way. Please know that help is available.",
    "🌻 You matter. Please reach out to someone you trust or a professional.",
    "🌸 I'm here for you. Talking to a counselor can really help in moments like these."
]

EXTREME_RESPONSES = [
    "🚨 I'm deeply concerned about your safety. Please talk to a mental health professional or call a crisis hotline immediately.",
    "⚠️ It sounds like you're in a lot of pain. I'm not a crisis service, but you're not alone — please reach out to a counselor or crisis line now.",
    "⛑️ I'm just a support tool and not equipped to help in a crisis. Please talk to a licensed mental health professional right away."
]

HIGH_SEVERITY_KEYWORDS = {"kill myself", "end of me", "want to die", "suicide", "die", "can't go on", "ending it all", "not worth living"}


# Safety detection function
def detect_distress_and_severity(text, model, tokenizer, model_threshold=0.7):
    # Model-based distress detection
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)
    distress_score = probs[0][1].item()

    # Heuristic severity detection
    text_lower = text.lower()
    high_severity_flag = any(phrase in text_lower for phrase in HIGH_SEVERITY_KEYWORDS)

    is_distressed = distress_score > model_threshold
    severity = "extreme" if high_severity_flag else "mild" if is_distressed else "none"

    return severity

# # safe fallback response
# def safe_fallback_response():
#     return random.choice(SAFE_RESPONSES)

def safety_response_handler(text):
    severity = detect_distress_and_severity(text, tox_model, tox_tokenizer)

    if severity == "extreme":
        return random.choice(EXTREME_RESPONSES)
    elif severity == "mild":
        return random.choice(MILD_RESPONSES)
    else:
        return None  # safe to proceed with normal response generation


In [None]:
import random
import requests
from transformers import pipeline

def adjust_response(response, emotion):
    
    emotion_responses = {
        'furious': ["😡 That's infuriating! ", "💢 This is unacceptable! "],
        'proud': ["🏆 Incredible achievement! ", "👏 You should be proud! "],
        'nostalgic': ["🕰️ Reminiscing can be powerful. ", "📻 Those memories matter. "],
        'jealous': ["💚 It's natural to feel this way. ", "🤢 Jealousy is tough. "],
        'anticipating': ["⏳ The wait must be intense. ", "🔮 Exciting things ahead! "],
        'sentimental': ["📜 Those feelings are valid. ", "💌 Heartfelt moments. "],
        'grateful': ["🙏 Gratitude changes everything. ", "🌈 Appreciation is beautiful. "],
        'caring': ["💖 Your compassion shines. ", "🤗 Kindness matters. "],
        'hopeful': ["🌟 Hope fuels progress. ", "🔭 Looking forward with you. "],
        'devastated': ["💔 This is heartbreaking. ", "🕯️ I'm here in this pain. "],
        'terrified': ["😱 That sounds terrifying! ", "🛡️ Let's find safety. "],
        'ashamed': ["😞 These feelings are valid. ", "🛑 You're safe here. "],
    }

    # Default fallbacks
    default_prefixes = {
        'positive': "😊 ",
        'negative': "😟 ",
        'neutral': "🤖 "
    }
    
    # We get prefix based on emotion, falling back to the appropriate default
    if emotion in emotion_responses:
        prefix = random.choice(emotion_responses[emotion])
    else:
        # We Choose default prefix based on emotion type
        if emotion in ['joyful', 'excited', 'confident']:
            prefix = default_prefixes['positive']
        elif emotion in ['sad', 'anxious', 'guilty']:
            prefix = default_prefixes['negative']
        else:
            prefix = default_prefixes['neutral']
    
    return f"{prefix}{response}"



In [45]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
def chat():
    conversation_turns = []
    print("Empathetic Chatbot (type 'exit' to quit)")

    while True:
        prompt = input("\nYou: ").strip()
        if prompt.lower() == 'exit':
            print("\nBot: Goodbye! Take care. 😊")
            break
        
        response = safety_response_handler(prompt)
        if response:
            print(f"Bot: {response}")
            continue
    
        # Automatically detect emotion
        emotion = detect_emotion(user_input)
        print(f"→ Detected emotion: {emotion}")

        # Building conversation history
        conversation_history = " ".join(conversation_turns)
        if conversation_history:
            input_text = f"<emotion={emotion}> [CONTEXT] {conversation_history}"
        else:
            input_text = f"<emotion={emotion}> [USER] {prompt}"

        inputs = tokenizer_final(
            input_text,
            return_tensors="pt",
            max_length=512,
            truncation=True
        ).to(device)

        input_len = inputs["input_ids"].shape[1]
        if input_len > 500:
            print(f"⚠️  Warning: input length {input_len} is very close to max limit.")

        outputs = model_final.generate(
            **inputs,
#             max_length=128,
            max_new_tokens=64,
            temperature=0.9,
            top_k=50,
            top_p=0.95,
            repetition_penalty=2.0,
            num_beams=4,
            do_sample=True,
            no_repeat_ngram_size=2,
            length_penalty=0.9,
            early_stopping=True
        )

        response = tokenizer_final.decode(outputs[0], skip_special_tokens=True)
        adjusted_response = adjust_response(response, emotion)

        # Update context
        conversation_turns.append(f"[USER] {prompt} [BOT] {adjusted_response}")
        if len(conversation_turns) > 3:
            conversation_turns = conversation_turns[-3:]
        print(f"\nBot: {adjusted_response}")


In [33]:
chat()

Empathetic Chatbot (type 'exit' to quit)



You:  I feel really lonely you know. It's been difficult


→ Detected emotion: afraid

Bot: 🤖  I'm sorry to hear that. What's going on?



You:  It's been really difficult to do things alone. I feel anxious


→ Detected emotion: afraid

Bot: 🤖  I have no one to hang out with and my friends ditched me.



You:  yes exactly. That's what I feel


→ Detected emotion: afraid

Bot: 🤖  I'm so sorry. That sounds really tough.



You:  I feel I am in a very dark place. I think something is wrong with me


Bot: 💙 It sounds like you're going through a tough time. You're not alone.



You:  I feel like hurting myself is the right answer. I guess it will make me feel something


Bot: 💙 It sounds like you're going through a tough time. You're not alone.



You:  I really think killing myself is the answer


Bot: 💙 It sounds like you're going through a tough time. You're not alone.



You:  I always think about ending it all


Bot: 🚨 I'm deeply concerned about your safety. Please talk to a mental health professional or call a crisis hotline immediately.



You:  I think this life is not worth living.


Bot: ⛑️ I'm just a support tool and not equipped to help in a crisis. Please talk to a licensed mental health professional right away.



You:  exit



Bot: Goodbye! Take care. 😊
