### DialoGPT-Medium Chatbot with Custom Emotion classifier (Roberta Large) and Safety Filters

In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from datasets import Dataset, DatasetDict
import numpy as np
import pandas as pd
import math


In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:
#Load EmpatheticDialogues splits
from datasets import load_dataset
dataset = load_dataset("empathetic_dialogues")


In [None]:
# process_empathetic_dataset → returns input_bart, response, emotion
def process_empathetic_dataset(dataset_split, max_turns=3):
    def clean_text(text):
        return (
            text.replace('_comma_', ',')
                .replace('_period_', '.')
                .replace('_exclamation_', '!')
                .strip()
        )

    df = pd.DataFrame(dataset_split)
    df = df.sort_values(by=['conv_id', 'utterance_idx']).reset_index(drop=True)

    pairs = []
    for conv_id, conv in df.groupby('conv_id'):
        history = []
        for _, row in conv.iterrows():
            utterance = clean_text(row['utterance'])
            emotion  = row['context']
            speaker  = row['speaker_idx']

            if history:
                truncated = history[-max_turns:]
                context_str = " ".join(truncated)
                input_bart = f"<emotion={emotion}> {context_str}"
                pairs.append({
                    "input_bart": input_bart,
                    "response": utterance,
                    "emotion": emotion
                })
            history.append(f"[Speaker {speaker}] {utterance}")

    return Dataset.from_pandas(pd.DataFrame(pairs))

In [None]:
# Applying processing to each split
dataset_processed = DatasetDict({
    "train":      process_empathetic_dataset(dataset["train"]),
    "validation": process_empathetic_dataset(dataset["validation"]),
    "test":       process_empathetic_dataset(dataset["test"])
})


In [None]:
# Tokenizer & DialoGPT-medium Model initialization
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
# ensuring there is a pad token
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
model.resize_token_embeddings(len(tokenizer))


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Embedding(50257, 1024)

In [None]:
# Tokenization function for causal LM
def tokenize_fn(example):
    # concatenate input_bart, response, and EOS so model sees full context→reply
    texts = [
        inp + tokenizer.eos_token + resp + tokenizer.eos_token
        for inp, resp in zip(example["input_bart"], example["response"])
    ]
    batch = tokenizer(
        texts,
        max_length=128,
        padding="max_length",
        truncation=True
    )
    # causal LM: labels = input_ids so each token predicts next
    batch["labels"] = batch["input_ids"].copy()
    return batch

tokenized_dataset = dataset_processed.map(tokenize_fn, batched=True)


Map:   0%|          | 0/58829 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

Map:   0%|          | 0/9267 [00:00<?, ? examples/s]

Map:   0%|          | 0/8401 [00:00<?, ? examples/s]

In [None]:
# Removing raw columns
tokenized_dataset = tokenized_dataset.remove_columns(["input_bart", "response", "emotion"])


In [None]:
# Trainer setup
training_args = TrainingArguments(
    output_dir="./dialogpt_empathetic",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=500,         # log every 500 steps
    save_steps=1000,           # save a checkpoint every 1k steps
    warmup_steps=200,
    learning_rate=5e-5,
    fp16=True
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator
)


  trainer = Trainer(


In [None]:
# Train
trainer.train()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


wandb: Paste an API key from your profile and hit enter:

 ··········


wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgaurav-ksinghyt[0m ([33mgaurav-ksinghyt-university-at-buffalo[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
500,2.7575
1000,2.2433
1500,2.1727
2000,2.1154
2500,2.0716
3000,2.021
3500,1.9902
4000,1.8807
4500,1.8081
5000,1.7877


TrainOutput(global_step=11031, training_loss=1.8389444106531407, metrics={'train_runtime': 4978.7281, 'train_samples_per_second': 35.448, 'train_steps_per_second': 2.216, 'total_flos': 4.097589986879078e+16, 'train_loss': 1.8389444106531407, 'epoch': 3.0})

In [None]:
# Evaluation (perplexity)
eval_results = trainer.evaluate()
ppl = math.exp(eval_results["eval_loss"])
print(f"Validation Perplexity → {ppl:.2f}")


Validation Perplexity → 11.60


In [None]:
# Interactive empathetic chat (auto emotion)
import re

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

SYSTEM = "You are a caring, empathetic assistant. Always validate the user’s feelings before responding."
chat_history = ""

while True:
    user_input = input("You (/reset to clear, /exit to quit): ").strip()
    if user_input.lower() == "/exit":
        print("Bot: Goodbye! Take care!")
        break
    if user_input.lower() == "/reset":
        chat_history = ""
        print("→ Conversation history cleared.")
        continue
    if not user_input:
        continue

    # Automatically detect emotion
    emotion = detect_emotion(user_input)
    print(f"→ Detected emotion: {emotion}")

    # Build the same <emotion=…> prompt you trained on
    prompt = SYSTEM + "\n" + f"<emotion={emotion}> {chat_history} [Speaker 0] {user_input}"
    inputs = tokenizer(
        prompt + tokenizer.eos_token,
        return_tensors="pt",
        truncation=True,
        max_length=128
    ).to(device)

    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=64,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.8,
        temperature=0.8,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        early_stopping=True
    )

    # Decode only the newly generated tokens
    gen_ids = outputs[0][ inputs["input_ids"].shape[-1] : ]
    raw    = tokenizer.decode(gen_ids, skip_special_tokens=True)
    cleaned= re.split(r"\[Speaker", raw)[0].strip()

    # Update history and display
    chat_history += f"[Speaker 0] {user_input} [Speaker 1] {cleaned} "
    print("Bot:", cleaned)


You (/reset to clear, /exit to quit): I just found out my dog passed away.
→ Detected emotion: sadness
Bot: I am so sorry to hear that.  Is it serious?  You must be devastated.  Are you doing alright?
You (/reset to clear, /exit to quit): I'm very sad. He was my buddy
→ Detected emotion: sadness
Bot: I'm so sorry.  Have you been able to cope with your loss?  How did he pass?  I hope you have good memories of him.  Do you know what caused It's hard.  We all lose our pets at some point of time.  I'm sorry for your loss.  Just
You (/reset to clear, /exit to quit): /reset
→ Conversation history cleared.
You (/reset to clear, /exit to quit): I got accepted into my dream school!
→ Detected emotion: joy
Bot: Oh, that's great.  Congratulations!  What are you studying?
You (/reset to clear, /exit to quit): computer Science with focus on AI
→ Detected emotion: neutral
Bot: Nice!  That's exciting.  Good luck!  I'm sure you will do well!  Just remember that sometimes the best way to learn is throu

In [None]:

# Saving fine-tuned model & tokenizer
output_dir = "./dialogpt_empathetic_final_medium"
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model and tokenizer saved to {output_dir}")


Model and tokenizer saved to ./dialogpt_empathetic_final_medium


In [None]:
# Loading emotion classifier
from transformers import RobertaForSequenceClassification,RobertaTokenizer
import torch.nn.functional as F

# 1a) Load a pretrained emotion detector
emo_tokenizer = RobertaTokenizer.from_pretrained("./rob-large-emotion-detector_dedupe/")
emo_model     = RobertaForSequenceClassification.from_pretrained("./rob-large-emotion-detector_dedupe/")
emo_model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=Tru

In [41]:
def detect_emotion(text: str) -> str:
    inputs = emo_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=128
    ).to(device)
    
    logits = emo_model(**inputs).logits
    probs  = F.softmax(logits, dim=-1)
    idx    = probs.argmax(dim=-1).item()
    
    return id_to_emotion[idx]

In [42]:
model_path = "./DialoGPTmedium/dialogpt_empathetic_final_medium/"
tokenizer_final = AutoTokenizer.from_pretrained(model_path)
model_final = AutoModelForCausalLM.from_pretrained(model_path)

In [43]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

tox_tokenizer = AutoTokenizer.from_pretrained("sentinet/suicidality")
tox_model = AutoModelForSequenceClassification.from_pretrained("sentinet/suicidality")

In [44]:
emotions = [
    'jealous', 'furious', 'disgusted', 'nostalgic', 'impressed', 'faithful',
    'caring', 'confident', 'guilty', 'angry', 'disappointed', 'sentimental',
    'anxious', 'annoyed', 'embarrassed', 'terrified', 'apprehensive', 'grateful',
    'sad', 'afraid', 'ashamed', 'devastated', 'joyful', 'hopeful', 'lonely',
    'prepared', 'trusting', 'anticipating', 'excited', 'surprised', 'content', 'proud'
]

emotion_to_id = {emotion: idx for idx, emotion in enumerate(emotions)}
id_to_emotion = {idx: emotion for emotion, idx in emotion_to_id.items()}


In [46]:
import torch
import random

# Responses
MILD_RESPONSES = [
    "💙 It sounds like you're going through a tough time. You're not alone.",
    "🫶 I'm really sorry you're feeling this way. Please know that help is available.",
    "🌻 You matter. Please reach out to someone you trust or a professional.",
    "🌸 I'm here for you. Talking to a counselor can really help in moments like these."
]

EXTREME_RESPONSES = [
    "🚨 I'm deeply concerned about your safety. Please talk to a mental health professional or call a crisis hotline immediately.",
    "⚠️ It sounds like you're in a lot of pain. I'm not a crisis service, but you're not alone — please reach out to a counselor or crisis line now.",
    "⛑️ I'm just a support tool and not equipped to help in a crisis. Please talk to a licensed mental health professional right away."
]

HIGH_SEVERITY_KEYWORDS = {"kill myself", "end of me", "want to die", "suicide", "die", "can't go on", "ending it all", "not worth living"}


# Safety detection function
def detect_distress_and_severity(text, model, tokenizer, model_threshold=0.7):
    # Model-based distress detection
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.softmax(outputs.logits, dim=1)
    distress_score = probs[0][1].item()

    # Heuristic severity detection
    text_lower = text.lower()
    high_severity_flag = any(phrase in text_lower for phrase in HIGH_SEVERITY_KEYWORDS)

    is_distressed = distress_score > model_threshold
    severity = "extreme" if high_severity_flag else "mild" if is_distressed else "none"

    return severity

# # safe fallback response
# def safe_fallback_response():
#     return random.choice(SAFE_RESPONSES)

def safety_response_handler(text):
    severity = detect_distress_and_severity(text, tox_model, tox_tokenizer)

    if severity == "extreme":
        return random.choice(EXTREME_RESPONSES)
    elif severity == "mild":
        return random.choice(MILD_RESPONSES)
    else:
        return None  # safe to proceed with normal response generation


In [47]:
# Interactive empathetic chat
import re

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_final.to(device)

SYSTEM = "You are a caring, empathetic assistant. Always validate the user’s feelings before responding."
chat_history = ""

while True:
    user_input = input("You (/reset to clear, /exit to quit): ").strip()
    if user_input.lower() == "/exit":
        print("Bot: Goodbye! Take care!")
        break
    if user_input.lower() == "/reset":
        chat_history = ""
        print("→ Conversation history cleared.")
        continue
    if not user_input:
        continue
    
    response = safety_response_handler(user_input)

    if response:
        print(f"Bot: {response}")
        continue
    
    # Automatically detect emotion
    emotion = detect_emotion(user_input)
    print(f"→ Detected emotion: {emotion}")

    # Build the same <emotion=…> prompt you trained on
    prompt = SYSTEM + "\n" + f"<emotion={emotion}> {chat_history} [Speaker 0] {user_input}"
    inputs = tokenizer_final(
        prompt + tokenizer_final.eos_token,
        return_tensors="pt",
        truncation=True,
        max_length=128
    ).to(device)

    outputs = model_final.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=64,
        pad_token_id=tokenizer_final.eos_token_id,
        eos_token_id=tokenizer_final.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.8,
        temperature=0.8,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        early_stopping=True
    )

    # Decode only the newly generated tokens
    gen_ids = outputs[0][ inputs["input_ids"].shape[-1] : ]
    raw    = tokenizer_final.decode(gen_ids, skip_special_tokens=True)
    cleaned= re.split(r"\[Speaker", raw)[0].strip()

    # Update history and display
    chat_history += f"[Speaker 0] {user_input} [Speaker 1] {cleaned} "
    print("Bot:", cleaned)


You (/reset to clear, /exit to quit):  I'm really excited about the upcoming trip!


→ Detected emotion: excited




Bot: Oh, what are you going to do?  Where are you headed?  Are you flying or driving?  What are your plans?  How long are you staying?  Do you have any family you can call?  I hope you have fun! :)  What's the temperature where you are? [Spe


You (/reset to clear, /exit to quit):  You seem really excited for me. I am going to Maldives for my vacation. Probably going to go sight-seeing


→ Detected emotion: excited
Bot: We're driving to Austin Texas. It will be over 2 weeks. I think it! I am taking my boyfriend with me. He is in a car. I'll take him. So exciting. We're going on a roadtrip. He's going to see all of course. Maybe he's friends. I want


You (/reset to clear, /exit to quit):  /reset


→ Conversation history cleared.


You (/reset to clear, /exit to quit):  I feel really lonely you know. It's been difficult


→ Detected emotion: lonely
Bot: I understand. Have you thought about seeking out other friends? Maybe you should try online dating? Or see if there is any chance of meeting some new people. You can't hurt yourself if you're happy! :)-


You (/reset to clear, /exit to quit):  That's really a good advice. But it's really difficult to do things alone


→ Detected emotion: caring
Bot: It is. I hope you find someone who feels the Good luck to you. :) If not all you need is to stay strong and keep your mind busy and be patient. You will find someone soon enough to meet some new. It will happen. If you get a partner you a good partner. :) :) :) LOL


You (/reset to clear, /exit to quit):  That's a very interesting way to put it. Thanks


→ Detected emotion: impressed
Bot: I just keep going to try to meet new places and share the joy with others! :) It'll be better soon :) Good luck. Have fun! :) :) :D I hope it goes by! :) [Speak you have a nice day :) :) :) You will have a beautiful day! :) I'm feeling ok


You (/reset to clear, /exit to quit):  /reset


→ Conversation history cleared.


You (/reset to clear, /exit to quit):  I have been having some tough days. 


→ Detected emotion: sad
Bot: I am sorry. Have you found someone to help? Maybe they can relate. :)


You (/reset to clear, /exit to quit):  I feel I am in a very dark place. I think something is wrong with me


Bot: 🌻 You matter. Please reach out to someone you trust or a professional.


You (/reset to clear, /exit to quit):  I feel like hurting myself is the right answer. I guess it will make me feel something


Bot: 🌸 I'm here for you. Talking to a counselor can really help in moments like these.


You (/reset to clear, /exit to quit):  What can a counselor understand ? I really think killing myself is the answer


Bot: 🫶 I'm really sorry you're feeling this way. Please know that help is available.


You (/reset to clear, /exit to quit):  Who is gonna help me


Bot: 🌻 You matter. Please reach out to someone you trust or a professional.


You (/reset to clear, /exit to quit):  No one


→ Detected emotion: lonely
Bot: I hope you find someone!  Good luck.  Maybe you can find a new hobby that will be more fulfilling for you.  Just take it slow and enjoy the peace. :) You deserve it!  You should be proud of yourself! :)  It's always lonliness is good to be proud and encourage


You (/reset to clear, /exit to quit):  /exit


Bot: Goodbye! Take care!
