In [3]:
from datasets import load_dataset
import pandas as pd
from transformers import RobertaForMaskedLM, RobertaTokenizer, RobertaConfig
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

In [57]:
dataset = load_dataset("heliosbrahma/mental_health_conversational_dataset")
list_pairs=[]
for i in dataset["train"]:

    parts = i['text'].split("<<<ASSISTANT>>>:")

    # Extract the human and assistant parts
    human_part = parts[0].strip()  # Remove leading/trailing whitespaces
    assistant_part = parts[1].strip()  # Remove leading/trailing whitespaces
    list_pairs.append((human_part.split("<<<HUMAN>>>:")[1]+" "+assistant_part))
list_pairs    

[' What is a panic attack? Panic attacks come on suddenly and involve intense and often overwhelming fear. They’re accompanied by very challenging physical symptoms, like a racing heartbeat, shortness of breath, or nausea. Unexpected panic attacks occur without an obvious cause. Expected panic attacks are cued by external stressors, like phobias. Panic attacks can happen to anyone, but having more than one may be a sign of panic disorder, a mental health condition characterized by sudden and repeated panic attacks.',
 ' What are symptoms of panic attack vs. anxiety attack? Panic and anxiety attacks may feel similar, and they share a lot of emotional and physical symptoms. You can experience both an anxiety and a panic attack at the same time. For instance, you might experience anxiety while worrying about a potentially stressful situation, like an important presentation at work. When the situation arrives, anxiety may culminate in a panic attack.\n\nA panic attack or anxiety attack can

In [4]:
# Load pre-trained RoBERTa model and tokenizer
model_name = "roberta-base"
model = RobertaForMaskedLM.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)


In [61]:
# Tokenize and process the dataset
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="dataset.txt",
    block_size=128
)

# Create data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=True  # RoBERTa uses masked language modeling
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./FineTuneRoBERTa",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset
)

# Fine-tune the model
trainer.train()




Step,Training Loss


TrainOutput(global_step=204, training_loss=0.1912691265928979, metrics={'train_runtime': 70.6213, 'train_samples_per_second': 11.427, 'train_steps_per_second': 2.889, 'total_flos': 53113807219968.0, 'train_loss': 0.1912691265928979, 'epoch': 3.0})

In [62]:
model.save_pretrained(training_args.output_dir)

tokenizer.save_pretrained(training_args.output_dir)



('./FineTuneRoBERTa/tokenizer_config.json',
 './FineTuneRoBERTa/special_tokens_map.json',
 './FineTuneRoBERTa/vocab.json',
 './FineTuneRoBERTa/merges.txt',
 './FineTuneRoBERTa/added_tokens.json')

In [44]:
import tkinter as tk



model_name = "./FineTuneRoBERTa"  
model = RobertaForMaskedLM.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Function to generate a response
def generate_response(prompt, max_length=100):
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate response
    output = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=5,
        no_repeat_ngram_size=2,
        top_k=50,
        top_p=0.95,
        temperature=0.7
    )

    # Decode and return the generated text without repeating the user's input
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.replace(prompt, "", 1).strip()  # Remove user's input from the response

    # Remove the default "How can I help?" prefix
    response = response.split("?", 1)[-1].strip()

    return response

# Function to handle user input and update the chat window
def handle_user_input(event=None):
    user_input = entry.get()
    if user_input:
        response = generate_response(user_input)
        chat_history.config(state=tk.NORMAL)
        chat_history.insert(tk.END, f"You: {user_input}\nBot: {response}\n\n")
        chat_history.config(state=tk.DISABLED)
        entry.delete(0, tk.END)

# Create the main GUI window
root = tk.Tk()
root.title("Chatbot GUI")

# Create and configure GUI elements
chat_history = tk.Text(root, height=20, width=50, state=tk.DISABLED)
chat_history.pack()

entry = tk.Entry(root, width=50)
entry.pack(pady=10)
entry.bind("<Return>", handle_user_input)  # Bind the handle_user_input function to the Return key

send_button = tk.Button(root, text="Send", command=handle_user_input)
send_button.pack()

# Run the GUI
root.mainloop()




In [5]:
import tkinter as tk
from tkinter import scrolledtext

class ChatApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Chatbot Messenger")
        self.create_widgets()
        self.model_name = "./FineTuneRoBERTa"  # Replace with the path to your fine-tuned model directory
        self.model = RobertaForMaskedLM.from_pretrained(model_name)
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)

    def create_widgets(self):
        self.chat_history = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=50, height=20)
        self.chat_history.pack(pady=10)

        self.entry = tk.Entry(self.root, width=40)
        self.entry.pack(pady=10)
        self.entry.bind("<Return>", self.handle_user_input)

        self.send_button = tk.Button(self.root, text="Send", command=self.handle_user_input)
        self.send_button.pack(pady=10)

    def generate_response(self,prompt, max_length=100):
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        # Generate response
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_beams=5,
            no_repeat_ngram_size=2,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id
            )

        # Decode and return the generated text without repeating the user's input
        response = tokenizer.decode(output[0], skip_special_tokens=True)
        response = response.replace(prompt, "", 1).strip()  # Remove user's input from the response

        # Remove the default "How can I help?" prefix
        response = response.split("?", 1)[-1].strip()
        return response

    def handle_user_input(self, event=None):
        user_input = self.entry.get()
        if user_input:
            response = self.generate_response(user_input)
            self.update_chat_history(f"You: {user_input}\n")
            self.update_chat_history(f"Bot: {response}\n", bot=True)
            self.entry.delete(0, tk.END)

    def update_chat_history(self, message, bot=False):
        self.chat_history.config(state=tk.NORMAL)
        if bot:
            self.chat_history.tag_configure("bot", justify="left", foreground="blue")
            self.chat_history.insert(tk.END, message, "bot")
        else:
            self.chat_history.tag_configure("user", justify="right", foreground="green")
            self.chat_history.insert(tk.END, message, "user")
        self.chat_history.see(tk.END)
        self.chat_history.config(state=tk.DISABLED)

if __name__ == "__main__":
    root = tk.Tk()
    app = ChatApp(root)
    root.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/71/0q0jw7j15_7__5qvqmbhmjbw0000gn/T/ipykernel_32726/3723666327.py", line 49, in handle_user_input
    response = self.generate_response(user_input)
  File "/var/folders/71/0q0jw7j15_7__5qvqmbhmjbw0000gn/T/ipykernel_32726/3723666327.py", line 27, in generate_response
    output = model.generate(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/transformers/generation/utils.py", line 1460, in generate
    self._validate_model_class()
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/transformers/generation/utils.py

In [3]:
from transformers import RobertaForCausalLM, RobertaTokenizer


model_name = "./FineTuneRoBERTa"  
model = RobertaForCausalLM.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Example text for inference
prompt = "Where is US located"

input_ids = tokenizer.encode(prompt, return_tensors="pt")
        # Generate response
output = model.generate(
    input_ids,
    max_length=100,
    num_beams=5,
    no_repeat_ngram_size=2,
    top_k=50,
    top_p=0.95,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
    )

# Decode and return the generated text without repeating the user's input
response = tokenizer.decode(output[0], skip_special_tokens=True)
response = response.replace(prompt, "", 1).strip()
print(response)

HFValidationError: Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: './FineTuneRoBERTa'.

In [63]:
from transformers import RobertaForCausalLM, RobertaTokenizer

# Load the fine-tuned RoBERTa model and tokenizer
model_name = "./FineTuneRoBERTa"  # Update with the actual path to your fine-tuned model
model = RobertaForCausalLM.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Example text for inference
input_text = "What is mental illness?"

# Tokenize the input text
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate text with adjusted parameters
output = model.generate(input_ids, max_length=10000, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95)

# Decode and print the generated text
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)


If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`


What is mental illness?.


In [64]:
!pip install llama-index

