In [1]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

In [3]:
# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Load your CSV dataset
df = pd.read_csv("Mental_Health_FAQ.csv")
df['Questions']

0           What does it mean to have a mental illness?
1                       Who does mental illness affect?
2                           What causes mental illness?
4               Can people with mental illness recover?
                            ...                        
93              How do I know if I'm drinking too much?
94    If cannabis is dangerous, why are we legalizin...
95         How can I convince my kids not to use drugs?
96    What is the legal status (and evidence) of CBD...
97                      What is the evidence on vaping?
Name: Questions, Length: 98, dtype: object

In [3]:

# Combine question and answer for training
df['input_text'] = df['Questions'] + " " + df['Answers']

# Save the combined text to a text file
df['input_text'].to_csv("your_dataset.txt", header=False, index=False)

# Tokenize and process the dataset
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="your_dataset.txt",
    block_size=128
)

# Create data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./FineTune",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset
)

# Fine-tune the model
trainer.train()





Step,Training Loss


TrainOutput(global_step=201, training_loss=2.705742831253887, metrics={'train_runtime': 1668.0151, 'train_samples_per_second': 0.477, 'train_steps_per_second': 0.121, 'total_flos': 51931791360000.0, 'train_loss': 2.705742831253887, 'epoch': 3.0})

In [None]:
model.save_pretrained(training_args.output_dir)

In [None]:
tokenizer.save_pretrained(training_args.output_dir)

In [None]:
import tkinter as tk
from tkinter import scrolledtext

class ChatApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Chatbot Messenger")
        self.create_widgets()
        self.model_name = "./FineTune"  # Replace with the path to your fine-tuned model directory
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    def create_widgets(self):
        self.chat_history = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=50, height=20)
        self.chat_history.pack(pady=10)

        self.entry = tk.Entry(self.root, width=40)
        self.entry.pack(pady=10)
        self.entry.bind("<Return>", self.handle_user_input)

        self.send_button = tk.Button(self.root, text="Send", command=self.handle_user_input)
        self.send_button.pack(pady=10)

    def generate_response(self,prompt, max_length=100):
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        # Generate response
        output = model.generate(
            input_ids,
            max_length=max_length,
            num_beams=5,
            no_repeat_ngram_size=2,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id
            )

        # Decode and return the generated text without repeating the user's input
        response = tokenizer.decode(output[0], skip_special_tokens=True)
        response = response.replace(prompt, "", 1).strip()  # Remove user's input from the response

        response = response.split("?", 1)[-1].strip()
        return response

    def handle_user_input(self, event=None):
        user_input = self.entry.get()
        if user_input:
            response = self.generate_response(user_input)
            self.update_chat_history(f"You: {user_input}\n")
            self.update_chat_history(f"Bot: {response}\n", bot=True)
            self.entry.delete(0, tk.END)

    def update_chat_history(self, message, bot=False):
        self.chat_history.config(state=tk.NORMAL)
        if bot:
            self.chat_history.tag_configure("bot", justify="left", foreground="blue")
            self.chat_history.insert(tk.END, message, "bot")
        else:
            self.chat_history.tag_configure("user", justify="right", foreground="green")
            self.chat_history.insert(tk.END, message, "user")
        self.chat_history.see(tk.END)
        self.chat_history.config(state=tk.DISABLED)

if __name__ == "__main__":
    root = tk.Tk()
    app = ChatApp(root)
    root.mainloop()
