In [1]:
!pip install transformers datasets peft accelerate bitsandbytes gradio rouge-score evaluate nltk

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting gradio
  Downloading gradio-5.29.1-py3-none-any.whl.metadata (16 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-a

In [2]:
#import libraries
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from datasets import load_dataset, Dataset
import pandas as pd
import torch

In [None]:
#Load and check your dataset
df = pd.read_csv("debate_dataset_10k_fresh.csv", quoting=1, encoding="utf-8", engine="python")
print(df.shape) 

(3000, 3)


In [None]:
df.describe() #check the charecteristics of the dataset

Unnamed: 0,topic,for_argument,against_argument
count,3000,3000,3000
unique,30,10,10
top,Should hate speech be protected as free speech?,"It reflects the needs of a modern, diverse soc...",It undermines public trust through overregulat...
freq,100,300,300


In [None]:
from datasets import Dataset

#Load CSV file
df = pd.read_csv("debate_dataset_10k_fresh.csv") #Use your own dataset here

#Clean and normalize fields
df["topic"] = df["topic"].astype(str).str.strip()
df["for_argument"] = df["for_argument"].astype(str).str.strip()
df["against_argument"] = df["against_argument"].astype(str).str.strip()

#Drop incomplete rows or too-short rows
df.dropna(subset=["topic", "for_argument", "against_argument"], inplace=True)
df = df[
    (df["for_argument"].str.len() > 10) &
    (df["against_argument"].str.len() > 10)
]

#Normalize and simplify arguments
def clean_argument(arg):
    return str(arg).split("|")[0].strip().replace("’", "'").replace("‘", "'")

df["for_argument"] = df["for_argument"].apply(clean_argument)
df["against_argument"] = df["against_argument"].apply(clean_argument)

#Formatting the input (instruction-style)
df["input"] = df["topic"].apply(
    lambda x: f"Debate Topic: {x.strip()}\nProvide one clear argument FOR and one AGAINST. Label them."
)

#Formatting the output with
df["output"] = df.apply(
    lambda row: f"Topic: {row['topic']}\nArgument for: {row['for_argument']}\nArgument against: {row['against_argument']}",
    axis=1
)

#limiting output to 256 words total
df["output"] = df["output"].apply(
    lambda x: "\n".join([" ".join(part.strip().split()[:128]) for part in x.split("\n")])
)

#Converting to Hugging Face Dataset and split
debate_dataset = Dataset.from_pandas(df[["input", "output"]])
debate_dataset = debate_dataset.train_test_split(test_size=0.2)

#Preview a sample to see if it works so far
print("✅ Sample entry:")
print(debate_dataset["train"][0]["output"])

✅ Sample entry:
Topic: Should election day be a national holiday?
Argument for: It encourages civic engagement and democratic participation.
Argument against: It challenges traditional structures without viable alternatives.


In [6]:
#Strip each input
debate_dataset = debate_dataset.map(
    lambda example: {
        "input": str(example["input"]).strip(),
        "output": str(example["output"]).strip()
    }
)

#remove empty rows (depending on the dataset. Just a sanity check, one of many!)
debate_dataset = debate_dataset.filter(
    lambda example: example["input"] != "" and example["output"] != ""
)

Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2400 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

In [None]:
df.shape  #Sanity check 1

(3000, 5)

In [None]:
df["output"].apply(lambda x: len(x.split())).describe() #Sanity check 2

Unnamed: 0,output
count,3000.0
mean,26.733333
std,1.974045
min,21.0
25%,25.0
50%,27.0
75%,28.0
max,32.0


In [9]:
from transformers import AutoTokenizer

#Load tokenizer
model_name = "facebook/bart-base" #Replace with any model you like
tokenizer = AutoTokenizer.from_pretrained(model_name)

#Tokenization function
def tokenize_function(batch):
    #Tokenizing inputs
    model_inputs = tokenizer(
        batch["input"],
        max_length=256,
        padding="max_length",
        truncation=True
    )

    #Tokenizing outputs
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            text_target=batch["output"],
            max_length=256,
            padding="max_length",
            truncation=True
        )["input_ids"]

    #Mask padding tokens with -100 for loss
    pad_token_id = tokenizer.pad_token_id or 0
    model_inputs["labels"] = [
        [token if token != pad_token_id else -100 for token in label_seq]
        for label_seq in labels
    ]

    return model_inputs

#Apply the tokenization to the dataset
tokenized_dataset = debate_dataset.map(
    tokenize_function,
    remove_columns=debate_dataset["train"].column_names,
    batched=True
)

#Preview tokenized label
decoded_label = tokenizer.decode(
    [t for t in tokenized_dataset["train"][0]["labels"] if t != -100],
    skip_special_tokens=True
)

print("Decoded LABEL:")
print(decoded_label)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/2400 [00:00<?, ? examples/s]



Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Decoded LABEL:
Topic: Should election day be a national holiday?
Argument for: It encourages civic engagement and democratic participation.
Argument against: It challenges traditional structures without viable alternatives.


In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

#Define model
model_name = "facebook/bart-base" #Use any model you like

#Load model and resize for new tokens
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

BartScaledWordEmbedding(50265, 768, padding_idx=1)

In [12]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
import torch

#Define training arguments
training_args = TrainingArguments(
    output_dir="./debate_model_bart_10k_final",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    learning_rate=3e-5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="none",
    fp16=False
)

#Data collator for dynamic padding. A cope mechanism in case previous masking didn't work
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

#Setup Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

#Train
if torch.cuda.is_available():
    torch.cuda.empty_cache()

train_result = trainer.train()
print("Training completed.")
print("Final training loss:", train_result.metrics.get("train_loss"))

#Evaluate
eval_result = trainer.evaluate()
print("Validation loss:", eval_result.get("eval_loss"))

#Save the model & tokenizer for subsequent or future uses
model.save_pretrained("./debate_model_bart_10k_final")
tokenizer.save_pretrained("./debate_model_bart_10k_final")

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.1165,0.120405
2,0.119,0.119648
3,0.1184,0.116038
4,0.117,0.115391
5,0.1201,0.11392


There were missing keys in the checkpoint model loaded: ['model.encoder.embed_tokens.weight', 'model.decoder.embed_tokens.weight', 'lm_head.weight'].


Training completed.
Final training loss: 0.11857049687703451


Validation loss: 0.11391977965831757


('./debate_model_bart_10k_final/tokenizer_config.json',
 './debate_model_bart_10k_final/special_tokens_map.json',
 './debate_model_bart_10k_final/vocab.json',
 './debate_model_bart_10k_final/merges.txt',
 './debate_model_bart_10k_final/added_tokens.json',
 './debate_model_bart_10k_final/tokenizer.json')

In [13]:
import evaluate
import numpy as np
from tqdm import tqdm
import torch

#Load your trained model and tokenizer
model_path = "./debate_model_bart_10k_final"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to("cuda" if torch.cuda.is_available() else "cpu")

#Load ROUGE evaluator
rouge = evaluate.load("rouge")

#Generate predictions for evaluation dataset
def generate_predictions(dataset, num_samples=None):
    preds, refs = [], []
    dataset = dataset.select(range(num_samples)) if num_samples else dataset

    for sample in tqdm(dataset):
        inputs = tokenizer(sample["input"], return_tensors="pt", truncation=True, padding=True).to(model.device)
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=200,
                num_beams=4,
                early_stopping=True,
                no_repeat_ngram_size=3
            )

        generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        preds.append(generated_text.strip())
        refs.append(sample["output"].strip())

    return preds, refs

#Evaluate function using ROUGE
def evaluate_model(dataset, num_samples=None):
    predictions, references = generate_predictions(dataset, num_samples=num_samples)
    results = rouge.compute(predictions=predictions, references=references)

    #Display neatly
    print("ROUGE Scores:\n")
    for key, value in results.items():
        print(f"{key}: {value:.4f}")

#Run evaluation on test dataset
evaluate_model(debate_dataset["test"])

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

  0%|          | 0/600 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 600/600 [04:01<00:00,  2.48it/s]


ROUGE Scores:

rouge1: 0.4801
rouge2: 0.4199
rougeL: 0.4758
rougeLsum: 0.4777


In [14]:
import torch
import random
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr

#Load your fine-tuned debate model
model_path = "./debate_model_bart_10k_final"  # adjust as needed
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

#Expanded & improved a topic list
def generate_random_topics(n=5):
    curated_topics = [
        "Should AI replace human jobs?",
        "Is social media harmful to democracy?",
        "Should schools ban standardized testing?",
        "Is nuclear energy a viable green solution?",
        "Should voting be mandatory?",
        "Is cancel culture a form of accountability?",
        "Should meat consumption be taxed?",
        "Should homeschooling replace traditional education?",
        "Is universal basic income practical?",
        "Should animal testing be banned?",
        "Should billionaires exist in a fair society?",
        "Should we legalize euthanasia worldwide?",
        "Is space colonization ethical?",
        "Should governments regulate religious institutions?",
        "Should hate speech be protected under free speech?"
    ]
    return random.sample(curated_topics, n)

#Argument generation logic
def debate_response(prompt, max_tokens=200):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_tokens,
            num_beams=6,
            no_repeat_ngram_size=4,
            early_stopping=True,
            length_penalty=0.8,
            do_sample=False
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded.replace("Argument against:", "\nArgument against:").strip()

#Main handler function
def handle_debate(topic_choice, custom_topic):
    topic_used = custom_topic.strip() if custom_topic.strip() else topic_choice
    if not topic_used:
        return "⚠️ Please select or enter a topic."

    prompt = (
        f"Topic: {topic_used}\n"
        "Provide:\n"
        "- A compelling argument in favor.\n"
        "- A thoughtful counterargument."
    )
    return debate_response(prompt)

#Refresh dropdown function
def refresh_topics():
    new_topics = generate_random_topics()
    default = new_topics[0] if new_topics else ""
    return gr.update(choices=new_topics, value=default)

#Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI DebateBot\nGet balanced arguments on real-world issues.")

    topic_dropdown = gr.Dropdown(choices=generate_random_topics(), label="Choose a topic")
    refresh_btn = gr.Button("🔄 Refresh Topics")

    custom_input = gr.Textbox(label="Or enter a custom topic")
    generate_btn = gr.Button("💬 Generate Debate")
    output = gr.Textbox(label="DebateBot Response", lines=8)

    generate_btn.click(fn=handle_debate, inputs=[topic_dropdown, custom_input], outputs=output)
    refresh_btn.click(fn=refresh_topics, inputs=[], outputs=topic_dropdown)

demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7036fd5636081a1df7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
#Final sanity Check to see if the trained model gives adequate outputs
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

#Load fine-tuned model and tokenizer
model_path = "./debate_model_bart_10k_final"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model.to("cuda" if torch.cuda.is_available() else "cpu")

def debate_response(prompt, max_tokens=150):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)

    outputs = model.generate(
        **inputs,
        max_length=max_tokens,
        num_beams=6,
        no_repeat_ngram_size=4,
        early_stopping=True,
        length_penalty=0.8,
        do_sample=False
    )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded.replace("Argument against:", "\nArgument against:")

#Example. You can use your own prompt!
if __name__ == "__main__":
    prompt = "What is one argument for and one against the topic: Should AI be used in military?" #Replace with your own query
    response = debate_response(prompt, max_tokens=200)
    print("💬 DebateBot:\n")
    print(response)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


💬 DebateBot:

Topic: Should AI be used in military?
Argument for: It ensures sustainability and long-term societal benefit.

Argument against: It could lead to unintended consequences or social disruption.
