In [6]:
import json
import random

# Debate topics in English. The list is index-aligned with `spn_topics`:
# entry k here and entry k there name the same topic.
eng_topics = [
    'Euthanasia',
    'Mandatory vaccination in pandemic',
    'Physical appearance for personal success',
    'Intermittent fasting',
    'Capital punishment',
    'Animal testing',
    'Climate change',
    'Legalisation of cannabis',
    'Abortion',
    'Freedom of speech',
    'Tax increase',
    'Animal/human cloning',
    'Research in artificial intelligence',
    'Nuclear energy',
    'Use of online social networks',
    'Gun control',
    'Universal basic pension',
    'Gender quotas',
    'Genetic manipulation',
    'Reduction in working time',
    'Remote work',
    'Increasing security by sacrificing individual privacy',
    'Cryptocurrencies',
    'Censorship in social networks',
    'Terraplanism',
    'Renewable energy',
    'Electric transport',
]

# Spanish counterparts of `eng_topics`, in the same order.
spn_topics = [
    'Eutanasia',
    'Vacunación obligatoria en pandemia',
    'Apariencia física para el éxito personal',
    'Ayuno intermitente',
    'Pena de muerte',
    'Experimentación con animales',
    'Cambio climático',
    'Legalización de la cannabis',
    'Aborto',
    'Libertad de expresión',
    'Aumento de impuestos',
    'Clonación animal/humana',
    'Investigación en inteligencia artificial',
    'Energía nuclear',
    'Uso de redes sociales en línea',
    'Control de armas',
    'Pensión básica universal',
    'Cuotas de género',
    'Manipulación genética',
    'Reducción de la jornada laboral',
    'Trabajo remoto',
    'Aumento de la seguridad sacrificando la privacidad individual',
    'Criptomonedas',
    'Censura en redes sociales',
    'Terraplanismo',
    'Energía renovable',
    'Transporte eléctrico',
]

# The two stances a generated argument can take towards its topic.
stances = [
    "in favor",
    "against",
]

# Names of the argumentation schemes for which templates exist.
argumentation_schemes = [
    "position to know",
    "expert opinion",
    "direct ad hominem",
]

# Build a templated argument for a topic, stance and argumentation scheme
def generate_argument(topic, stance, scheme):
    """Return the premises and conclusion of a templated argument.

    The output is fully determined by the inputs; no randomness is involved.

    Args:
        topic: Topic name. It is inserted verbatim where it opens a sentence
            and lower-cased where it appears mid-sentence.
        stance: "in favor" selects the supporting template; any other value
            selects the opposing ("against") template.
        scheme: One of "position to know", "expert opinion" or
            "direct ad hominem".

    Returns:
        A dict mapping part names to sentences. "position to know" and
        "expert opinion" produce the keys "major premise", "minor premise"
        and "conclusion"; "direct ad hominem" produces
        "character attack premise" and "conclusion".

    Raises:
        ValueError: If ``scheme`` is not one of the supported schemes.
            (Previously the function fell through and returned ``None``,
            which callers would serialise as the JSON literal ``null``.)
    """
    # NOTE(review): every template is worded around medical / end-of-life
    # care, whatever the topic is, so most topics yield incoherent sentences.
    supports = stance == "in favor"
    topic_lower = topic.lower()

    if scheme == "position to know":
        if supports:
            return {
                "major premise": "Medical professionals are in position to know about the treatment options available for terminally ill patients.",
                "minor premise": f"Many medical professionals argue that {topic_lower} is a humane option for terminally ill patients who are experiencing unbearable suffering and have little hope for recovery.",
                "conclusion": f"{topic} can be a morally justifiable option for terminally ill patients who are experiencing unbearable suffering and have little hope for recovery.",
            }
        return {
            "major premise": "Medical professionals are in position to know about matters concerning the health and well-being of patients.",
            "minor premise": f"Medical professionals assert that {topic_lower} is contrary to the Hippocratic Oath which prohibits them from intentionally causing harm or death to patients.",
            "conclusion": f"{topic} is contrary to the principles of medical ethics and is therefore not an acceptable practice.",
        }

    if scheme == "expert opinion":
        if supports:
            return {
                "major premise": "Dr. Jack Kevorkian is an expert in the field of assisted suicide and end-of-life care.",
                "minor premise": f"Dr. Kevorkian asserts that {topic_lower} should be legalized and made available to terminally ill patients who are suffering intolerable pain and have no hope for recovery.",
                "conclusion": f"{topic} should be legalized and made available to terminally ill patients who are suffering intolerable pain and have no hope for recovery because Dr. Kevorkian is an expert in the field of assisted suicide and end-of-life care and asserts that this is the best course of action.",
            }
        return {
            "major premise": f"Dr. John Smith is an expert in medical ethics containing proposition that {topic_lower} undermines the value of human life.",
            "minor premise": f"Dr. Smith asserts that {topic_lower} undermines the value of human life.",
            "conclusion": f"{topic} undermines the value of human life.",
        }

    if scheme == "direct ad hominem":
        # NOTE(review): both stances below reject "the argument for" the
        # topic, so the "in favor" variant does not actually argue in favour.
        # Confirm the intended wording before changing the text.
        if supports:
            return {
                "character attack premise": f"Persons who advocate for {topic_lower} are heartless and lack compassion for the terminally ill.",
                "conclusion": f"The argument for {topic_lower} should not be accepted because it is coming from individuals who lack empathy and basic human decency.",
            }
        return {
            "character attack premise": "Dr. Smith is a heartless monster who does not care about human life.",
            "conclusion": f"Dr. Smith's argument for {topic_lower} should not be accepted.",
        }

    # Fail loudly rather than silently handing back None.
    raise ValueError(f"Unknown argumentation scheme: {scheme!r}")

# Assemble the English/Spanish dataset by cycling through the topic lists
def generate_dataset(num_samples):
    """Create ``num_samples`` labelled entries for each of the two languages.

    Sample ``k`` uses the topic at index ``k % len(eng_topics)``, a randomly
    chosen stance and a randomly chosen argumentation scheme. The argument
    produced by ``generate_argument`` is stored as an indented JSON string.
    The Spanish entry reuses the same index into ``spn_topics`` and a
    translated stance; its scheme name and argument text are still English
    placeholders.

    Args:
        num_samples: Number of entries to create per language.

    Returns:
        ``{"eng": {...}, "esp": {...}}``, where each inner dict maps the
        integer sample index to a record with the keys "topic", "stance",
        "argumentation scheme", "argument" and "label" (always "yes").
    """
    english_rows = {}
    spanish_rows = {}

    for sample_id in range(num_samples):
        position = sample_id % len(eng_topics)
        english_topic = eng_topics[position]

        # Keep the draw order (stance first, then scheme) so that seeded
        # runs reproduce the same sequence.
        picked_stance = random.choice(stances)
        picked_scheme = random.choice(argumentation_schemes)
        argument_json = json.dumps(
            generate_argument(english_topic, picked_stance, picked_scheme),
            indent=2,
        )

        english_rows[sample_id] = {
            "topic": english_topic,
            "stance": picked_stance,
            "argumentation scheme": picked_scheme,
            "argument": argument_json,
            "label": "yes",
        }

        # Simulated Spanish version: only the topic and stance are translated.
        if picked_stance == "in favor":
            spanish_stance = "a favor"
        else:
            spanish_stance = "en contra"

        spanish_rows[sample_id] = {
            "topic": spn_topics[position],
            "stance": spanish_stance,
            "argumentation scheme": picked_scheme,  # Should be translated for real use case
            "argument": argument_json,  # Should be translated for real use case
            "label": "yes",
        }

    return {"eng": english_rows, "esp": spanish_rows}

# Generate 4000 samples for each language
dataset = generate_dataset(4000)

# Write the dataset as UTF-8 JSON; ensure_ascii=False keeps accented
# characters readable instead of escaping them
output_path = 'generated_dataset.json'
with open(output_path, 'w', encoding='utf-8') as out_file:
    json.dump(dataset, out_file, ensure_ascii=False, indent=2)

print(f"Dataset generated and saved to '{output_path}'")


Dataset generated and saved to 'generated_dataset.json'
