# English Dataset Generation

Roughly the same as DatasetGeneration.ipynb, but for English examples. Not commented due to time limitations.

In [None]:
# Phrasings for "create calendar entry" commands in which the duration is
# stated explicitly. Placeholders filled via str.format():
# {activity}, {date}, {time}, {duration}.
command_templates = [
    "Create a new calendar entry for {activity} on {date} at {time} for {duration}",
    "Create a calendar entry for {activity} on {date} at {time} for {duration}",
    "Schedule {activity} on {date} at {time} for {duration} in the calendar",
    "Schedule {activity} on {date} at {time} for {duration}",
    "Enter {activity} for the {date} at {time} with a duration of {duration}",
    "Enter {activity} for the {date} at {time} with a duration of {duration} in the calendar",
    "Add a calendar entry for {activity} on {date} at {time} for {duration}"
]

# Phrasings for "create calendar entry" commands in which the duration is
# implied by the activity phrase itself (e.g. "a one-hour ...").
# Placeholders filled via str.format(): {timed_activity}, {date}, {time}.
# NOTE(review): the third and fourth entries are identical, which doubles the
# sampling weight of that phrasing — presumably a translation artifact; confirm.
command_templates_timed = [
    "Create a new calendar entry on {date} at {time} for {timed_activity}",
    "Create a calendar entry on {date} at {time} for {timed_activity}",
    "Add {timed_activity} to the calendar on {date} at {time}",
    "Add {timed_activity} to the calendar on {date} at {time}",
    "Add a calendar entry for {timed_activity} on {date} at {time}",
    "Schedule {timed_activity} in the calendar on {date} at {time}",
    "Schedule {timed_activity} on {date} at {time}",
]

In [None]:
from random import randint

# Activity titles used for commands with an explicit duration. The chosen
# string is inserted into the command text and also emitted verbatim as the
# title argument of the generated call.
activities = [
    "Linear algebra exercise",
    "Lecture on Middle High German",
    "Math exam",
    "Training in the gym",
    "Football training",
    "Concert",
    "Meeting with the design team",
    "Preliminary meeting for bachelor thesis",
    "Project presentation",
    "kick-off meeting",
    "weekly team meeting",
    "Restaurant visit",
    "Christmas dinner with my parents"
]

# Activities whose duration is implied by the phrase itself.
#   "timed_activity": text inserted into the command (contains the duration)
#   "activity":       title emitted as the first call argument
#   "duration":       duration in minutes emitted as the last call argument
# NOTE(review): most "timed_activity" phrases start with a capital "A" although
# they are inserted mid-sentence, and some titles are worded differently from
# the phrase they belong to (e.g. "information session on studying" vs.
# "Study information event") — confirm whether this is intended.
timed_activities = [
    {
        "timed_activity": "a one-hour initial meeting with customers",
        "activity": "initial meeting with customers",
        "duration": 60
    },
    {
        "timed_activity": "A two-hour seminar presentation",
        "activity": "Seminar presentation",
        "duration": 120
    },
    {
        "timed_activity": "A three-hour rock concert",
        "activity": "Rock concert",
        "duration": 180
    },
    {
        "timed_activity": "A one and a half hour biology of the animal kingdom lecture",
        "activity": "Biology of the animal kingdom lecture",
        "duration": 90
    },
    {
        "timed_activity": "A one-hour information session on studying",
        "activity": "Study information event",
        "duration": 60
    },
    {
        "timed_activity": "A two-hour study session in the library",
        "activity": "Study time in the library",
        "duration": 120
    },
    {
        "timed_activity": "A three-hour train journey to Berlin",
        "activity": "Train ride to Berlin",
        "duration": 180
    },
    {
        "timed_activity": "A one and a half hour weekly team meeting",
        "activity": "Weekly team meeting",
        "duration": 90
    }
]

In [None]:
import random
from datetime import datetime, timedelta

# Month names in English and German, keyed by language code. Each list is
# ordered January..December, so it is indexed with `month - 1`.
MONTH_NAMES = {
    'en': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'de': ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']
}

def generate_random_date(min_year=2022, max_year=2026, use_month_name=False, language="de"):
    """Draw a random calendar date and return it in two textual forms.

    The date is drawn uniformly from 1 January of ``min_year`` up to and
    including 31 December of ``max_year``.

    Parameters:
        min_year (int): First year of the sampling range.
        max_year (int): Last year of the sampling range (inclusive).
        use_month_name (bool): If true, format the spoken form as
            "DD. <month name> YYYY"; otherwise as "DD.MM.YYYY".
        language (str): Key into ``MONTH_NAMES`` ("de" or "en"); only read
            when ``use_month_name`` is true.

    Returns:
        tuple[str, str]: ``(spoken_form, parameter_form)`` where
        ``parameter_form`` is the ISO date wrapped in double quotes,
        e.g. ``'"2023-03-10"'``.

    Raises:
        ValueError: If ``max_year`` is smaller than ``min_year``.
        KeyError: If ``use_month_name`` is true and ``language`` is not a key
            of ``MONTH_NAMES``.
    """
    start_date = datetime(min_year, 1, 1)
    end_date = datetime(max_year, 12, 31)
    days_between_dates = (end_date - start_date).days

    # randrange() excludes its upper bound, so add 1 to make end_date itself
    # reachable (previously 31 December of max_year could never be drawn).
    random_number_of_days = random.randrange(days_between_dates + 1)
    random_date = start_date + timedelta(days=random_number_of_days)

    # Spoken form as it appears in the command text.
    if use_month_name:
        day = random_date.day
        month = random_date.month
        year = random_date.year
        month_name = MONTH_NAMES[language][month - 1]
        formatted_date_1 = f"{day:02d}. {month_name} {year}"
    else:
        formatted_date_1 = random_date.strftime("%d.%m.%Y")

    # Parameter form: YYYY-MM-DD wrapped in double quotes.
    formatted_date_2 = f'"{random_date.strftime("%Y-%m-%d")}"'

    return formatted_date_1, formatted_date_2

# Example usage: exercise the three spoken-form variants (numeric date,
# German month name, English month name). Output differs on every run because
# no random seed is set.
print("Default usage:")
date_1, date_2 = generate_random_date()
print(f"Random date in DD.MM.YYYY format: {date_1}")
print(f"Same date in YYYY-MM-DD format: {date_2}")

print("\nUsing month name in German:")
date_1, date_2 = generate_random_date(use_month_name=True)
print(f"Random date with German month name: {date_1}")
print(f"Same date in YYYY-MM-DD format: {date_2}")

print("\nUsing month name in English:")
date_1, date_2 = generate_random_date(use_month_name=True, language="en")
print(f"Random date with English month name: {date_1}")
print(f"Same date in YYYY-MM-DD format: {date_2}")

Default usage:
Random date in DD.MM.YYYY format: 10.03.2023
Same date in YYYY-MM-DD format: "2023-03-10"

Using month name in German:
Random date with German month name: 20. März 2022
Same date in YYYY-MM-DD format: "2022-03-20"

Using month name in English:
Random date with English month name: 18. October 2024
Same date in YYYY-MM-DD format: "2024-10-18"


In [None]:
import random

def generate_random_time(leading_zero=False, has_minutes=True):
    """Draw a random time of day and return it in two textual forms.

    Parameters:
        leading_zero (bool): Zero-pad the hour of the spoken form to two
            digits. Only applied when ``has_minutes`` is true.
        has_minutes (bool): If true, draw minutes in 5-minute steps (0..55)
            and include them in the spoken form; if false, the time is a full
            hour and the spoken form is the bare hour number.

    Returns:
        tuple[str, str]: ``(spoken_form, parameter_form)`` where
        ``parameter_form`` is always ``HH:MM`` wrapped in double quotes.
    """
    # The hour is always drawn first; minutes are only drawn when requested.
    hour = random.randint(0, 23)
    minute = random.randint(0, 11) * 5 if has_minutes else 0

    if not has_minutes:
        # Hour-only wording, never zero-padded.
        spoken = str(hour)
    elif leading_zero:
        spoken = f"{hour:02d}:{minute:02d}"
    else:
        spoken = f"{hour}:{minute:02d}"

    return spoken, f'"{hour:02d}:{minute:02d}"'

# Generate and print a random time (returns a (spoken, parameter) tuple)
random_time = generate_random_time()
print(f"Random time: {random_time}")

# Generate several hour-only times (has_minutes=False). leading_zero=True has
# no visible effect on the spoken form in this mode.
num_times = 5
print(f"\nGenerating {num_times} random times:")
for _ in range(num_times):
    print(generate_random_time(True, False))

Random time: ('21:05', '"21:05"')

Generating 5 random times:
('19', '"19:00"')
('12', '"12:00"')
('18', '"18:00"')
('23', '"23:00"')
('0', '"00:00"')


In [None]:
def generate_random_duration(only_minutes=False, decimal_hours=False, language="de"):
    """Draw a random duration and return its spoken form plus the minute count.

    Parameters:
        only_minutes (bool): Draw a multiple of 5 between 5 and 165 minutes
            and phrase it in minutes. Takes precedence over ``decimal_hours``.
        decimal_hours (bool): Draw 1-3 full hours plus 30 minutes and phrase
            it as a decimal number of hours (decimal comma for German).
        language (str): "de" selects German wording; any other value selects
            English wording.

    Returns:
        tuple[str, int]: ``(spoken_form, duration_in_minutes)``.
    """
    german = language == "de"

    # Minute-based wording wins over every other option.
    if only_minutes:
        duration = random.randint(1, 33) * 5
        unit = "Minuten" if german else "minutes"
        return f"{duration} {unit}", duration

    duration = random.randint(1, 3) * 60
    if decimal_hours:
        duration += 30
        amount = f"{duration / 60:.1f}"
        if german:
            return amount.replace('.', ',') + " Stunden", duration
        return amount + " hours", duration

    # Whole hours: the singular form is used for exactly one hour.
    hours = duration // 60
    if german:
        spoken = "1 Stunde" if hours == 1 else f"{hours} Stunden"
    else:
        spoken = "1 hour" if hours == 1 else f"{hours} hours"
    return spoken, duration

# Example usage: one call per wording variant (minutes, decimal hours, whole
# hours) in German and English. Each call returns (spoken_form, minutes).
print(generate_random_duration(only_minutes=True, language="de"))
print(generate_random_duration(only_minutes=True, language="en"))
print(generate_random_duration(decimal_hours=True, language="de"))
print(generate_random_duration(decimal_hours=True, language="en"))
print(generate_random_duration(language="de"))
print(generate_random_duration(language="en"))

('160 Minuten', 160)
('50 minutes', 50)
('2,5 Stunden', 150)
('2.5 hours', 150)
('1 Stunde', 60)
('2 hours', 120)


In [None]:
# Accumulator for the generated create_calendar_entry samples (list of dicts).
# Re-running this cell discards everything generated so far.
training_data = list()

In [None]:
import numpy as np

In [None]:
# Generate 619 create_calendar_entry samples with an explicitly stated
# duration and append them to `training_data`.
# NOTE(review): the loop variables (`time`, `duration`, ...) stay in the
# notebook namespace after the loop and are read again by later cells, so they
# must not be renamed here without updating those cells.
for _ in range(619):
  min_year = 2022
  max_year = 2026
  language = "en"
  # Randomly vary the surface form of date, time and duration.
  use_month_name = np.random.choice([True, False])
  leading_zero = np.random.choice([True, False])
  has_minutes = np.random.choice([True, False])
  only_minutes = np.random.choice([True, False])
  decimal_hours = np.random.choice([True, False])

  # Each helper returns (spoken form for the command, value for the call).
  activity = np.random.choice(activities)
  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)
  time, parameter_time = generate_random_time(leading_zero=leading_zero, has_minutes=has_minutes)
  duration, parameter_duration = generate_random_duration(only_minutes=only_minutes, decimal_hours=decimal_hours, language=language)

  command = np.random.choice(command_templates).format(activity=activity, date=date, time=time, duration=duration)

  # Serialized argument tuple: ("<title>", "<YYYY-MM-DD>", "<HH:MM>", <minutes>)
  parameters = "(" + "\"" + activity + "\"" + ", " + parameter_date + ", " + parameter_time + ", " + str(parameter_duration) + ")"

  training_data.append({
      "function_name": "create_calendar_entry",
      "command": command,
      "parameters": parameters
  })

In [None]:
# Peek at the first ten generated samples.
training_data[:10]

[{'function_name': 'create_calendar_entry',
  'command': 'Enter Training in the gym for the 18.06.2023 at 10 with a duration of 2.5 hours',
  'parameters': '("Training in the gym", "2023-06-18", "10:00", 150)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Add a calendar entry for Math exam on 15. December 2023 at 13 for 1 hour',
  'parameters': '("Math exam", "2023-12-15", "13:00", 60)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Create a new calendar entry for Lecture on Middle High German on 23. January 2026 at 10:15 for 100 minutes',
  'parameters': '("Lecture on Middle High German", "2026-01-23", "10:15", 100)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Add a calendar entry for Concert on 12.04.2026 at 20 for 2.5 hours',
  'parameters': '("Concert", "2026-04-12", "20:00", 150)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Create a calendar entry for Meeting with the design team on 10.11.2022 at 5 for 130 minutes',
  'par

In [None]:
# Generate 381 create_calendar_entry samples whose duration is implied by the
# activity phrase (e.g. "a one-hour ...") and append them to `training_data`.
for _ in range(381):
  min_year = 2022
  max_year = 2026
  language = "en"
  # Randomly vary the surface form of date and time.
  use_month_name = np.random.choice([True, False])
  leading_zero = np.random.choice([True, False])
  has_minutes = np.random.choice([True, False])

  timed_activity = np.random.choice(timed_activities)
  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)
  time, parameter_time = generate_random_time(leading_zero=leading_zero, has_minutes=has_minutes)

  # Title and duration of the call come from the chosen entry, not from the
  # command text.
  activity = timed_activity["activity"]
  parameter_duration = timed_activity["duration"]

  # The timed templates only contain {timed_activity}, {date} and {time}. The
  # former `duration=duration` argument read a variable left over from the
  # previous cell and was never used by any template, so it is dropped.
  command = np.random.choice(command_templates_timed).format(timed_activity=timed_activity["timed_activity"], date=date, time=time)

  # Serialized argument tuple: ("<title>", "<YYYY-MM-DD>", "<HH:MM>", <minutes>)
  parameters = "(" + "\"" + activity + "\"" + ", " + parameter_date + ", " + parameter_time + ", " + str(parameter_duration) + ")"

  training_data.append({
      "function_name": "create_calendar_entry",
      "command": command,
      "parameters": parameters
  })

In [None]:
# Sanity check: 619 explicit-duration + 381 implied-duration samples = 1000.
len(training_data)

1000

In [None]:
# Phrasings for "delete calendar entry" commands. Placeholder: {entry_id}.
# NOTE(review): several "appointment" phrasings appear twice in a row, which
# doubles their sampling weight — presumably a translation artifact; confirm.
delete_command_templates = [
    "Delete the calendar entry {entry_id}",
    "Delete the entry {entry_id} from my calendar",
    "Remove the entry {entry_id}",
    "Remove the {entry_id} entry from my calendar",
    "Delete entry {entry_id} from the calendar",
    "Remove entry {entry_id} from the calendar",
    "Delete calendar entry {entry_id}",
    "Remove calendar entry {entry_id}",
    "Delete appointment {entry_id}",
    "Delete appointment {entry_id}",
    "Delete appointment {entry_id} from my calendar",
    "Delete appointment {entry_id} from my calendar",
    "Remove appointment {entry_id}",
    "Remove appointment {entry_id}",
    "Remove appointment {entry_id} from my calendar",
    "Remove appointment {entry_id} from my calendar",
    "Remove the entered appointment with ID {entry_id}",
    "Delete the entered appointment with ID {entry_id}",
    "Remove registered appointment with ID {entry_id}",
    "Delete registered appointment with ID {entry_id}",
]

In [None]:
# Number of delete phrasings (duplicates included).
len(delete_command_templates)

20

In [None]:
import uuid

def generate_short_uuid(length=10):
    """Return the first ``length`` hex characters of a fresh random UUID4.

    The UUID is rendered as 32 lowercase hex digits without dashes, so the
    result is never longer than 32 characters.
    """
    return uuid.uuid4().hex[:length]


In [None]:
# Accumulator for the generated delete_calendar_entry samples (list of dicts).
delete_commands_training_data = list()

In [None]:
# Generate 1000 delete_calendar_entry samples: a random 10-character id is
# placed into a random phrasing and emitted as the single call argument.
for _ in range(1000):
  entry_id = generate_short_uuid(10)
  command = np.random.choice(delete_command_templates).format(entry_id=entry_id)
  # Serialized argument tuple: ("<entry id>")
  parameters = f'("{entry_id}")'
  delete_commands_training_data.append(
      dict(function_name="delete_calendar_entry", command=command, parameters=parameters)
  )

In [None]:
# Sanity check: expect 1000 delete samples.
len(delete_commands_training_data)

1000

In [None]:
# Phrasings for "list calendar entries" commands that ask for ALL entries of
# a day. Placeholder: {date}.
list_command_templates = [
    "Show me all appointments for the {date}",
    "List all appointments for {date}",
    "Show all appointments on {date}",
    "List all appointments on {date}",
    "Show me all calendar entries for the {date}",
    "List all calendar entries for {date}",
    "Show all calendar entries on {date}",
    "List all calendar entries on {date}",
    "Which appointments do I have on {date}?",
    # The comma after the next entry was missing, which silently concatenated
    # it with the following template into one string with two {date} slots.
    "Which entries are in the calendar for {date}?",
    "Show calendar entries for the {date}",
    "List calendar entries for the {date}",
    "Show entries for the {date}",
    "List entries for the {date}",
    "What do I have in the calendar on {date}?",
    "What appointments are coming up on {date}?",
    "Show appointments for the {date}",
    "List appointments for {date}",
    "Which calendar entries are available for {date}?",
    "Show appointments on {date}",
    "List appointments on {date}",
]

# Number of "list all" phrasings (21 once the two merged templates are split).
len(list_command_templates)

20

In [None]:
# Phrasings for "list calendar entries" commands that ask only for entries
# that are still open / not finished. Placeholder: {date}.
list_command_templates_not_finished = [
    "Show me all outstanding appointments for the {date}",
    "List all open appointments for {date}",
    "Show all unfinished appointments on {date}",
    "List all open appointments on {date}",
    "Show me outstanding calendar entries for the {date}",
    "List open calendar entries for the {date}",
    "Show all unfinished calendar entries for {date}",
    "List all open calendar entries on {date}",
    "Which appointments do I still have on {date}?",
    # The comma after the next entry was missing, which silently concatenated
    # it with the following template into one string with two {date} slots.
    "Which entries are still in the calendar for {date}?",
    "Show outstanding calendar entries for {date}",
    "List open calendar entries for the {date}",
    "Show open entries for the {date}",
    "List pending entries for the {date}",
    "What do I still have in the calendar on {date}?",
    "Which appointments are still open for me on {date}?",
    "Show open appointments for the {date}",
    "List outstanding appointments for {date}",
    "Which calendar entries are still pending for {date}?",
    "Show appointments on {date} that are still pending for me",
    "List appointments on {date} that are still open",
]

In [None]:
# Accumulator for the generated list_calendar_entries samples (list of dicts).
list_commands_training_data = list()

In [None]:
# Generate 1000 list_calendar_entries samples. Each sample randomly asks
# either for all entries of a day or only for the not-yet-finished ones.
for _ in range(1000):
  min_year = 2022
  max_year = 2026
  language = "en"
  not_finished = np.random.choice([True, False])
  use_month_name = np.random.choice([True, False])

  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)

  # The list templates only contain {date}. The former extra keyword
  # arguments (timed_activity, time, duration) read variables left over from
  # earlier cells and were never used by any template, so they are dropped,
  # as is the unused `leading_zero` draw.
  if not_finished:
    command = np.random.choice(list_command_templates_not_finished).format(date=date)
  else:
    command = np.random.choice(list_command_templates).format(date=date)

  # Serialized argument tuple: ("<YYYY-MM-DD>", True|False)
  parameters = "(" + parameter_date + ", " + str(not_finished) + ")"

  list_commands_training_data.append({
      "function_name": "list_calendar_entries",
      "command": command,
      "parameters": parameters
  })

In [None]:
# Sanity check: expect 1000 list samples.
len(list_commands_training_data)

1000

In [None]:
# Peek at the first ten create samples.
training_data[:10]

[{'function_name': 'create_calendar_entry',
  'command': 'Enter Training in the gym for the 18.06.2023 at 10 with a duration of 2.5 hours',
  'parameters': '("Training in the gym", "2023-06-18", "10:00", 150)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Add a calendar entry for Math exam on 15. December 2023 at 13 for 1 hour',
  'parameters': '("Math exam", "2023-12-15", "13:00", 60)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Create a new calendar entry for Lecture on Middle High German on 23. January 2026 at 10:15 for 100 minutes',
  'parameters': '("Lecture on Middle High German", "2026-01-23", "10:15", 100)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Add a calendar entry for Concert on 12.04.2026 at 20 for 2.5 hours',
  'parameters': '("Concert", "2026-04-12", "20:00", 150)'},
 {'function_name': 'create_calendar_entry',
  'command': 'Create a calendar entry for Meeting with the design team on 10.11.2022 at 5 for 130 minutes',
  'par

In [None]:
# Peek at the first ten delete samples.
delete_commands_training_data[:10]

[{'function_name': 'delete_calendar_entry',
  'command': 'Remove appointment e99f75ca0c from my calendar',
  'parameters': '("e99f75ca0c")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Remove entry e76eb6f0c2 from the calendar',
  'parameters': '("e76eb6f0c2")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Remove appointment a33d000be0',
  'parameters': '("a33d000be0")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Remove appointment ef752c93fc from my calendar',
  'parameters': '("ef752c93fc")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Remove appointment 7ac8e7f7ed',
  'parameters': '("7ac8e7f7ed")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Delete entry 94e03001cc from the calendar',
  'parameters': '("94e03001cc")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Delete the calendar entry b8a6fd9a3f',
  'parameters': '("b8a6fd9a3f")'},
 {'function_name': 'delete_calendar_entry',
  'command': 'Re

In [None]:
# Peek at the first ten list samples.
list_commands_training_data[:10]

[{'function_name': 'list_calendar_entries',
  'command': 'List outstanding appointments for 17.09.2023',
  'parameters': '("2023-09-17", True)'},
 {'function_name': 'list_calendar_entries',
  'command': 'Show appointments for the 01. September 2022',
  'parameters': '("2022-09-01", False)'},
 {'function_name': 'list_calendar_entries',
  'command': 'What do I still have in the calendar on 05. May 2023?',
  'parameters': '("2023-05-05", True)'},
 {'function_name': 'list_calendar_entries',
  'command': 'List all calendar entries for 05. March 2023',
  'parameters': '("2023-03-05", False)'},
 {'function_name': 'list_calendar_entries',
  'command': 'List pending entries for the 12. December 2025',
  'parameters': '("2025-12-12", True)'},
 {'function_name': 'list_calendar_entries',
  'command': 'List appointments for 14.06.2025',
  'parameters': '("2025-06-14", False)'},
 {'function_name': 'list_calendar_entries',
  'command': 'Show all unfinished appointments on 31.08.2022',
  'parameters':

In [None]:
import pandas as pd

In [None]:
# Merge the create, delete and list samples (in that order) into one DataFrame
# with the columns function_name, command and parameters.
# NOTE(review): `training_data` is rebound from a list to a DataFrame here, so
# re-running this cell on its own will not reproduce the same result; re-run
# the generation cells above first.
training_data = pd.DataFrame(training_data + delete_commands_training_data + list_commands_training_data)
training_data

Unnamed: 0,function_name,command,parameters
0,create_calendar_entry,Enter Training in the gym for the 18.06.2023 a...,"(""Training in the gym"", ""2023-06-18"", ""10:00"",..."
1,create_calendar_entry,Add a calendar entry for Math exam on 15. Dece...,"(""Math exam"", ""2023-12-15"", ""13:00"", 60)"
2,create_calendar_entry,Create a new calendar entry for Lecture on Mid...,"(""Lecture on Middle High German"", ""2026-01-23""..."
3,create_calendar_entry,Add a calendar entry for Concert on 12.04.2026...,"(""Concert"", ""2026-04-12"", ""20:00"", 150)"
4,create_calendar_entry,Create a calendar entry for Meeting with the d...,"(""Meeting with the design team"", ""2022-11-10"",..."
...,...,...,...
2995,list_calendar_entries,Which appointments do I still have on 21. July...,"(""2024-07-21"", True)"
2996,list_calendar_entries,Show all appointments on 15. February 2022,"(""2022-02-15"", False)"
2997,list_calendar_entries,Which entries are still in the calendar for 28...,"(""2025-10-28"", True)"
2998,list_calendar_entries,Which entries are still in the calendar for 01...,"(""2023-06-01"", True)"


In [None]:
# Source-style descriptions (signature + docstring) of the three callable
# calendar functions. The text is appended verbatim to every training
# completion, so the signature must match the function name used as key in
# `calendar_functions`.
# The signature below previously read `create_calender_entry` (typo).
# NOTE(review): if DatasetGeneration.ipynb carries the same typo, fix it there
# as well so both datasets stay consistent.
create_calendar_entry_function = '''def create_calendar_entry(title, date, time, duration):
"""
Create a calendar entry with the specified details and call the calendar API.

Parameters:
title (str): The title of the calendar entry.
date (str): The date of the calendar entry in the format 'YYYY-MM-DD'.
time (str): The time of the calendar entry in the format 'HH:MM'.
duration (int): The duration of the calendar entry in minutes.

Returns:
bool: True if the calendar API call was successful, False otherwise.
"""'''

delete_calendar_entry_function = '''def delete_calendar_entry(entry_id):
"""
Delete a calendar entry with the specified ID by calling the calendar API.

Parameters:
entry_id (str): The unique identifier of the calendar entry to be deleted.

Returns:
bool: True if the calendar API call was successful, False otherwise.
"""'''

# (A redundant chained self-assignment of this name was removed.)
list_calendar_entries_function = '''def list_calendar_entries(date, only_not_finished=False):
"""
List all calendar entries for a specified date by calling the calendar API.
Optionally, return only the entries that have not finished.

Parameters:
date (str): The date for which to list calendar entries in the format 'YYYY-MM-DD'.
only_not_finished (bool): If True, only return entries that have not finished. Defaults to False.

Returns:
list: A list of dictionaries, each representing a calendar entry with the following keys:
    - 'title': The title of the entry.
    - 'start': The start datetime of the entry in ISO format.
    - 'end': The end datetime of the entry in ISO format.
    - 'duration': The duration of the entry in minutes.
"""'''

# Lookup from function name (as stored in the samples) to its description.
calendar_functions = {
    "create_calendar_entry": create_calendar_entry_function,
    "delete_calendar_entry": delete_calendar_entry_function,
    "list_calendar_entries": list_calendar_entries_function
}

In [None]:
# Instruction prompt wrapped around every command. {query} is replaced by the
# command text; the prompt ends with "Response: " so that the completion can
# be appended directly.
prompt_template = """Below is the query from the users, please choose the correct function and generate the parameters to call the function.
Query: {query}
Response: """

In [None]:
# Wrap every command in the instruction prompt. The list is built in row
# order, so it lines up positionally with the DataFrame rows.
training_data["prompt"] = [
    prompt_template.format(query=query) for query in training_data["command"]
]
training_data

Unnamed: 0,function_name,command,parameters,prompt
0,create_calendar_entry,Enter Training in the gym for the 18.06.2023 a...,"(""Training in the gym"", ""2023-06-18"", ""10:00"",...","Below is the query from the users, please choo..."
1,create_calendar_entry,Add a calendar entry for Math exam on 15. Dece...,"(""Math exam"", ""2023-12-15"", ""13:00"", 60)","Below is the query from the users, please choo..."
2,create_calendar_entry,Create a new calendar entry for Lecture on Mid...,"(""Lecture on Middle High German"", ""2026-01-23""...","Below is the query from the users, please choo..."
3,create_calendar_entry,Add a calendar entry for Concert on 12.04.2026...,"(""Concert"", ""2026-04-12"", ""20:00"", 150)","Below is the query from the users, please choo..."
4,create_calendar_entry,Create a calendar entry for Meeting with the d...,"(""Meeting with the design team"", ""2022-11-10"",...","Below is the query from the users, please choo..."
...,...,...,...,...
2995,list_calendar_entries,Which appointments do I still have on 21. July...,"(""2024-07-21"", True)","Below is the query from the users, please choo..."
2996,list_calendar_entries,Show all appointments on 15. February 2022,"(""2022-02-15"", False)","Below is the query from the users, please choo..."
2997,list_calendar_entries,Which entries are still in the calendar for 28...,"(""2025-10-28"", True)","Below is the query from the users, please choo..."
2998,list_calendar_entries,Which entries are still in the calendar for 01...,"(""2023-06-01"", True)","Below is the query from the users, please choo..."


In [None]:
# Numeric id of each function; it is inserted into the `<oc_N>` tag that opens
# every completion.
function_numbers = {
    "create_calendar_entry": 1,
    "delete_calendar_entry": 2,
    "list_calendar_entries": 3
}

In [None]:
# Target text for every sample: the function tag with the serialized argument
# tuple, closed by <oc_end>, followed by the description of the chosen function.
completion_template = """<oc_{function_number}>{function_parameters}<oc_end>
Function description: {function_description}"""

In [None]:
def format_completion_template(row):
    """Build the completion text for one sample row.

    ``row`` must provide the keys "function_name" and "parameters". The
    function name selects both the description and the numeric id that are
    substituted into ``completion_template``.
    """
    name = row["function_name"]
    return completion_template.format(
        function_parameters=row["parameters"],
        function_description=calendar_functions[name],
        function_number=function_numbers[name],
    )

# Build the completion for every row (axis=1 passes one row at a time).
training_data["completion"] = training_data.apply(format_completion_template, axis=1)
training_data

Unnamed: 0,function_name,command,parameters,prompt,completion
0,create_calendar_entry,Enter Training in the gym for the 18.06.2023 a...,"(""Training in the gym"", ""2023-06-18"", ""10:00"",...","Below is the query from the users, please choo...","<oc_1>(""Training in the gym"", ""2023-06-18"", ""1..."
1,create_calendar_entry,Add a calendar entry for Math exam on 15. Dece...,"(""Math exam"", ""2023-12-15"", ""13:00"", 60)","Below is the query from the users, please choo...","<oc_1>(""Math exam"", ""2023-12-15"", ""13:00"", 60)..."
2,create_calendar_entry,Create a new calendar entry for Lecture on Mid...,"(""Lecture on Middle High German"", ""2026-01-23""...","Below is the query from the users, please choo...","<oc_1>(""Lecture on Middle High German"", ""2026-..."
3,create_calendar_entry,Add a calendar entry for Concert on 12.04.2026...,"(""Concert"", ""2026-04-12"", ""20:00"", 150)","Below is the query from the users, please choo...","<oc_1>(""Concert"", ""2026-04-12"", ""20:00"", 150)<..."
4,create_calendar_entry,Create a calendar entry for Meeting with the d...,"(""Meeting with the design team"", ""2022-11-10"",...","Below is the query from the users, please choo...","<oc_1>(""Meeting with the design team"", ""2022-1..."
...,...,...,...,...,...
2995,list_calendar_entries,Which appointments do I still have on 21. July...,"(""2024-07-21"", True)","Below is the query from the users, please choo...","<oc_3>(""2024-07-21"", True)<oc_end>\nFunction d..."
2996,list_calendar_entries,Show all appointments on 15. February 2022,"(""2022-02-15"", False)","Below is the query from the users, please choo...","<oc_3>(""2022-02-15"", False)<oc_end>\nFunction ..."
2997,list_calendar_entries,Which entries are still in the calendar for 28...,"(""2025-10-28"", True)","Below is the query from the users, please choo...","<oc_3>(""2025-10-28"", True)<oc_end>\nFunction d..."
2998,list_calendar_entries,Which entries are still in the calendar for 01...,"(""2023-06-01"", True)","Below is the query from the users, please choo...","<oc_3>(""2023-06-01"", True)<oc_end>\nFunction d..."


In [None]:
# Full training text = prompt immediately followed by the completion (the
# prompt already ends with "Response: "). Show one example for inspection.
training_data["text"] = training_data["prompt"] + training_data["completion"]
training_data["text"].iloc[2]

'Below is the query from the users, please choose the correct function and generate the parameters to call the function.\nQuery: Create a new calendar entry for Lecture on Middle High German on 23. January 2026 at 10:15 for 100 minutes\nResponse: <oc_1>("Lecture on Middle High German", "2026-01-23", "10:15", 100)<oc_end>\nFunction description: def create_calender_entry(title, date, time, duration):\n"""\nCreate a calendar entry with the specified details and call the calendar API.\n\nParameters:\ntitle (str): The title of the calendar entry.\ndate (str): The date of the calendar entry in the format \'YYYY-MM-DD\'.\ntime (str): The time of the calendar entry in the format \'HH:MM\'.\nduration (int): The duration of the calendar entry in minutes.\n\nReturns:\nbool: True if the calendar API call was successful, False otherwise.\n"""'

In [None]:
# Persist all columns as a semicolon-separated CSV without the index column.
# The file is written relative to the current working directory.
training_data.to_csv("english-dataset.csv", index=False, sep=";")

In [None]:
# Display the final table as it was exported.
training_data

Unnamed: 0,function_name,command,parameters,prompt,completion,text
0,create_calendar_entry,Enter Training in the gym for the 18.06.2023 a...,"(""Training in the gym"", ""2023-06-18"", ""10:00"",...","Below is the query from the users, please choo...","<oc_1>(""Training in the gym"", ""2023-06-18"", ""1...","Below is the query from the users, please choo..."
1,create_calendar_entry,Add a calendar entry for Math exam on 15. Dece...,"(""Math exam"", ""2023-12-15"", ""13:00"", 60)","Below is the query from the users, please choo...","<oc_1>(""Math exam"", ""2023-12-15"", ""13:00"", 60)...","Below is the query from the users, please choo..."
2,create_calendar_entry,Create a new calendar entry for Lecture on Mid...,"(""Lecture on Middle High German"", ""2026-01-23""...","Below is the query from the users, please choo...","<oc_1>(""Lecture on Middle High German"", ""2026-...","Below is the query from the users, please choo..."
3,create_calendar_entry,Add a calendar entry for Concert on 12.04.2026...,"(""Concert"", ""2026-04-12"", ""20:00"", 150)","Below is the query from the users, please choo...","<oc_1>(""Concert"", ""2026-04-12"", ""20:00"", 150)<...","Below is the query from the users, please choo..."
4,create_calendar_entry,Create a calendar entry for Meeting with the d...,"(""Meeting with the design team"", ""2022-11-10"",...","Below is the query from the users, please choo...","<oc_1>(""Meeting with the design team"", ""2022-1...","Below is the query from the users, please choo..."
...,...,...,...,...,...,...
2995,list_calendar_entries,Which appointments do I still have on 21. July...,"(""2024-07-21"", True)","Below is the query from the users, please choo...","<oc_3>(""2024-07-21"", True)<oc_end>\nFunction d...","Below is the query from the users, please choo..."
2996,list_calendar_entries,Show all appointments on 15. February 2022,"(""2022-02-15"", False)","Below is the query from the users, please choo...","<oc_3>(""2022-02-15"", False)<oc_end>\nFunction ...","Below is the query from the users, please choo..."
2997,list_calendar_entries,Which entries are still in the calendar for 28...,"(""2025-10-28"", True)","Below is the query from the users, please choo...","<oc_3>(""2025-10-28"", True)<oc_end>\nFunction d...","Below is the query from the users, please choo..."
2998,list_calendar_entries,Which entries are still in the calendar for 01...,"(""2023-06-01"", True)","Below is the query from the users, please choo...","<oc_3>(""2023-06-01"", True)<oc_end>\nFunction d...","Below is the query from the users, please choo..."


In [None]:
import random
import string

def add_random_errors(text, error_rate=0.1, rng=None):
    """Return a copy of ``text`` with random character-level typos injected.

    Every character position is examined once; with probability
    ``error_rate`` one of four edits is applied at that position:

    * ``swap``    - exchange the character with its right neighbour
                    (no-op on the last position),
    * ``delete``  - drop the character,
    * ``insert``  - put a random lowercase ASCII letter in front of it,
    * ``replace`` - overwrite it with a random lowercase ASCII letter.

    Args:
        text: The string to corrupt.
        error_rate: Per-position probability of applying an edit.
        rng: Optional object providing ``random()`` and ``choice(seq)``
            (e.g. ``random.Random(seed)``) for reproducible noise.
            Defaults to the global ``random`` module.

    Returns:
        The corrupted string.
    """
    if rng is None:
        rng = random

    error_types = ['swap', 'delete', 'insert', 'replace']
    result = list(text)

    # A while loop with a live length check is used because 'insert' grows the
    # list; a range() frozen at the original length would never reach the
    # trailing characters once something has been inserted.
    i = 0
    while i < len(result):
        if rng.random() < error_rate:
            error_type = rng.choice(error_types)

            if error_type == 'swap' and i < len(result) - 1:
                # The character moved to i+1 is examined again on the next step.
                result[i], result[i + 1] = result[i + 1], result[i]
            elif error_type == 'delete':
                # Blank the slot instead of popping so indices stay stable.
                result[i] = ''
            elif error_type == 'insert':
                result.insert(i, rng.choice(string.ascii_lowercase))
                # Step over the character that was just displaced to i+1 so it
                # is not given a second chance of being corrupted.
                i += 1
            elif error_type == 'replace':
                result[i] = rng.choice(string.ascii_lowercase)
        i += 1

    return ''.join(result)

In [None]:
# Preview the noise on one function description at a 5% per-character error rate.
add_random_errors(text=calendar_functions["create_calendar_entry"], error_rate=0.05)

'def crreate_calender_entry(thitle, date, time, duration):\n"""\nErstelft einen Kalendfereintrag mit ven angegebenen Details hnd ruft die alender-API auf.\n\nParameter:\ntitle (str): Der Tittel des Kalendereintrags.\noate (str): aDs Datum des Kalendereintrags im Formatc\'YYYY-MM-DD\'.\ntime (str): Die Ukhizeit des Kalendereintrags m Format \'HH:MM\'.\nduration (itn): Die Dauer eds Kalendereintragsi h Minuten.t\n\nRückgabe:\nbool: True, wenn hder Aufruf er Kalender-API erfolgreicn war, andrnfalls False.\n"""'

In [None]:
# Accumulator for the generated DPO records; the following cells append dicts to it.
training_data_dpo = []

In [None]:
# Generate 1000 create_calendar_entry examples in two flavours:
#   619 where activity and duration are separate slots of the command, and
#   381 where the duration is already part of the activity phrase ("timed").
# NOTE(review): language is "de" although the notebook header describes the
# English dataset; the outputs of this section are German - confirm intent.
min_year = 2022
max_year = 2026
language = "de"

for _ in range(619):
  # Randomise the surface form of date, time and duration.
  use_month_name = np.random.choice([True, False])
  leading_zero = np.random.choice([True, False])
  has_minutes = np.random.choice([True, False])
  only_minutes = np.random.choice([True, False])
  decimal_hours = np.random.choice([True, False])

  activity = np.random.choice(activities)
  # Each generator returns (text shown in the command, value used as parameter).
  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)
  time, parameter_time = generate_random_time(leading_zero=leading_zero, has_minutes=has_minutes)
  duration, parameter_duration = generate_random_duration(only_minutes=only_minutes, decimal_hours=decimal_hours, language=language)

  command = np.random.choice(command_templates).format(activity=activity, date=date, time=time, duration=duration)

  # Only the activity is quoted here; date and time parameters are inserted verbatim.
  parameters = f'("{activity}", {parameter_date}, {parameter_time}, {parameter_duration})'

  training_data_dpo.append({
      "function_name": "create_calendar_entry",
      "command": command,
      "parameters": parameters
  })

for _ in range(381):
  use_month_name = np.random.choice([True, False])
  leading_zero = np.random.choice([True, False])
  has_minutes = np.random.choice([True, False])

  timed_activity = np.random.choice(timed_activities)
  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)
  time, parameter_time = generate_random_time(leading_zero=leading_zero, has_minutes=has_minutes)

  # The duration comes from the timed activity itself, not from a random draw.
  activity = timed_activity["activity"]
  parameter_duration = timed_activity["duration"]

  # Timed templates have no {duration} slot, so the leftover `duration` of the
  # previous loop is deliberately not passed any more.
  command = np.random.choice(command_templates_timed).format(timed_activity=timed_activity["timed_activity"], date=date, time=time)

  parameters = f'("{activity}", {parameter_date}, {parameter_time}, {parameter_duration})'

  training_data_dpo.append({
      "function_name": "create_calendar_entry",
      "command": command,
      "parameters": parameters
  })

In [None]:
# Build 1000 delete_calendar_entry examples; each command references an id
# obtained from generate_short_uuid(10) and the same id is the only parameter.
for _ in range(1000):
  entry_id = generate_short_uuid(10)
  delete_command_template = np.random.choice(delete_command_templates)
  command = delete_command_template.format(entry_id=entry_id)
  parameters = '("' + entry_id + '")'
  training_data_dpo.append(dict(
      function_name="delete_calendar_entry",
      command=command,
      parameters=parameters,
  ))

In [None]:
# Build 1000 list_calendar_entries examples. The flag not_finished selects the
# template family and is emitted as the second parameter.
min_year = 2022
max_year = 2026
language = "de"

for _ in range(1000):
  not_finished = np.random.choice([True, False])
  use_month_name = np.random.choice([True, False])

  date, parameter_date = generate_random_date(min_year=min_year, max_year=max_year, use_month_name=use_month_name, language=language)

  if not_finished:
    templates = list_command_templates_not_finished
  else:
    templates = list_command_templates

  # Only the date is supplied. The former timed_activity/time/duration arguments
  # were leftovers from the create_calendar_entry cell and made this cell depend
  # on that cell's loop variables.
  # NOTE(review): assumes the list templates contain no placeholder other than
  # {date} - verify against their definition.
  command = np.random.choice(templates).format(date=date)

  parameters = "(" + parameter_date + ", " + str(not_finished) + ")"

  training_data_dpo.append({
      "function_name": "list_calendar_entries",
      "command": command,
      "parameters": parameters
  })

In [None]:
# One row per generated record: function_name, command, parameters.
training_data_dpo_df = pd.DataFrame(data=training_data_dpo)
training_data_dpo_df

Unnamed: 0,function_name,command,parameters
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2..."
...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)"
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)"
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)"
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)"


In [None]:
# Embed every command into the prompt template to obtain the model input.
training_data_dpo_df["prompt"] = training_data_dpo_df["command"].map(
    lambda command: prompt_template.format(query=command)
)
training_data_dpo_df

Unnamed: 0,function_name,command,parameters,prompt
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ...","Unten steht ein Befehl des Benutzer, bitte wäh..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""...","Unten steht ein Befehl des Benutzer, bitte wäh..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10...","Unten steht ein Befehl des Benutzer, bitte wäh..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh..."
...,...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)","Unten steht ein Befehl des Benutzer, bitte wäh..."
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)","Unten steht ein Befehl des Benutzer, bitte wäh..."
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)","Unten steht ein Befehl des Benutzer, bitte wäh..."
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)","Unten steht ein Befehl des Benutzer, bitte wäh..."


In [None]:
# Render the accepted completion for every row.
training_data_dpo_df["completion"] = training_data_dpo_df.apply(
    format_completion_template, axis="columns"
)
training_data_dpo_df

Unnamed: 0,function_name,command,parameters,prompt,completion
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Training im Fitnessstudio"", ""2026-11-2..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorlesung Mittelhochdeutsch"", ""2023-02..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""wöchentliches Teammeeting"", ""2023-12-2..."
...,...,...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-10-13"", False)<oc_end>\nFunktions..."
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb..."
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-12-07"", False)<oc_end>\nFunktions..."
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2024-08-19"", False)<oc_end>\nFunktions..."


In [None]:
# Layout of a rejected completion: the parameter part on the first line,
# followed by the function description on the next one.
completion_rejected_template = (
    "{function_parameters}\n"
    "Funktionsbeschreibung: {function_description}"
)

In [None]:
def format_completion_rejected_template(row):
  """Render the rejected completion for one dataset row.

  The function description of ``row["function_name"]`` is corrupted with
  ``add_random_errors`` at a 5% error rate and appended to the parameter part.

  The parameter part is chosen as follows:

  * If the row has a ``parameters_rejected`` entry, it is used verbatim
    (in ``dpo_df`` these values already carry their ``<oc_N>...<oc_end>`` tags).
  * Otherwise the correct ``row["parameters"]`` are wrapped in
    ``<oc_N>...<oc_end>`` with N drawn at random from
    1..len(calendar_functions). Without this fallback the function raises
    KeyError on frames that lack ``parameters_rejected``, such as
    ``training_data_dpo_df``.

  Args:
    row: Mapping-like row (e.g. a pandas Series) with ``function_name`` and
      either ``parameters_rejected`` or ``parameters``.

  Returns:
    The formatted rejected completion string.
  """
  function_name = row["function_name"]

  if "parameters_rejected" in row:
    function_parameters = row["parameters_rejected"]
  else:
    # NOTE(review): the random number is drawn independently of function_name,
    # so it can coincide with the correct function token.
    function_number = np.random.choice([i + 1 for i in range(len(calendar_functions))])
    function_parameters = f"<oc_{function_number}>{row['parameters']}<oc_end>"

  function_description = add_random_errors(calendar_functions[function_name], 0.05)
  return completion_rejected_template.format(function_parameters=function_parameters, function_description=function_description)

In [None]:
# Render a rejected completion for every generated row.
training_data_dpo_df["completion_rejected"] = training_data_dpo_df.apply(
    format_completion_rejected_template, axis="columns"
)
training_data_dpo_df

Unnamed: 0,function_name,command,parameters,prompt,completion,completion_rejected
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Training im Fitnessstudio"", ""2026-11-2...","<oc_3>(""Training im Fitnessstudio"", ""2026-11-2..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorlesung Mittelhochdeutsch"", ""2023-02...","<oc_2>(""Vorlesung Mittelhochdeutsch"", ""2023-02..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""wöchentliches Teammeeting"", ""2023-12-2...","<oc_3>(""wöchentliches Teammeeting"", ""2023-12-2..."
...,...,...,...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-10-13"", False)<oc_end>\nFunktions...","<oc_2>(""2022-10-13"", False)<oc_end>\nFunktions..."
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb..."
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-12-07"", False)<oc_end>\nFunktions...","<oc_2>(""2022-12-07"", False)<oc_end>\nFunktions..."
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2024-08-19"", False)<oc_end>\nFunktions...","<oc_1>(""2024-08-19"", False)<oc_end>\nFunktions..."


In [None]:
# Persist the intermediate frame as a semicolon-separated file without the index.
training_data_dpo_df.to_csv(path_or_buf="german-dataset-dpo-creation.csv", sep=";", index=False)

In [None]:
# Load the frame that additionally carries completion_parameters_rejected.
# NOTE(review): "dpo-data.csv" is not written by any visible cell; presumably
# it is the creation CSV after an external generation step - TODO confirm.
dpo_df = pd.read_csv(filepath_or_buffer="dpo-data.csv", sep=";")
dpo_df

Unnamed: 0,function_name,command,parameters,prompt,completion,completion_rejected,completion_parameters_rejected
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Training im Fitnessstudio"", ""2026-11-2...","<oc_3>(""Training im Fitnessstudio"", ""2026-11-2...","Unten steht ein Befehl des Benutzer, bitte wäh..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorlesung Mittelhochdeutsch"", ""2023-02...","<oc_2>(""Vorlesung Mittelhochdeutsch"", ""2023-02...","Unten steht ein Befehl des Benutzer, bitte wäh..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-...","Unten steht ein Befehl des Benutzer, bitte wäh..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""wöchentliches Teammeeting"", ""2023-12-2...","<oc_3>(""wöchentliches Teammeeting"", ""2023-12-2...","Unten steht ein Befehl des Benutzer, bitte wäh..."
...,...,...,...,...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-10-13"", False)<oc_end>\nFunktions...","<oc_2>(""2022-10-13"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh..."
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb...","Unten steht ein Befehl des Benutzer, bitte wäh..."
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-12-07"", False)<oc_end>\nFunktions...","<oc_2>(""2022-12-07"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh..."
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2024-08-19"", False)<oc_end>\nFunktions...","<oc_1>(""2024-08-19"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh..."


In [None]:
def format_parameters_rejected(row):
  """Strip the prompt text out of the rejected generation and trim whitespace.

  Every occurrence of ``row["prompt"]`` inside
  ``row["completion_parameters_rejected"]`` is removed, then leading and
  trailing whitespace is stripped from what remains.
  """
  generated_text = row["completion_parameters_rejected"]
  prompt_text = row["prompt"]
  without_prompt = generated_text.replace(prompt_text, "")
  return without_prompt.strip()

# Derive the bare rejected parameter string for every row and show the column.
dpo_df["parameters_rejected"] = dpo_df.apply(
    format_parameters_rejected, axis="columns"
)
dpo_df["parameters_rejected"]

0       <oc_1>("Training inFitness Studio", "2026-11-2...
1       <oc_1>("Vorbesprechung Bachelorarbeit", "2026-...
2       <oc_1>("VorbeprechbungBachelorzeit", "2024-09-...
3       <oc_1>("Weihnachtssuch", "2025-10-13", "12:00"...
4       <oc_1>("wöschtragenen Termin mit ID b867c9bdae...
                              ...                        
2995                  <oc_3>("2025-10-13", False)<oc_end>
2996                   <oc_3>("2026-12-05", True)<oc_end>
2997                  <oc_3>("2026-12-07", False)<oc_end>
2998                  <oc_3>("2024-08-19", False)<oc_end>
2999                  <oc_3>("2026-06-05", False)<oc_end>
Name: parameters_rejected, Length: 3000, dtype: object

In [None]:
# Overwrite completion_rejected using the rejected parameters derived above.
dpo_df["completion_rejected"] = dpo_df.apply(
    format_completion_rejected_template, axis="columns"
)
dpo_df

Unnamed: 0,function_name,command,parameters,prompt,completion,completion_rejected,completion_parameters_rejected,parameters_rejected
0,create_calendar_entry,Füge einen Kalendareintrag für Training im Fit...,"(""Training im Fitnessstudio"", ""2026-11-27"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Training im Fitnessstudio"", ""2026-11-2...","<oc_1>(""Training inFitness Studio"", ""2026-11-2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Training inFitness Studio"", ""2026-11-2..."
1,create_calendar_entry,Trage Vorlesung Mittelhochdeutsch für den 14. ...,"(""Vorlesung Mittelhochdeutsch"", ""2023-02-14"", ...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorlesung Mittelhochdeutsch"", ""2023-02...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2026-...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2026-..."
2,create_calendar_entry,Plane Vorbesprechung Bachelorarbeit am 25. Sep...,"(""Vorbesprechung Bachelorarbeit"", ""2025-09-25""...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Vorbesprechung Bachelorarbeit"", ""2025-...","<oc_1>(""VorbeprechbungBachelorzeit"", ""2024-09-...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""VorbeprechbungBachelorzeit"", ""2024-09-..."
3,create_calendar_entry,Plane Weihnachtsessen bei meinen Eltern am 13....,"(""Weihnachtsessen bei meinen Eltern"", ""2026-10...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Weihnachtsessen bei meinen Eltern"", ""2...","<oc_1>(""Weihnachtssuch"", ""2025-10-13"", ""12:00""...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""Weihnachtssuch"", ""2025-10-13"", ""12:00""..."
4,create_calendar_entry,Erstelle einen Kalendareintrag für wöchentlich...,"(""wöchentliches Teammeeting"", ""2023-12-25"", ""2...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""wöchentliches Teammeeting"", ""2023-12-2...","<oc_1>(""wöschtragenen Termin mit ID b867c9bdae...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_1>(""wöschtragenen Termin mit ID b867c9bdae..."
...,...,...,...,...,...,...,...,...
2995,list_calendar_entries,Liste mir alle Kalendareinträge für den 13. Ok...,"(""2022-10-13"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-10-13"", False)<oc_end>\nFunktions...","<oc_3>(""2025-10-13"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2025-10-13"", False)<oc_end>"
2996,list_calendar_entries,Welche Termine stehen am 04.12.2023 noch für m...,"(""2023-12-04"", True)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2023-12-04"", True)<oc_end>\nFunktionsb...","<oc_3>(""2026-12-05"", True)<oc_end>\nFunktionsb...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2026-12-05"", True)<oc_end>"
2997,list_calendar_entries,Zeige mir alle Kalendareinträge für den 07.12....,"(""2022-12-07"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2022-12-07"", False)<oc_end>\nFunktions...","<oc_3>(""2026-12-07"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2026-12-07"", False)<oc_end>"
2998,list_calendar_entries,Zeige alle Kalendareinträge am 19. August 2024 an,"(""2024-08-19"", False)","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2024-08-19"", False)<oc_end>\nFunktions...","<oc_3>(""2024-08-19"", False)<oc_end>\nFunktions...","Unten steht ein Befehl des Benutzer, bitte wäh...","<oc_3>(""2024-08-19"", False)<oc_end>"


In [None]:
# Show the first rejected completion in full (the table view truncates it).
dpo_df["completion_rejected"].iat[0]

'<oc_1>("Training inFitness Studio", "2026-11-28", "20:00", 21)<oc_end>\nFunktionsbeschreibung: sdef create_calendear_eqtry(title, date, tinme, duration):\n"""\nErstellt einen Kalendereintrag mit den angegebenen Detailsund ruft die Kalender-API auf.\n\nParameter:r\ntitle (str): er Titel qdes Kalynderentrags.\ndate (str): Das Datum des Kalendereintrags im Formt \'zYYYY-MM-DD\'.\ntim ie(str): Die Uhrzeit des Kalendereintrags im Foqmat \'HH:MM\'.\ndurbtion (int): Die Dauer des alenderlintrags in iMnuten.\n\nRgckgabe:\nboolc True, wenn der Aufruf der Kalender-API erfolgrevich war, andernfalls False.\n"""'

In [None]:
# Show the first accepted completion in full for comparison.
dpo_df["completion"].iat[0]

'<oc_1>("Training im Fitnessstudio", "2026-11-27", "20:00", 210)<oc_end>\nFunktionsbeschreibung: def create_calender_entry(title, date, time, duration):\n"""\nErstellt einen Kalendereintrag mit den angegebenen Details und ruft die Kalender-API auf.\n\nParameter:\ntitle (str): Der Titel des Kalendereintrags.\ndate (str): Das Datum des Kalendereintrags im Format \'YYYY-MM-DD\'.\ntime (str): Die Uhrzeit des Kalendereintrags im Format \'HH:MM\'.\nduration (int): Die Dauer des Kalendereintrags in Minuten.\n\nRückgabe:\nbool: True, wenn der Aufruf der Kalender-API erfolgreich war, andernfalls False.\n"""'

In [None]:
# Keep only the three columns that make up the exported DPO dataset.
dpo_dataset = dpo_df.loc[:, ["prompt", "completion", "completion_rejected"]]

In [None]:
# Write the final dataset as a semicolon-separated file without the index.
dpo_dataset.to_csv(path_or_buf="german-dpo-dataset.csv", sep=";", index=False)