# In [None]:
import sys
import os
import random
import re
import json
import logging
import pandas as pd
from tqdm import tqdm
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

# Ensure we're in the correct directory: if ableton_data.py is not in the
# current working directory, fall back to the directory holding this script.
if not os.path.exists('ableton_data.py'):
    # `__file__` does not exist in notebooks / interactive sessions, where
    # referencing it raises NameError. In that case stay where we are so the
    # import below reports the real problem (module not found).
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        script_dir = None
    if script_dir is not None:
        os.chdir(script_dir)

# Add the current directory to Python's path
sys.path.append('.')

# Load the Ableton data.
# NOTE(review): star import. The names audio_effects, device_types, actions,
# utterance_templates, track_types and parameters used further down are not
# defined in this file, so they presumably come from here -- confirm against
# ableton_data.py.
from ableton_data import *

# Configure logging: root logger at INFO, plus a module-level logger that
# generate_utterance_and_action() uses to report unparseable LLM output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the Ollama LLM (model name "llama3.2"); every generation below goes
# through this single shared instance.
llm = OllamaLLM(model="llama3.2")

# Define the prompt template.
# Two fields are substituted at format time:
#   {examples}     - few-shot examples (built by format_examples)
#   {placeholders} - the randomly chosen values for this generation
#                    (built by format_placeholders)
# The "Utterance:" / "Action Order:" labels in the requested output format are
# the same labels extract_utterance_and_action() searches for, so the two must
# be kept in sync.
prompt_template = PromptTemplate(
    input_variables=["placeholders", "examples"],
    template="""
Generate a unique utterance for an Ableton Live task and its corresponding action order based on the provided placeholders. Use the same placeholder values in both the utterance and the action order.

Examples:

{examples}

Placeholders:
{placeholders}

Format the output as follows:
Utterance: [insert utterance here]
Action Order: ["step 1", "step 2", ...]

Now generate the utterance and action order.
"""
)

def format_placeholders(placeholders):
    """Render a placeholder mapping as a bulleted list, one entry per line.

    Each item becomes ``- <key>: "<value>"``; lines are joined with newlines
    and follow the mapping's iteration order. An empty mapping yields ``""``.
    """
    bullet_lines = []
    for name, val in placeholders.items():
        bullet_lines.append('- {}: "{}"'.format(name, val))
    return '\n'.join(bullet_lines)

# Few-shot examples shown to the LLM. Each entry pairs the placeholder values
# with the utterance and the action order (kept as a pre-formatted string so
# it is inserted into the prompt verbatim).
examples_list = [
    dict(
        placeholders={'track_creation_action': 'add', 'track_type': 'MIDI'},
        utterance='Add a MIDI track',
        action_order='["add", "MIDI"]',
    ),
    dict(
        placeholders={'track_number': 2, 'track_action': 'mute'},
        utterance='Mute track 2',
        action_order='["track 2", "mute"]',
    ),
    # Add more examples as needed
]

def format_examples(examples):
    """Render the few-shot examples as one text block for the prompt.

    Every example is expected to provide 'placeholders' (a mapping),
    'utterance' and 'action_order'. Each one is rendered as an "Example:"
    section listing its placeholders as bullets, followed by the quoted
    utterance and the action order. Sections are concatenated in input order;
    an empty input yields ``""``.
    """
    section_layout = (
        '\nExample:\nPlaceholders:\n{}\n\nUtterance: "{}"\nAction Order: {}\n'
    )
    sections = []
    for entry in examples:
        bullets = '\n'.join(
            '- {}: "{}"'.format(name, val)
            for name, val in entry['placeholders'].items()
        )
        sections.append(
            section_layout.format(bullets, entry['utterance'], entry['action_order'])
        )
    return ''.join(sections)

def extract_utterance_and_action(response):
    """Pull the utterance and the action-order list out of an LLM response.

    The response is searched for an ``Utterance: ...`` line and an
    ``Action Order: [...]`` list (the output format requested in the prompt).

    Args:
        response: Raw text returned by the LLM.

    Returns:
        A ``(utterance, action_order)`` tuple. ``utterance`` is the stripped
        text following the first "Utterance:" label, or None when the label is
        absent. ``action_order`` is the parsed list, or None when the label is
        absent or the bracketed text cannot be parsed.
    """
    utterance_match = re.search(r'Utterance:\s*(.+)', response)
    action_order_match = re.search(r'Action Order:\s*(\[[^\]]*\])', response)

    utterance = utterance_match.group(1).strip() if utterance_match else None

    # No action-order list in the response: report it as missing rather than
    # attempting string operations on None.
    if action_order_match is None:
        return utterance, None

    raw_list = action_order_match.group(1).strip()

    # Try the text as-is first so valid JSON containing apostrophes (e.g.
    # "don't") is left intact; only fall back to swapping single quotes for
    # double quotes to accept Python-style lists such as ['a', 'b'].
    for candidate in (raw_list, raw_list.replace("'", '"')):
        try:
            return utterance, json.loads(candidate)
        except json.JSONDecodeError:
            continue

    return utterance, None

def _sample_placeholders():
    """Draw one random value for every placeholder offered to the LLM.

    Values come from the module-level Ableton data (audio_effects,
    device_types, actions, track_types, parameters) plus random numbers for
    tracks (1-8), clips (1-16), values (0-100) and mappings (1-128).
    """
    instrument = random.choice(list(device_types))
    instrument_devices = device_types[instrument]

    speed_modifiers = actions['speed_modifiers']
    speed_modifier_category = random.choice(list(speed_modifiers))

    # Two *different* track numbers, for utterances that involve a pair of
    # tracks; sample() guarantees distinctness without a retry loop.
    track_number1, track_number2 = random.sample(range(1, 9), 2)

    return {
        'track_number': random.randint(1, 8),
        'track_number1': track_number1,
        'track_number2': track_number2,
        'clip_number': random.randint(1, 16),
        'value': random.randint(0, 100),
        'number': random.randint(1, 128),
        'audio_effect': random.choice(audio_effects),
        'instrument': instrument,
        # Some instruments may have no device types listed; use '' then.
        'device_type': random.choice(instrument_devices) if instrument_devices else '',
        'track_creation_action': random.choice(actions['track_creation_actions']),
        'track_action': random.choice(actions['track_actions']),
        'project_action': random.choice(actions['project_actions']),
        'clip_action': random.choice(actions['clip_actions']),
        'value_action': random.choice(actions['value_actions']),
        'view_action': random.choice(actions['view_actions']),
        'mapping_action': random.choice(actions['mapping_actions']),
        'speed_modifier': random.choice(speed_modifiers[speed_modifier_category]),
        'track_type': random.choice(track_types),
        'device_name': random.choice(audio_effects + list(device_types.keys())),
        'parameter': random.choice(parameters),
    }


def generate_utterance_and_action():
    """Ask the LLM for one utterance / action-order pair.

    Builds a prompt from freshly sampled placeholder values and the few-shot
    examples, sends it to the module-level ``llm``, and parses the reply.

    Returns:
        ``(utterance, action_order)`` on success, or ``(None, None)`` when
        either part could not be extracted from the LLM output (a warning
        containing the raw output is logged in that case).
    """
    prompt = prompt_template.format(
        placeholders=format_placeholders(_sample_placeholders()),
        examples=format_examples(examples_list),
    )

    # Use invoke(): calling the LLM object directly (llm(prompt)) is the
    # deprecated LangChain call style.
    output = llm.invoke(prompt)

    utterance, action_order = extract_utterance_and_action(output)

    if not utterance or not action_order:
        logger.warning(
            "Failed to extract utterance or action order from output: %s", output
        )
        return None, None

    return utterance, action_order

# Generate the data
num_generations = 100  # Adjust as needed
generated_data = []

# One LLM round-trip per iteration; attempts whose output could not be parsed
# are skipped, so the final count may be lower than num_generations.
for _ in tqdm(range(num_generations), desc="Generating utterances and actions"):
    utterance, action_order = generate_utterance_and_action()
    if not (utterance and action_order):
        continue
    generated_data.append(dict(Utterance=utterance, Action_Order=action_order))

print(f"Generated {len(generated_data)} utterances and action orders.")


# In [None]:
# Enable tqdm for pandas operations: this registers the progress_* methods
# (e.g. Series.progress_apply, used in create_csv_from_ai_output below).
tqdm.pandas()

def create_csv_from_ai_output(input_data, output_file):
    """Write generated utterance / action-order records to a CSV file.

    Args:
        input_data: Iterable of records with an 'Utterance' value and an
            'Action_Order' list of steps (anything ``pd.DataFrame`` accepts).
        output_file: Path of the CSV file to create or overwrite.

    The 'Action_Order' list of each record is flattened to a single
    ``", "``-separated string. When ``input_data`` is empty, a CSV containing
    only the ``Utterance,Action_Order`` header is written.
    """
    # Convert the input data to a pandas DataFrame
    df = pd.DataFrame(input_data)

    if df.empty:
        # No records (e.g. every generation failed to parse): there is no
        # 'Action_Order' column to transform, so emit just the header row.
        df = pd.DataFrame(columns=['Utterance', 'Action_Order'])
    else:
        steps_column = df['Action_Order']
        # Show a progress bar when tqdm.pandas() has been registered, but do
        # not depend on it having been called before this function.
        apply_steps = getattr(steps_column, 'progress_apply', steps_column.apply)
        # str() each step: parsed LLM output may contain numbers, which
        # str.join would reject.
        df['Action_Order'] = apply_steps(lambda steps: ', '.join(map(str, steps)))

    # Write the DataFrame to a CSV file
    df.to_csv(output_file, index=False)
    print(f"CSV file '{output_file}' has been created successfully.")

# Create the CSV file from the records collected by the generation loop
# (generated_data); the path is relative to the current working directory.
output_file = 'ableton_utterances_and_actions.csv'
create_csv_from_ai_output(generated_data, output_file)