In [None]:
import ast
import os
import random
import re
import sys

import pandas as pd
from langchain.prompts import PromptTemplate
from langchain_ollama import OllamaLLM
from tqdm import tqdm

# Check if running in Colab
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Colab-specific setup
    !pip install langchain langchain_ollama pandas tqdm
    !git clone https://github.com/ostinsolo/Audio-Engineer-Sound-Design-LLM.git
    %cd Audio-Engineer-Sound-Design-LLM/ai_gen
else:
    # Local system setup
    # Check if we're in the correct directory, if not, change to it
    if not os.path.exists('ableton_data.py'):
        os.chdir('/Users/ostinsolo/Documents/Code/Audio-Engineer-Sound-Design-LLM/ai_gen')
    
    # Create and activate a virtual environment named 'aigen'
    !python -m venv aigen
    if sys.platform == "win32":
        !aigen\Scripts\activate
    else:
        !source aigen/bin/activate
    
    # Install required packages in the virtual environment
    !pip install langchain langchain_ollama pandas tqdm

# Add the current directory to Python's path
sys.path.append('.')

# Load the Ableton data
from ableton_data import *

# Load the Ollama LLM
llm = OllamaLLM(model="llama3.2")

# Define the prompt template.
# `ableton_data` is the only value substituted at format time. The other
# brace-wrapped names in the rules are meant to reach the model literally, so
# they are written with doubled braces; single braces would be parsed as
# template variables that are never supplied.
prompt_template = PromptTemplate(
    input_variables=["ableton_data"],
    template="""
Generate a unique utterance for an Ableton Live task and its corresponding action order. Use the provided Ableton data to ensure relevance and accuracy.

Ableton Data:
{ableton_data}

Format the output as follows:
Utterance: [insert utterance here]
Action Order: ["step 1", "step 2", ...]

Rules:
1. Track-related actions always start with "track {{track_number}}" unless it's a global action.
2. Device-related actions always start with "search device" followed by the device name.
3. For instruments, the order is: "search device", "{{instrument}}", "{{device_type}}", then the action.
4. Audio effects are treated separately from instruments.
5. Control actions include one of the speed modifiers, followed by a value between 0 and 100.
6. When creating a track, don't mention a track number (it doesn't exist yet).
7. Use one of the common actions in the utterance.
8. You can use or adapt one of the utterance templates, or create a new utterance based on the provided data.

Now generate a new, unique utterance and action order:
"""
)

def extract_utterance_and_action(response):
    """Pull the utterance and the action-order list out of an LLM response.

    The response is expected to contain a line ``Utterance: <text>`` and a
    line ``Action Order: [<list literal>]``.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(utterance, action_order)`` tuple. ``utterance`` is the stripped
        text after ``Utterance: `` or ``None`` when the line is missing or
        blank. ``action_order`` is a list of strings, or ``None`` when the
        line is missing, is not a valid Python literal, or is not a list made
        up entirely of strings.
    """
    utterance_match = re.search(r'Utterance: (.+)', response)
    action_order_match = re.search(r'Action Order: (\[.+\])', response)

    utterance = None
    if utterance_match:
        # Drop stray whitespace (e.g. a trailing "\r"); treat blank as missing.
        utterance = utterance_match.group(1).strip() or None

    action_order = None
    if action_order_match:
        try:
            # The text comes from a model, so it must never be executed:
            # literal_eval only accepts plain Python literals.
            parsed = ast.literal_eval(action_order_match.group(1))
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None
        # Reject anything that is not a list of strings (for instance a list
        # that still carries the "..." placeholder from the prompt).
        if isinstance(parsed, list) and all(isinstance(step, str) for step in parsed):
            action_order = parsed

    return utterance, action_order

def generate_utterance_and_action():
    """Request one utterance/action-order pair from the LLM.

    Random values are drawn from the module-level Ableton tables
    (``audio_effects``, ``device_types``, ``actions``, ``utterance_templates``,
    ``track_types``, ``action_order_templates`` -- presumably provided by the
    star import of ``ableton_data``), packed into a dict, rendered into
    ``prompt_template`` and sent to ``llm``.

    Returns:
        The ``(utterance, action_order)`` tuple produced by
        ``extract_utterance_and_action`` for the model's reply.
    """
    # Randomly select elements from the Ableton data to encourage variety
    audio_effect = random.choice(audio_effects)
    instrument = random.choice(list(device_types))
    device_type = random.choice(device_types[instrument])

    # A category can map to a nested dict instead of a list ('speed_modifiers'
    # does, as the lookup below shows). random.choice cannot index a dict, so
    # descend through random sub-groups until a sequence is reached.
    action_pool = actions[random.choice(list(actions))]
    while isinstance(action_pool, dict):
        action_pool = random.choice(list(action_pool.values()))
    action = random.choice(action_pool)
    template = random.choice(utterance_templates)

    # Speed modifiers are grouped by category; pick a group, then a modifier
    speed_groups = actions['speed_modifiers']
    speed_modifier = random.choice(speed_groups[random.choice(list(speed_groups))])

    # Generate random track numbers (assuming a maximum of 8 tracks);
    # sample() guarantees the pair of track numbers is distinct.
    track_number = random.randint(1, 8)
    track_number1, track_number2 = random.sample(range(1, 9), 2)

    # Generate a random clip number (assuming a maximum of 16 clips per track)
    clip_number = random.randint(1, 16)

    # Generate a random value
    value = random.randint(0, 100)

    # Generate a random number for mapping (assuming at most 128 mappable controls)
    map_number = random.randint(1, 128)

    # Create a simplified version of the Ableton data to pass to the AI
    simplified_data = {
        'track_number': track_number,
        'track_number1': track_number1,
        'track_number2': track_number2,
        'clip_number': clip_number,
        'value': value,
        'number': map_number,
        'audio_effect': audio_effect,
        'instrument': instrument,
        'device_type': device_type,
        'action': action,
        'template': template,
        'speed_modifier': speed_modifier,
        'track_type': random.choice(track_types),
        'device_name': random.choice(audio_effects + list(device_types.keys())),
        'parameter': "some_parameter",  # You might want to define a list of parameters in ableton_data.py
        'project_action': random.choice(actions['project_actions']),
        'track_action': random.choice(actions['track_actions']),
        'clip_action': random.choice(actions['clip_actions']),
        'value_action': random.choice(actions['value_actions']),
        'view_action': random.choice(actions['view_actions']),
        'mapping_action': random.choice(actions['mapping_actions']),
        'action_order_templates': action_order_templates
    }

    prompt = prompt_template.format(ableton_data=str(simplified_data))
    # invoke() is the supported entry point; calling the LLM object directly
    # is a deprecated interface.
    output = llm.invoke(prompt)
    return extract_utterance_and_action(output)

# Build the dataset: request `num_generations` samples and keep only the
# ones where both the utterance and the action order were extracted.
num_generations = 100  # You can adjust this number
generated_data = []

for _ in tqdm(range(num_generations), desc="Generating utterances and actions"):
    utterance, action_order = generate_utterance_and_action()
    if not (utterance and action_order):
        # Incomplete or unparseable reply; skip it
        continue
    generated_data.append(dict(Utterance=utterance, Action_Order=action_order))

print(f"Generated {len(generated_data)} utterances and action orders.")


In [None]:
# Enable tqdm for pandas operations: this registers the `progress_apply`
# method on pandas objects, which the CSV export below relies on.
tqdm.pandas()

def create_csv_from_ai_output(input_data, output_file):
    """Write generated utterance/action-order records to a CSV file.

    Args:
        input_data: Sequence of dicts with ``Utterance`` and ``Action_Order``
            keys, where ``Action_Order`` is an iterable of steps. May be
            empty, in which case a header-only CSV is written.
        output_file: Destination path handed to ``DataFrame.to_csv``.

    Raises:
        KeyError: If ``input_data`` is non-empty but has no ``Action_Order``
            column.
    """
    # Convert the input data to a pandas DataFrame
    df = pd.DataFrame(input_data)

    if df.empty:
        # No records were generated: still emit a well-formed file with the
        # expected header instead of failing on the missing column.
        df = pd.DataFrame(columns=['Utterance', 'Action_Order'])
    else:
        steps = df['Action_Order']
        # Show a progress bar when tqdm.pandas() has been registered, but
        # keep working (silently) when it has not.
        apply_steps = getattr(steps, 'progress_apply', steps.apply)
        # Convert each step list to a comma-separated string; str() keeps
        # numeric steps from breaking the join.
        df['Action_Order'] = apply_steps(lambda order: ', '.join(map(str, order)))

    # Write the DataFrame to a CSV file
    df.to_csv(output_file, index=False)
    print(f"CSV file '{output_file}' has been created successfully.")

# Export the generated pairs to a CSV file in the current working directory
output_file = "ableton_utterances_and_actions.csv"
create_csv_from_ai_output(input_data=generated_data, output_file=output_file)