# Generating Synthetic Data: LOGO Programs, Their Descriptions, and the Corresponding Graphics

In [1]:
# load
from _1_logo_pseudo_code_generator import generateLOGOPseudoCode
from _2_sampler import LOGOProgramSampler
from _3_executable_logo_primitives import ReGALLOGOPrimitives
from _4_logo_graphic_generator import PseudoProgramInterpreter

# Objects shared by the cells below: the pseudo-code generator, a sampler
# constructed from that generator, and the interpreter that the later cells
# use to execute programs and save the resulting graphics.
generator = generateLOGOPseudoCode()
sampler = LOGOProgramSampler(generator)
interpreter = PseudoProgramInterpreter()

import json
import pandas as pd
import os

In [2]:
#LOGO
train_logo_data = "logo_data/python/train_200_dataset.jsonl"
test_logo_data = "logo_data/python/test_dataset.jsonl"

# Load train and test dataset (JSON Lines: one JSON document per line).
# The encoding is pinned to UTF-8 so parsing does not depend on the
# platform's default locale encoding, and whitespace-only lines are skipped
# because json.loads() raises on an empty string.
with open(f"../external/dependencies/{train_logo_data}", 'r', encoding='utf-8') as f:
    train_data = [json.loads(line) for line in f if line.strip()]

with open(f"../external/dependencies/{test_logo_data}", 'r', encoding='utf-8') as f:
    test_data = [json.loads(line) for line in f if line.strip()]

In [3]:
# Extract descriptions and programs from train_data and transform into pandas DataFrame
def extract_descriptions_and_programs(data):
    """Collect (description, program) pairs from conversation records.

    Each record may carry a 'messages' list whose entries have a 'from' and a
    'value' key.  The value of the last 'human' message becomes the
    description and the value of the last 'gpt' message becomes the program.
    Records for which either one is missing or empty are skipped.

    Args:
        data: iterable of dict records.

    Returns:
        pandas.DataFrame with the columns 'Description' and 'Program', one
        row per record that yielded both values, in input order.
    """
    pairs = []
    for record in data:
        prompt, code = None, None
        for turn in record.get('messages', []):
            role = turn['from']
            if role == 'human':
                prompt = turn['value']
            elif role == 'gpt':
                code = turn['value']
        # Keep only complete pairs.
        if not (prompt and code):
            continue
        pairs.append([prompt, code])
    return pd.DataFrame(pairs, columns=['Description', 'Program'])

# Turn both raw splits into (Description, Program) frames.
df_train = extract_descriptions_and_programs(train_data)
df_test = extract_descriptions_and_programs(test_data)

# Stack train on top of test with a fresh 0..n-1 index, then drop rows whose
# (Description, Program) pair repeats.  The index is not reset after the
# de-duplication, so surviving rows keep their labels from the concatenated
# frame; the test-subset cells look rows up by these labels via df_all.loc.
df_all = (
    pd.concat([df_train, df_test], ignore_index=True)
    .drop_duplicates(subset=['Description', 'Program'])
)
display(df_all)

Unnamed: 0,Description,Program
0,4 concentric square s,"for i in range(5):\n embed(""""""for j in rang..."
1,6 sided snowflake with a medium line and a med...,"for j in range(6):\n embed(""""""forward(8)\nl..."
2,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl..."
3,6 short line s in a row,"for j in range(6):\n embed(""""""forward(2)\nl..."
4,a small triangle connected by a big line to a ...,for i in range(3):\n forward(2)\n left(1...
...,...,...
306,8 sided snowflake with a medium circle and a s...,"for j in range(8):\n embed(""""""penup()\nforw..."
307,5 sided snowflake with 2 small circle s as arms,"for j in range(5):\n embed(""""""penup()\nforw..."
308,3 sided snowflake with a small square and a sm...,"for j in range(3):\n embed(""""""penup()\nforw..."
309,5 sided snowflake with a small 5 gon and a sma...,"for j in range(5):\n embed(""""""penup()\nforw..."


In [4]:
# Generator: build three example pseudo-programs and print each one.
# The first two are followed by two empty lines in the output.
concentric = generator.concentric_semicircle(4, semicircle=False, left=True)
print(concentric, end="\n\n\n")

# A sequence that starts with a drawn line.
seq_start_line = generator.shape_sequence([
    generator.generate_line(2),
    generator.generate_polygon(2, 4, left=True),
])
print(seq_start_line, end="\n\n\n")

# The same sequence, but starting with a space instead of a line.
seq_start_space = generator.shape_sequence([
    generator.generate_space(2),
    generator.generate_polygon(2, 4, left=True),
])
print(seq_start_space)

for j in range(4):
    for i in range(180):
        forward(0.03490481287456702 * j)
        left(1.0)
    for i in range(180):
        forward(0.03490481287456702 * j)
        left(1.0)


forward(2)
for i in range(2):
    forward(4)
    left(180.0)


penup()
forward(2)
pendown()
for i in range(2):
    forward(4)
    left(180.0)


In [22]:
# Render the concentric example program to "concentric.png".
interpreter.reset_state()
# The return value of execute() is intentionally not assigned back to
# `concentric`: rebinding it would replace the program with whatever
# execute() returns, so re-running this cell would no longer execute the
# program.  This also matches how the other cells call execute().
interpreter.execute(concentric)
interpreter.save_graphics("concentric.png")

In [None]:
# Sampler: draw five samples.
sampled_data = sampler.sample(5)

# Show the program and then the description of every sample, followed by an
# empty line.
for sample in sampled_data:
    for label in ("Program", "Description"):
        print(f"{label}: {sample[label]}")
    print()

In [14]:
# TEST
# Render a hand-picked subset of df_all into its own folder.
output_dir = "logo_graphic/11testshapes"
os.makedirs(output_dir, exist_ok=True)

# Rows are selected by index label (df_all.loc) and then renumbered from 0.
test_indices = [98, 44, 100, 99, 200, 212, 214, 201, 53, 54, 282]
df_test_subset = df_all.loc[test_indices].reset_index(drop=True)
display(df_test_subset)

# One PNG per row, named "<row index>_<description with underscores>.png".
for record in df_test_subset.itertuples():
    stem = record.Description.replace(' ', '_')
    filepath = os.path.join(output_dir, f"{record.Index}_{stem}.png")

    # Start from a clean interpreter state for every program.
    interpreter.reset_state()
    interpreter.execute(record.Program)
    interpreter.save_graphics(filepath)

Unnamed: 0,Description,Program
0,a medium 8 gon,for i in range(8):\n forward(4)\n left(4...
1,a small 7 gon separated by a big space from a ...,for i in range(7):\n forward(2)\n left(5...
2,8 concentric circle s,for j in range(9):\n for i in range(HALF_IN...
3,5 short line s in a row,"for j in range(5):\n embed(""""""forward(2)\nl..."
4,a greek spiral with 7 turns,for i in range(8):\n forward(1*i)\n left...
5,a 3 stepped staircase,for i in range(3):\n forward(2)\n left(9...
6,a 3 stepped zigzag,forward(0)\nleft(45.0)\n\nfor i in range(3):\n...
7,a 7 pointed star,for i in range(7):\n forward(16)\n left(...
8,6 sided snowflake with a small square as arms,"for j in range(6):\n embed(""""""for i in rang..."
9,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl..."


In [10]:
# NOTE: `synthetic_data` is created by the "Synthetic data" sampling cell
# further down in this notebook; that cell has to run before this one.
# Print the description and then the program of the second synthetic sample.
for column in ("Description", "Program"):
    print(synthetic_data[column].iloc[1])

7 big 6-gon in a row
for j in range(7):
    embed("""for i in range(6):
    forward(20)
    left(60.0)""", locals())
    penup()
    forward(2)
    left(0.0)

    pendown()


In [7]:
# Synthetic data: draw 25 samples and tabulate them in one step.
synthetic_data = pd.DataFrame(sampler.sample(25), columns=['Description', 'Program'])
display(synthetic_data)

Unnamed: 0,Description,Program
0,connected sequence of shapes: a medium triangl...,for i in range(3):\n forward(4)\n left(1...
1,7 big 6-gon in a row,"for j in range(7):\n embed(""""""for i in rang..."
2,a 8 sided snowflake with arms of separated seq...,"for j in range(8):\n embed(""""""penup()\nforw..."
3,"separated sequence of shapes: a medium 5-gon, ...",for i in range(5):\n forward(4)\n left(7...
4,a 6 sided snowflake with an arm of a big 8-gon,"for j in range(6):\n embed(""""""for i in rang..."
5,4 big 9-gon in a row,"for j in range(4):\n embed(""""""for i in rang..."
6,a 5 sided snowflake with arms of connected seq...,"for j in range(5):\n embed(""""""for i in rang..."
7,5 concentric circles,for j in range(5):\n for i in range(180):\n...
8,a 8 sided snowflake with arms of connected seq...,"for j in range(8):\n embed(""""""for i in rang..."
9,1 small 5-gon in a row,"for j in range(1):\n embed(""""""for i in rang..."


In [8]:
# Synthetic data: render every sampled program to a PNG.
output_dir = "logo_graphic/synthetic"
os.makedirs(output_dir, exist_ok=True)

# File name pattern: "<row index>_<description with underscores>.png".
for sample in synthetic_data.itertuples():
    stem = sample.Description.replace(' ', '_')
    filepath = os.path.join(output_dir, f"{sample.Index}_{stem}.png")

    # Start from a clean interpreter state for every program.
    interpreter.reset_state()
    interpreter.execute(sample.Program)
    interpreter.save_graphics(filepath)

In [None]:
# TEST
# Render a hand-picked subset of df_all into its own folder
# (same selection and output folder as the other TEST cell, without the
# table display).
output_dir = "logo_graphic/11testshapes"
os.makedirs(output_dir, exist_ok=True)

# Rows are selected by index label (df_all.loc) and then renumbered from 0.
test_indices = [98, 44, 100, 99, 200, 212, 214, 201, 53, 54, 282]
df_test_subset = df_all.loc[test_indices].reset_index(drop=True)

# One PNG per row, named "<row index>_<description with underscores>.png".
for record in df_test_subset.itertuples():
    stem = record.Description.replace(' ', '_')
    filepath = os.path.join(output_dir, f"{record.Index}_{stem}.png")

    # Start from a clean interpreter state for every program.
    interpreter.reset_state()
    interpreter.execute(record.Program)
    interpreter.save_graphics(filepath)

In [9]:
# ReGAL DATA GRAPHICS
# Render every program in df_all to a PNG in its own folder.
output_dir = "logo_graphic/train200_test"
os.makedirs(output_dir, exist_ok=True)

# A new interpreter instance is created for this run.
interpreter = PseudoProgramInterpreter()

# File name pattern: "<index label>_<description with underscores>.png".
# The index labels are those of df_all itself.
for record in df_all.itertuples():
    stem = record.Description.replace(' ', '_')
    filepath = os.path.join(output_dir, f"{record.Index}_{stem}.png")

    # Start from a clean interpreter state for every program.
    interpreter.reset_state()
    interpreter.execute(record.Program)
    interpreter.save_graphics(filepath)