In [2]:
import json
import pandas as pd
import re

In [3]:
#LOGO
train_logo_data = "logo_data/python/train_200_dataset.jsonl"
test_logo_data = "logo_data/python/test_dataset.jsonl"


def _load_jsonl(relative_path):
    """Parse each line of a JSONL file below external/dependencies/ into a list."""
    records = []
    with open(f"external/dependencies/{relative_path}", 'r') as handle:
        for raw_line in handle:
            records.append(json.loads(raw_line))
    return records


# Load train and test dataset
train_data = _load_jsonl(train_logo_data)
test_data = _load_jsonl(test_logo_data)

In [4]:
# Turn the raw chat-style records into a two-column pandas DataFrame
def extract_descriptions_and_programs(data):
    """Collect (description, program) pairs from chat-style records.

    Each record is a dict that may carry a 'messages' list. Within one record
    the 'value' of the last message whose 'from' is 'human' becomes the
    description, and the 'value' of the last message whose 'from' is 'gpt'
    becomes the program. Records where either part is missing or empty are
    skipped.

    Returns:
        pandas.DataFrame with the columns 'Description' and 'Program'.
    """
    rows = []
    for record in data:
        prompt = answer = None
        for msg in record.get('messages', []):
            speaker = msg['from']
            if speaker == 'human':
                prompt = msg['value']
            elif speaker == 'gpt':
                answer = msg['value']
        if not (prompt and answer):
            continue
        rows.append([prompt, answer])
    return pd.DataFrame(rows, columns=['Description', 'Program'])

# Build one frame per split
df_train = extract_descriptions_and_programs(train_data)
df_test = extract_descriptions_and_programs(test_data)

# Stack train and test, renumber the rows, then drop exact duplicate
# (Description, Program) pairs. The index is not renumbered a second time,
# so the labels of the surviving rows are the ones assigned by the concat.
df_all = (
    pd.concat([df_train, df_test], ignore_index=True)
    .drop_duplicates(subset=['Description', 'Program'])
)
display(df_all)


Unnamed: 0,Description,Program
0,4 concentric square s,"for i in range(5):\n embed(""""""for j in rang..."
1,6 sided snowflake with a medium line and a med...,"for j in range(6):\n embed(""""""forward(8)\nl..."
2,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl..."
3,6 short line s in a row,"for j in range(6):\n embed(""""""forward(2)\nl..."
4,a small triangle connected by a big line to a ...,for i in range(3):\n forward(2)\n left(1...
...,...,...
306,8 sided snowflake with a medium circle and a s...,"for j in range(8):\n embed(""""""penup()\nforw..."
307,5 sided snowflake with 2 small circle s as arms,"for j in range(5):\n embed(""""""penup()\nforw..."
308,3 sided snowflake with a small square and a sm...,"for j in range(3):\n embed(""""""penup()\nforw..."
309,5 sided snowflake with a small 5 gon and a sma...,"for j in range(5):\n embed(""""""penup()\nforw..."


#### Synthetic creation of new Programs

The generated code uses the same pseudo-code format as the original train and test data. It is therefore a plain string and cannot be executed directly to draw a graphic.

In [None]:
# Top-level grammar rules for LOGO
<Program> ::= <Position> | <Shape-Sequence> | <SpecialShape> | <Snowflake> 
<Shape-Sequence> ::= <Shape> | <Shape> " & " <Shape-Sequence> | <Space> " & " <Shape-Sequence>
<Sub-Program> ::= <Connected-Sub-Program> | <Separated-Sub-Program>
<Connected-Sub-Program> ::= "embed("  <Shape-Sequence> ", locals())"
<Separated-Sub-Program> ::= "embed(" <Space> " & "  <Shape-Sequence> ", locals())"

## POSITION
<Position> ::= <InARow> | <Concentric>
<InARow> ::= "for j in range(" <N-Times> "): " <Sub-Program>
<Concentric> ::= "for j in range(" <N-Times> "): " <Shape> 
            NOTE: "forward(" <Length> | <Size>" * " <N-Times> ")" # parameter within sub-program

## SHAPE
<Shape> ::= <Line> | <Polygon> | <Semicircle> | <Circle> 
<Line> ::= "forward(" <Length> ") left(" <Angle> ")" | "forward(" <Length> ")"
<Polygon> ::= "for i in range(" <Sides> "): forward(" <Length> ") left(" <Angle> = (360/ <Sides>)")" 
<Semicircle> ::= "for i in range(HALF_INF): forward(" <Size> ") left(EPS_ANGLE)"
<Circle> ::= <Semicircle> <Semicircle>

## SPECIAL SHAPE
<SpecialShape> ::= <GreekSpiral> | <Staircase> | <Zigzag> | <Star>
<GreekSpiral> ::= "for i in range(" <Sides> "): forward(1 * " <Sides> ") left(90.0)"
<Staircase> ::= "for i in range(" <N-Times> "): forward(" <Length> ") left(90.0) forward(" <Length> ") left(90.0) forward(0) left(180.0)"
<Zigzag> ::= "for i in range(" <N-Times> "): forward(" <Length> ") left(90.0) forward(" <Length> ") left(270.0)"
<Star> ::= "for i in range(" <Sides> "): forward(16) left(" <Angle> = (360/ <Sides>)")"

## SPACE
<Space> ::= "penup() " <Line> " pendown()"

## SNOWFLAKE
<Snowflake> ::= "for j in range(" <Sides> "): " <Sub-Program> 
                "forward(0) left(360 / " <Sides> ")"

## Parameters
<Sides> ::= "3" | "5" | "6" | "7" | "8"

<Length> ::= "2" | "4" | "20"          # short/small, medium, big
<Size> ::= "EPS_DIST" | "EPS_DIST * 2" # small, medium applying to (semi)circle

<Angle> ::= <Digit> "." <Digit>
<Sides> ::= "3" | "4" | "5" | "6" | "7" | "8" | "9"
<N-Times> ::= <Digit> 
<Digit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

In [5]:
import re
class generateLOGOPseudoCode():
    """Builds LOGO pseudo-code snippets as plain strings.

    Every method returns program text; nothing is drawn or executed here.
    Loop bodies are indented with four spaces and statements are separated by
    newlines.

    Circle-related snippets use the symbolic names HALF_INF, EPS_DIST and
    EPS_ANGLE instead of numbers, as the dataset programs shown in this
    notebook do (e.g. ``forward(EPS_DIST*j)``); whatever runs the program has
    to supply their values.
    """

    def __init__(self):
        pass

    @staticmethod
    def _turn(left: bool) -> str:
        """Return the name of the turn command for the requested direction."""
        return "left" if left else "right"

    @staticmethod
    def _check_angle(angle: float) -> None:
        """Raise ValueError unless 0.0 <= angle <= 360.0."""
        if not (0.0 <= angle <= 360.0):
            raise ValueError("Angle must be between 0.0 and 360.0")

    # Basic shapes
    def generate_line(self, length: int, angle: float = 0.0, left: bool = True) -> str:
        """Return a straight line, optionally followed by a turn.

        Raises:
            ValueError: if ``angle`` is outside [0.0, 360.0].
        """
        self._check_angle(angle)
        if angle == 0:
            return f"forward({length})"
        return f"forward({length})\n{self._turn(left)}({angle})"

    def generate_polygon(self, sides: int, length: int, left: bool = True) -> str:
        """Return a loop drawing a regular polygon with ``sides`` edges of ``length``."""
        angle = 360 / sides
        return f"for i in range({sides}):\n    forward({length})\n    {self._turn(left)}({angle})"

    def generate_semicircle(self, size: int, semicircle: bool = True, left: bool = True) -> str:
        """Return a semicircle, or a full circle when ``semicircle`` is False.

        ``size`` is emitted as the multiplier of EPS_DIST. A circle is the
        same arc loop written twice in a row.
        """
        arc = (
            f"for i in range(HALF_INF):\n"
            f"    forward(EPS_DIST*{size})\n"
            f"    {self._turn(left)}(EPS_ANGLE)"
        )
        return arc if semicircle else f"{arc}\n{arc}"

    # Special shapes
    def generate_greek_spiral(self, size: int, left: bool = True) -> str:
        """Return a greek spiral with ``size`` turns.

        The step length grows with the loop variable (``forward(1*i)``); a
        constant step would only retrace a square. The first iteration has
        length 0, so the loop runs ``size + 1`` times, matching the dataset
        example "a greek spiral with 7 turns" -> ``range(8)``.

        Raises:
            ValueError: if ``size`` is not between 5 and 9.
        """
        if not (5 <= size <= 9):
            raise ValueError("A greek-spiral must have between 5 and 9 turns")
        return f"for i in range({size + 1}):\n    forward(1*i)\n    {self._turn(left)}(90.0)"

    def generate_staircase(self, n_times: int, length: int, left: bool = True) -> str:
        """Return a staircase made of ``n_times`` steps with edges of ``length``."""
        direction = self._turn(left)
        return (
            f"for i in range({n_times}):\n"
            f"    forward({length})\n"
            f"    {direction}(90.0)\n"
            f"    forward({length})\n"
            f"    {direction}(90.0)\n"
            f"    forward(0)\n"
            f"    {direction}(180.0)"
        )

    def generate_zigzag(self, n_times: int, length: int, left: bool = True) -> str:
        """Return a zigzag made of ``n_times`` steps with edges of ``length``."""
        direction = self._turn(left)
        return (
            f"for i in range({n_times}):\n"
            f"    forward({length})\n"
            f"    {direction}(90.0)\n"
            f"    forward({length})\n"
            f"    {direction}(270.0)"
        )

    def generate_star(self, sides: int, left: bool = True) -> str:
        """Return a star with ``sides`` points and edges of length 16.

        The turn is written as ``180.0 - 360/sides``, the form used by the
        dataset's star programs (e.g. ``left(180.0 - 40.0)`` for 9 points).
        Turning by ``360/sides`` alone would trace a regular polygon.

        Raises:
            ValueError: if ``sides`` is not between 5 and 9.
        """
        if not (5 <= sides <= 9):
            raise ValueError("Stars must have between 5 and 9 points")
        angle = 360 / sides
        return f"for i in range({sides}):\n    forward(16)\n    {self._turn(left)}(180.0 - {angle})"

    def generate_space(self, length: int, angle: float = 0.0, left: bool = True) -> str:
        """Return a pen-up move of ``length``, optionally followed by a turn.

        Raises:
            ValueError: if ``angle`` is outside [0.0, 360.0].
        """
        self._check_angle(angle)
        if angle == 0:
            return f"penup()\nforward({length})\npendown()"
        return f"penup()\nforward({length})\n{self._turn(left)}({angle})\npendown()"

    # Position
    def in_a_row(self, n_times: int, sub_program: str, left: bool = True) -> str:
        """Repeat ``sub_program`` ``n_times``, with a pen-up move of 2 after each.

        ``sub_program`` is inserted verbatim after a four-space indent. A
        multi-line sub-program therefore only yields valid code when its line
        breaks sit inside a triple-quoted ``embed(...)`` string, as produced
        by ``sub_program()``.
        """
        direction = self._turn(left)
        return (
            f"for j in range({n_times}):\n"
            f"    {sub_program}\n"
            f"    penup()\n"
            f"    forward(2)\n"
            f"    {direction}(0.0)\n"
            f"\n"
            f"    pendown()"
        )

    def concentric_semicircle(self, n_times: int, semicircle: bool = True, left: bool = True) -> str:
        """Return concentric semicircles, or circles when ``semicircle`` is False.

        The step is ``EPS_DIST*j``, so the iteration with ``j == 0`` has size
        zero. For circles both arc loops sit inside the same outer loop, so
        each circle is completed before the next, larger one starts.
        """
        arc = (
            f"    for i in range(HALF_INF):\n"
            f"        forward(EPS_DIST*j)\n"
            f"        {self._turn(left)}(EPS_ANGLE)"
        )
        body = arc if semicircle else f"{arc}\n{arc}"
        return f"for j in range({n_times}):\n{body}"

    def concentric_polygon(self, n_times: int, sides: int, length: int, left: bool = True) -> str:
        """Return concentric regular polygons whose edge is ``length * j``.

        The iteration with ``j == 0`` has edges of length zero.
        """
        angle = 360 / sides
        return (
            f"for j in range({n_times}):\n"
            f"    for i in range({sides}):\n"
            f"        forward({length} * j)\n"
            f"        {self._turn(left)}({angle})"
        )

    # Combination of shapes and positions
    def shape_sequence(self, shape1: str, shape2: str, left: bool = True) -> str:
        """Return ``shape1`` followed by ``shape2`` on a new line (``left`` is unused)."""
        return f"{shape1}\n{shape2}"

    def sub_program(self, shape_sequence: str, locals: bool = True) -> str:
        """Wrap ``shape_sequence`` in an ``embed(\"\"\"...\"\"\", locals())`` call.

        The ``locals`` parameter is unused; it is kept for interface
        compatibility.
        """
        return f"embed(\"\"\"{shape_sequence}\"\"\", locals())"

    # Snowflake
    def snowflake(self, sides: int, sub_program: str, left: bool = True) -> str:
        """Return a snowflake: ``sub_program`` repeated with a ``360/sides`` turn after each."""
        return (
            f"for j in range({sides}):\n"
            f"    {sub_program}\n"
            f"    forward(0)\n"
            f"    {self._turn(left)}({360 / sides})"
        )

In [None]:
# It seems there is a rule for the arms of a snowflake: if they contain a space and/or a line, that part is executed before the other basic geometric shapes such as polygons and circles.
    # Probably because those shapes start with a for-loop, which would also swallow any following lines or shapes, since they are not separated by a closing parenthesis or an embed.
# Open question: when several polygons and circles are used as arms, or follow each other in the main program (aka shape_sequence), are they somehow wrapped in embed() calls, or how is this done?

# what should I impute for the semicircle variables: EPS_ANGLE = 90.0 EPS_DIST = 2 HALF_INF = 180? (What did I impute when calculating the semantic length)

# I might need to generate a parsing function so that i can run the generated programs in turtle seeing if they execute (Remember: the python turtle library doesn't run on the server)

In [None]:
generator = generateLOGOPseudoCode()

# Build all example snippets first (the generator only returns strings) ...
triangle = generator.generate_polygon(3, 2)
gon5 = generator.generate_polygon(5, 2)
triangle4_in_row = generator.in_a_row(4, generator.sub_program(triangle))
concentric_semicircle = generator.concentric_semicircle(3, semicircle=True, left = False)
concentric_circle = generator.concentric_semicircle(3, semicircle=False, left = True)
space = generator.generate_space(2)

# ... then print them in that order, each followed by a blank separator.
for snippet in (triangle, gon5, triangle4_in_row, concentric_semicircle, concentric_circle, space):
    print(snippet)
    print("\n")
triangle4_in_row

In [7]:
# Chain shapes: first space + triangle, then a 5-gon in front of that pair.
space_triangle = generator.shape_sequence(space, triangle)
gon5_space_triangle = generator.shape_sequence(gon5, space_triangle)
# One print call; the "\n" item in the middle reproduces the blank separator.
print(space_triangle, "\n", gon5_space_triangle, sep="\n")
gon5_space_triangle

penup()
forward(2)
pendown()
for i in range(3):
    forward(2)
    left(120.0)


for i in range(5):
    forward(2)
    left(72.0)
penup()
forward(2)
pendown()
for i in range(3):
    forward(2)
    left(120.0)


'for i in range(5):\n    forward(2)\n    left(72.0)\npenup()\nforward(2)\npendown()\nfor i in range(3):\n    forward(2)\n    left(120.0)'

In [8]:
# Wrap the shape sequence in embed(...) and use it as the arm of a 4-sided snowflake.
snowflake_arm = generator.sub_program(gon5_space_triangle)
snowflake1 = generator.snowflake(4, snowflake_arm)
print(snowflake1)
snowflake1

for j in range(4):
    embed("""for i in range(5):
    forward(2)
    left(72.0)
penup()
forward(2)
pendown()
for i in range(3):
    forward(2)
    left(120.0)""", locals())
    forward(0)
    left(90.0)


'for j in range(4):\n    embed("""for i in range(5):\n    forward(2)\n    left(72.0)\npenup()\nforward(2)\npendown()\nfor i in range(3):\n    forward(2)\n    left(120.0)""", locals())\n    forward(0)\n    left(90.0)'

#### Random generation of LOGO Programs

Randomly generate new LOGO programs in pseudo-code. (In effect, this is a sample drawn without replacement from the population of all possible LOGO programs, given the restrictions of my LOGO syntax.)

0.03490481287456702

#### Interpreter for pseudo code generating LOGO graphics

The purpose is to define the LOGO functions and an environment in which they can be executed to produce graphic output.
This is necessary to validate that the programs produce valid graphics and to have an output that can be ASCII-transformed and fed into the LLM later on.



In [16]:
import matplotlib.pyplot as plt
import numpy as np

# Implementation of ReGAL LOGO primitives using Matplotlib
class ReGALLOGOPrimitives:
    '''Turtle state plus the LOGO primitives that act on it.

    The turtle has a position (x, y), a heading in degrees and a pen flag.
    Every forward() call is recorded as a line segment, in ``path`` while the
    pen is down and in ``pen_up_path`` while it is up, so the drawing can be
    rendered afterwards.
    '''

    def __init__(self):
        self.x, self.y = 0, 0  # Current position
        self.angle = 0         # Current angle in degrees
        self.is_drawing = True
        self.path = []         # List of drawn lines
        self.pen_up_path = []  # List of pen-up moves

    def _add_to_path(self, x1, y1, x2, y2):
        """Record the segment (x1, y1) -> (x2, y2) in the list matching the pen state."""
        segment = ((x1, y1), (x2, y2))
        if self.is_drawing:
            self.path.append(segment)
        else:
            self.pen_up_path.append(segment)

    def forward(self, distance):
        """Move ``distance`` along the current heading and record the segment."""
        x2 = self.x + distance * np.cos(np.radians(self.angle))
        y2 = self.y + distance * np.sin(np.radians(self.angle))
        self._add_to_path(self.x, self.y, x2, y2)
        self.x, self.y = x2, y2

    def left(self, angle):
        """Increase the heading by ``angle`` degrees (modulo 360)."""
        self.angle = (self.angle + angle) % 360

    def right(self, angle):
        """Decrease the heading by ``angle`` degrees (modulo 360)."""
        self.angle = (self.angle - angle) % 360

    def penup(self):
        """Lift the pen: later moves go to ``pen_up_path``."""
        self.is_drawing = False

    def pendown(self):
        """Lower the pen: later moves go to ``path``."""
        self.is_drawing = True

    def teleport(self, x, y, theta=None):
        """Jump to (x, y) without recording a segment.

        The pen state is left as it was; the previous version always ended
        with the pen down, silently lowering a pen that had been lifted.
        If ``theta`` is given, the heading is set to it as well (modulo 360).
        """
        self.x, self.y = x, y
        if theta is not None:
            self.angle = theta % 360

    def heading(self, angle=None):
        """Set the heading to ``angle`` (modulo 360) if given; return the heading."""
        if angle is not None:
            self.angle = angle % 360
        return self.angle

    def isdown(self):
        """Return True while the pen is down."""
        return self.is_drawing

# Interpreter class
class PseudoProgramInterpreter:
    """Executes LOGO pseudo-code strings on a turtle and renders the result.

    ``state`` holds the turtle (position, heading, pen flag, recorded
    segments). ``circle_vars`` supplies the values of the symbolic constants
    that programs use for circles.
    """

    def __init__(self):
        self.state = ReGALLOGOPrimitives()
        self.circle_vars = {
            "EPS_ANGLE": 1.0,    # incrementing by 1° at each step
            "EPS_DIST": 0.03490481287456702,
            "HALF_INF": 180    # half-circle has 180°
            }

    def execute(self, program, local_vars=None):
        """
        Execute a program sequence.
        program: str, the program text
        local_vars: dict, the local variables for `embed` functionality

        NOTE(security): the program text is run with exec(), so it can do
        anything Python can do. Only feed it trusted programs.
        """
        turtle = self.state
        local_scope = {"forward": turtle.forward,
                       "left": turtle.left,
                       "right": turtle.right,
                       "penup": turtle.penup,
                       "pendown": turtle.pendown,
                       "teleport": turtle.teleport,
                       "heading": turtle.heading,
                       "isdown": turtle.isdown,
                       "embed": self.embed}

        # Circle constants first, so that caller-provided variables
        # (e.g. the loop counters of an enclosing program) take precedence.
        local_scope.update(self.circle_vars)
        if local_vars:
            local_scope.update(local_vars)

        exec(program, {}, local_scope)

    def embed(self, subprogram, local_vars):
        """
        Executes an embedded subprogram with access to the given locals and
        then puts the turtle back where it was.

        Position, heading and pen state are saved before the subprogram runs
        and restored afterwards; the segments it drew stay recorded. Without
        the restore, programs of the form ``embed(arm); forward(0);
        left(360/n)`` would chain their arms end to end instead of drawing
        them all from the same starting point.
        """
        turtle = self.state
        saved = (turtle.x, turtle.y, turtle.angle, turtle.is_drawing)
        try:
            self.execute(subprogram, local_vars)
        finally:
            # Restore directly so that the way back is not recorded as a move.
            turtle.x, turtle.y, turtle.angle, turtle.is_drawing = saved

    def save_graphics(self, filename="output.png"):
        """
        Saves the generated graphics as an image file.
        """
        fig, ax = plt.subplots()
        try:
            # TODO: adjust so that small graphics also come across as smaller than large ones
            ax.set_aspect('equal', adjustable='datalim')

            # Draw paths
            for (x1, y1), (x2, y2) in self.state.path:
                ax.plot([x1, x2], [y1, y2], 'k-')  # Pen-down lines in black

            # Draw pen-up paths for visualization (optional)
            for (x1, y1), (x2, y2) in self.state.pen_up_path:
                ax.plot([x1, x2], [y1, y2], 'r--', alpha=0.5)  # Pen-up lines in dashed red (if alpha > 0.0)

            ax.axis('off')  # Hide axes
            fig.savefig(filename, bbox_inches='tight')
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)

    def reset_state(self):
        """
        Resets the graphics state for a new drawing.
        """
        self.state = ReGALLOGOPrimitives()

In [8]:
# Show the two star examples (row labels 201 and 36), each followed by a blank separator.
for row_label in (201, 36):
    print(df_all.loc[row_label, 'Description'])
    print(df_all.loc[row_label, 'Program'])
    print("\n")


a 7 pointed star
for i in range(7):
    forward(16)
    left(180.0 - 51.42857142857143)


a 9 pointed star
for i in range(9):
    forward(16)
    left(180.0 - 40.0)




In [29]:
# Executing the programs generated by the pseudo code generator
import os

output_dir = "logo_graphic/synthetic"
os.makedirs(output_dir, exist_ok=True)
# Use a short, filesystem-safe label for the file. The previous version used
# the program text itself, which contains newlines, quotes, colons and
# parentheses and is not a portable file name.
name = "triangle4_in_row"

# A new interpreter starts with a fresh turtle state, so no reset is needed.
interpreter = PseudoProgramInterpreter()

# Execute program and save the graphic
interpreter.execute(triangle4_in_row)
interpreter.save_graphics(os.path.join(output_dir, name + ".png"))

Maybe first come up with a systematic way to generate the data and pass it in JSONL format, then try again and see if it still fails.

**there is still a need to check if the graph generator works as it should**

In [9]:
import os

# Target folder for the rendered dataset programs
output_dir = "logo_graphic/train200_test"
os.makedirs(output_dir, exist_ok=True)

# One interpreter is reused; its turtle state is reset before every program.
interpreter = PseudoProgramInterpreter()

for index, description, program in zip(df_all.index, df_all['Description'], df_all['Program']):
    # File name: "<row label>_<description with underscores>.png"
    target = os.path.join(output_dir, f"{index}_{description.replace(' ', '_')}.png")

    interpreter.reset_state()
    interpreter.execute(program)
    interpreter.save_graphics(target)

Executing program: for i in range(5):
    embed("""for j in range(4):
    forward(2*i)
    left(90.0)""", locals())
Executing program: for j in range(4):
    forward(2*i)
    left(90.0)
Executing program: for j in range(4):
    forward(2*i)
    left(90.0)
Executing program: for j in range(4):
    forward(2*i)
    left(90.0)
Executing program: for j in range(4):
    forward(2*i)
    left(90.0)
Executing program: for j in range(4):
    forward(2*i)
    left(90.0)
Executing program: for j in range(6):
    embed("""forward(8)
left(0.0)
for i in range(5):
    forward(4)
    left(72.0)""", locals())
    forward(0)
    left(60.0)

Executing program: forward(8)
left(0.0)
for i in range(5):
    forward(4)
    left(72.0)
Executing program: forward(8)
left(0.0)
for i in range(5):
    forward(4)
    left(72.0)
Executing program: forward(8)
left(0.0)
for i in range(5):
    forward(4)
    left(72.0)
Executing program: forward(8)
left(0.0)
for i in range(5):
    forward(4)
    left(72.0)
Executing pr

In [17]:
import os

# Target folder for the rendered example shapes
output_dir = "logo_graphic/11testshapes"
os.makedirs(output_dir, exist_ok=True)

# Pick the example rows from df_all by label and renumber them from 0
test_indices = [98, 44, 100, 99, 200, 212, 214, 201, 53, 54, 282]
df_test_subset = df_all.loc[test_indices].reset_index(drop=True)
display(df_test_subset)

# One interpreter is reused; its turtle state is reset before every program.
interpreter = PseudoProgramInterpreter()

for index, description, program in zip(
    df_test_subset.index, df_test_subset['Description'], df_test_subset['Program']
):
    # File name: "<position in subset>_<description with underscores>.png"
    target = os.path.join(output_dir, f"{index}_{description.replace(' ', '_')}.png")

    interpreter.reset_state()
    interpreter.execute(program)
    interpreter.save_graphics(target)

Unnamed: 0,Description,Program
0,a medium 8 gon,for i in range(8):\n forward(4)\n left(4...
1,a small 7 gon separated by a big space from a ...,for i in range(7):\n forward(2)\n left(5...
2,8 concentric circle s,for j in range(9):\n for i in range(HALF_IN...
3,5 short line s in a row,"for j in range(5):\n embed(""""""forward(2)\nl..."
4,a greek spiral with 7 turns,for i in range(8):\n forward(1*i)\n left...
5,a 3 stepped staircase,for i in range(3):\n forward(2)\n left(9...
6,a 3 stepped zigzag,forward(0)\nleft(45.0)\n\nfor i in range(3):\n...
7,a 7 pointed star,for i in range(7):\n forward(16)\n left(...
8,6 sided snowflake with a small square as arms,"for j in range(6):\n embed(""""""for i in rang..."
9,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl..."


In [15]:
# Create a test dataset from df_all. Row labels, in order:
#   98  gon
#   44  small 7 gon + big space + medium square
#   100 concentric circles
#   99  5 short lines in a row
#   200 greek spiral
#   212 3 stepped staircase
#   214 3 stepped zigzag
#   201 7 pointed star
#   53  snowflake arms=small square
#   54  snowflake arms=medium line + small circle
#   282 snowflake arms=short space + medium semi-circle
test_indices = [98, 44, 100, 99, 200, 212, 214, 201, 53, 54, 282]
df_test_subset = df_all.loc[test_indices].reset_index(drop=True)
display(df_test_subset)

# Print description and program of every selected row, in the order above.
for row_label in test_indices:
    print(df_all['Description'][row_label])
    print(df_all['Program'][row_label])

Unnamed: 0,Description,Program
0,a medium 8 gon,for i in range(8):\n forward(4)\n left(4...
1,a small 7 gon separated by a big space from a ...,for i in range(7):\n forward(2)\n left(5...
2,8 concentric circle s,for j in range(9):\n for i in range(HALF_IN...
3,5 short line s in a row,"for j in range(5):\n embed(""""""forward(2)\nl..."
4,a greek spiral with 7 turns,for i in range(8):\n forward(1*i)\n left...
5,a 3 stepped staircase,for i in range(3):\n forward(2)\n left(9...
6,a 3 stepped zigzag,forward(0)\nleft(45.0)\n\nfor i in range(3):\n...
7,a 7 pointed star,for i in range(7):\n forward(16)\n left(...
8,6 sided snowflake with a small square as arms,"for j in range(6):\n embed(""""""for i in rang..."
9,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl..."


a medium 8 gon
for i in range(8):
    forward(4)
    left(45.0)
a small 7 gon separated by a big space from a medium square
for i in range(7):
    forward(2)
    left(51.42857142857143)
penup()
forward(20)
left(0.0)
pendown()
for i in range(4):
    forward(4)
    left(90.0)
8 concentric circle s
for j in range(9):
    for i in range(HALF_INF):
        forward(EPS_DIST*j)
        left(EPS_ANGLE)
    for i in range(HALF_INF):
        forward(EPS_DIST*j)
        left(EPS_ANGLE)
5 short line s in a row
for j in range(5):
    embed("""forward(2)
left(0.0)
""", locals())
    penup()
    forward(2)
    left(0.0)

    pendown()
a greek spiral with 7 turns
for i in range(8):
    forward(1*i)
    left(90.0)
a 3 stepped staircase
for i in range(3):
    forward(2)
    left(90.0)

    forward(2)
    left(90.0)

    forward(0)
    left(180.0)

a 3 stepped zigzag
forward(0)
left(45.0)

for i in range(3):
    forward(2)
    left(90.0)

    forward(2)
    left(180.0 + 90.0)

a 7 pointed star
for i in r