In [1]:
import json
import pandas as pd
import ast
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
import re

[nltk_data] Downloading package stopwords to /home/pratz/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# LOGO dataset locations, relative to the "external/dependencies" directory
train_logo_data = "logo_data/python/train_200_dataset.jsonl"
test_logo_data = "logo_data/python/test_dataset.jsonl"


def _load_jsonl(path):
    """Read a JSON Lines file and return one parsed object per non-blank line.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding. Blank lines (e.g. a trailing newline at
    the end of the file) are skipped instead of raising a JSONDecodeError.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


# Load train and test dataset
train_data = _load_jsonl(f"external/dependencies/{train_logo_data}")
test_data = _load_jsonl(f"external/dependencies/{test_logo_data}")

In [3]:
# LOGO primitives from the ReGAL paper, one command name per entry
logo_primitives = [
    'forward',
    'left',
    'right',
    'penup',
    'pendown',
    'teleport',
    'heading',
    'isdown',
    'embed',
]

In [4]:
# Extract descriptions and programs from train_data and transform into pandas DataFrame
def extract_descriptions_and_programs(data):
    """Collect (description, program) pairs from conversation records.

    Each record is a dict that may carry a 'messages' list. Within a record,
    the 'value' of a message whose 'from' is 'human' is taken as the
    description and the 'value' of a message whose 'from' is 'gpt' as the
    program; if a role occurs more than once the last occurrence wins.
    Records where either value is missing or empty are dropped.

    Returns:
        pandas.DataFrame with the columns 'Description' and 'Program',
        one row per retained record, in input order.
    """
    rows = []
    for record in data:
        prompt, completion = None, None
        for msg in record.get('messages', []):
            sender = msg['from']
            if sender == 'human':
                prompt = msg['value']
            elif sender == 'gpt':
                completion = msg['value']
        # Skip records that lack a usable description or program
        if not (prompt and completion):
            continue
        rows.append([prompt, completion])
    return pd.DataFrame(rows, columns=['Description', 'Program'])

# Build one DataFrame per split from the loaded records
df_train = extract_descriptions_and_programs(train_data)
df_test = extract_descriptions_and_programs(test_data)

# Stack train on top of test with a fresh 0..n-1 index, then drop rows whose
# (Description, Program) pair already appeared earlier. The index is not
# reset after de-duplication, so the remaining labels can have gaps.
df_all = (
    pd.concat([df_train, df_test], ignore_index=True)
    .drop_duplicates(subset=['Description', 'Program'])
)
display(df_all)


Unnamed: 0,Description,Program,Description Repeats,Program Repeats,Same Index
43,5 sided snowflake with a short line and a smal...,"for j in range(5):\n embed(""""""penup()\nforw...",[43],"[43, 287]",False
287,5 sided snowflake with a short space and a sho...,"for j in range(5):\n embed(""""""penup()\nforw...",[287],"[43, 287]",False


Unnamed: 0,Description,Program,Description Repeats,Program Repeats,Same Index,n-Times Desc. Repeated,n-Times Prog. Repeated
0,4 concentric square s,"for i in range(5):\n embed(""""""for j in rang...","[0, 34, 80, 103, 180, 183]","[0, 34, 80, 103, 180, 183]",True,6,6
1,6 sided snowflake with a medium line and a med...,"for j in range(6):\n embed(""""""forward(8)\nl...",[1],[1],True,1,1
2,5 sided snowflake with a medium line and a sma...,"for j in range(5):\n embed(""""""forward(8)\nl...",[2],[2],True,1,1
3,6 short line s in a row,"for j in range(6):\n embed(""""""forward(2)\nl...","[3, 113, 118, 164]","[3, 113, 118, 164]",True,4,4
4,a small triangle connected by a big line to a ...,for i in range(3):\n forward(2)\n left(1...,[4],[4],True,1,1
...,...,...,...,...,...,...,...
306,8 sided snowflake with a medium circle and a s...,"for j in range(8):\n embed(""""""penup()\nforw...",[306],[306],True,1,1
307,5 sided snowflake with 2 small circle s as arms,"for j in range(5):\n embed(""""""penup()\nforw...",[307],[307],True,1,1
308,3 sided snowflake with a small square and a sm...,"for j in range(3):\n embed(""""""penup()\nforw...",[308],[308],True,1,1
309,5 sided snowflake with a small 5 gon and a sma...,"for j in range(5):\n embed(""""""penup()\nforw...",[309],[309],True,1,1


n-Times Desc. Repeated
1    237
2     16
3      6
4      3
6      2
Name: count, dtype: int64
n-Times Prog. Repeated
1    235
2     18
3      6
4      3
6      2
Name: count, dtype: int64


#### Synthetic creation of new Programs

In [None]:
# Top-level grammar rules for LOGO
<Program> ::= <Position> | <Shape-Sequence> | <SpecialShape> | <Snowflake> 
<Shape-Sequence> ::= <Shape> | <Shape> " & " <Shape-Sequence> | <Space> " & " <Shape-Sequence>
<Sub-Program> ::= <Connected-Sub-Program> | <Separated-Sub-Program>
<Connected-Sub-Program> ::= "embed("  <Shape-Sequence> ", locals())"
<Separated-Sub-Program> ::= "embed(" <Space> " & "  <Shape-Sequence> ", locals())"

## POSITION
<Position> ::= <InARow> | <Concentric>
<InARow> ::= "for j in range(" <N-Times> "): " <Sub-Program>
<Concentric> ::= "for j in range(" <N-Times> "): " <Shape> 
            NOTE: "forward(" <Length> | <Size>" * " <N-Times> ")" # parameter within sub-program

## SHAPE
<Shape> ::= <Line> | <Polygon> | <Semicircle> | <Circle> 
<Line> ::= "forward(" <Length> ") left(" <Angle> ")" | "forward(" <Length> ")"
<Polygon> ::= "for i in range(" <Sides> "): forward(" <Length> ") left(" <Angle> = (360/ <Sides>)")" 
<Semicircle> ::= "for i in range(HALF_INF): forward(" <Size> ") left(EPS_ANGLE)"
<Circle> ::= <Semicircle> <Semicircle>

## SPECIAL SHAPE
<SpecialShape> ::= <GreekSpiral> | <Staircase> | <Zigzag> | <Star>
<GreekSpiral> ::= "for i in range(" <Sides> "): forward(1 * " <Sides> ") left(90.0)"
<Staircase> ::= "for i in range(" <N-Times> "): forward(" <Length> ") left(90.0) forward(" <Length> ") left(90.0) forward(0) left(180.0)"
<Zigzag> ::= "for i in range(" <N-Times> "): forward(" <Length> ") left(90.0) forward(" <Length> ") left(270.0)"
<Star> ::= "for i in range(" <Sides> "): forward(16) left(" <Angle> = (360/ <Sides>)")"

## SPACE
<Space> ::= "penup() " <Line> " pendown()"

## SNOWFLAKE
<Snowflake> ::= "for j in range(" <Sides> "): " <Sub-Program> 
                "forward(0) left(360 / " <Sides> ")"

## Parameters
<Sides> ::= "3" | "5" | "6" | "7" | "8"

<Length> ::= "2" | "4" | "20"          # short/small, medium, big
<Size> ::= "EPS_DIST" | "EPS_DIST * 2" # small, medium applying to (semi)circle

<Angle> ::= <Digit> "." <Digit>
<Sides> ::= "3" | "4" | "5" | "6" | "7" | "8" | "9"
<N-Times> ::= <Digit> 
<Digit> ::= "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

In [43]:
class generateLOGOShape():
    """Builds LOGO program snippets (as Python source strings) for single shapes.

    Every ``generate_*`` method returns a string of newline-separated turtle
    commands in which loop bodies are indented by four spaces. The ``left``
    flag picks the turn command: ``left(...)`` when true (the default),
    ``right(...)`` otherwise.
    """

    def __init__(self):
        pass

    @staticmethod
    def _turn(left: bool) -> str:
        """Return the name of the turn command for the requested direction."""
        return "left" if left else "right"

    @staticmethod
    def _check_angle(angle: float) -> None:
        """Raise ValueError unless ``angle`` lies in the closed range [0, 360]."""
        if not (0.0 <= angle <= 360.0):
            raise ValueError("Angle must be between 0.0 and 360.0")

    # Basic Shapes
    def generate_line(self, length: int, angle: float = 0.0, left: bool = True) -> str:
        """Return ``forward(length)``, followed by a turn when ``angle`` is non-zero.

        Raises:
            ValueError: if ``angle`` is outside [0.0, 360.0].
        """
        self._check_angle(angle)
        if angle == 0:
            return f"forward({length})"
        return f"forward({length})\n{self._turn(left)}({angle})"

    def generate_polygon(self, sides: int, length: int, left: bool = True) -> str:
        """Return a loop drawing ``sides`` edges of ``length``, turning 360/sides each time."""
        angle = 360 / sides
        return f"for i in range({sides}):\n    forward({length})\n    {self._turn(left)}({angle})"

    def generate_semicircle(self, size: int, semicircle: bool = True, left: bool = True) -> str:
        """Return one arc loop (``semicircle`` true) or two consecutive ones (false).

        The step passed to ``forward`` is ``EPS_DIST * size``. The two-loop
        variant is the same arc repeated, so both direction variants now emit
        balanced parentheses (the left-turning variant used to drop the
        closing ``)`` of its first ``left(...)`` call).
        """
        # NOTE(review): with EPS_ANGLE = 90.0 and HALF_INF = 180 each loop turns
        # 180 * 90 degrees in total; confirm these constants match the values
        # the LOGO interpreter uses for the names EPS_ANGLE / EPS_DIST / HALF_INF.
        EPS_ANGLE = 90.0
        EPS_DIST = 2  # Using the defined EPS_DIST value
        HALF_INF = 180

        step = EPS_DIST * size
        arc = f"for i in range({HALF_INF}):\n    forward({step})\n    {self._turn(left)}({EPS_ANGLE})"
        return arc if semicircle else f"{arc}\n{arc}"

    # Special Shapes
    def generate_greek_spiral(self, size: int, left: bool = True) -> str:
        """Return a loop of ``size`` segments, each ``forward(1 * size)`` plus a 90 degree turn.

        Raises:
            ValueError: if ``size`` is not between 5 and 9 (inclusive).
        """
        if not (5 <= size <= 9):
            # The message names both bounds, since values above 9 are rejected too.
            raise ValueError("A greek-spiral must have between 5 and 9 turns")
        return f"for i in range({size}):\n    forward(1 * {size})\n    {self._turn(left)}(90.0)"

    def generate_staircase(self, n_times: int, length: int, left: bool = True) -> str:
        """Return a loop of ``n_times`` steps: two ``length`` moves with 90 degree turns, then a 180 degree turn."""
        turn = self._turn(left)
        return (
            f"for i in range({n_times}):\n"
            f"    forward({length})\n"
            f"    {turn}(90.0)\n"
            f"    forward({length})\n"
            f"    {turn}(90.0)\n"
            f"    forward(0)\n"
            f"    {turn}(180.0)"
        )

    def generate_zigzag(self, n_times: int, length: int, left: bool = True) -> str:
        """Return a loop of ``n_times`` zigzag segments: a 90 degree turn, then a 270 degree turn."""
        turn = self._turn(left)
        return (
            f"for i in range({n_times}):\n"
            f"    forward({length})\n"
            f"    {turn}(90.0)\n"
            f"    forward({length})\n"
            f"    {turn}(270.0)"
        )

    def generate_star(self, sides: int, left: bool = True) -> str:
        """Return a loop of ``sides`` moves of fixed length 16, turning 360/sides each time.

        Raises:
            ValueError: if ``sides`` is not between 5 and 9 (inclusive).
        """
        if not (5 <= sides <= 9):
            raise ValueError("Stars must have between 5 and 9 points")
        angle = 360 / sides
        return f"for i in range({sides}):\n    forward(16)\n    {self._turn(left)}({angle})"

    def generate_space(self, length: int, angle: float = 0.0, left: bool = True) -> str:
        """Return a pen-up move of ``length`` (with an optional turn) followed by ``pendown()``.

        Raises:
            ValueError: if ``angle`` is outside [0.0, 360.0].
        """
        self._check_angle(angle)
        if angle == 0:
            return f"penup()\nforward({length})\npendown()"
        return f"penup()\nforward({length})\n{self._turn(left)}({angle})\npendown()"

#Position
import re
def in_a_row(n_times: int, sub_program: str) -> str:
    """Wrap ``sub_program`` in a ``for j in range(n_times):`` loop.

    Every line of ``sub_program`` is pushed four spaces deeper so that it
    forms the loop body; indentation already present is kept on top of that.
    This is a plain text transformation, so lines inside multi-line string
    literals are indented as well.

    Single-line sub-programs are supported, and lines with differing
    indentation (e.g. a polygon loop followed by a top-level command) are all
    shifted by the same amount.
    """
    # The first line is indented by the f-string below; every following line
    # gets its extra four spaces right after its newline.
    body = sub_program.replace("\n", "\n    ")
    return f"for j in range({n_times}):\n    {body}"

def concentric_semicircle(n_times: int, semicircle: bool = True, left: bool = True) -> str:
    """Return an outer ``for j`` loop whose body is one or two arc loops.

    Each arc loop runs HALF_INF times, moves ``forward(EPS_DIST*j)`` (the
    text ``*j`` is emitted literally, so the step depends on the outer loop
    variable when the program runs) and turns by EPS_ANGLE. One arc loop is
    emitted when ``semicircle`` is True, two otherwise; ``left`` selects
    ``left`` or ``right`` as the turn command.
    """
    EPS_ANGLE = 90.0
    EPS_DIST = 2  # Using the defined EPS_DIST value
    HALF_INF = 180

    turn = "left" if left == True else "right"
    arc_lines = [
        f"    for i in range({HALF_INF}):",
        f"        forward({EPS_DIST}*j)",
        f"        {turn}({EPS_ANGLE})",
    ]
    repeats = 1 if semicircle == True else 2
    program_lines = [f"for j in range({n_times}):"] + arc_lines * repeats
    return "\n".join(program_lines)

def concentric_polygon(n_times: int, sides: int, length: int, left: bool = True) -> str:
    """Return an outer ``for j`` loop wrapping a polygon loop.

    The inner loop draws ``sides`` edges, turning 360/sides after each one.
    The edge length written into the program is ``length * n_times``.
    """
    # NOTE(review): the emitted forward distance is a constant and does not
    # depend on j, so every outer iteration draws the same polygon -- confirm
    # this is the intended meaning of "concentric".
    turn_angle = 360 / sides
    edge = length * n_times
    turn = "left" if left == True else "right"
    return "\n".join([
        f"for j in range({n_times}):",
        f"    for i in range({sides}):",
        f"        forward({edge})",
        f"        {turn}({turn_angle})",
    ])


# Combination of shapes and positions
def shape_sequence(shape1: str, shape2: str, left: bool = True) -> str:
    """Concatenate two program snippets, ``shape1`` first, separated by a newline.

    The ``left`` argument is accepted but not used.
    """
    return "{}\n{}".format(shape1, shape2)

def sub_program(shape_sequence: str, locals: bool = True) -> str:
    """Wrap a program snippet in an ``embed(\"\"\"...\"\"\", locals())`` call string.

    The ``locals`` argument is accepted but not used; the text ``locals()``
    is always part of the returned string.
    """
    triple_quote = '"' * 3
    return "embed(" + triple_quote + "{}".format(shape_sequence) + triple_quote + ", locals())"

# Snowflake
def snowflake(sides: int, sub_program: str) -> str:
    """Return a loop that runs ``sub_program`` once per arm, then turns by 360/sides.

    Only the first line of ``sub_program`` is indented into the loop body;
    any further lines are inserted exactly as given.
    """
    arm_angle = 360 / sides
    program_lines = [
        f"for j in range({sides}):",
        f"    {sub_program}",
        "    forward(0)",
        f"    left({arm_angle})",
    ]
    return "\n".join(program_lines)
    

In [None]:
# It seems like there is a rule for the arms of a snowflake: if they contain a space and/or a line, then this is executed before the other basic geometric shapes such as polygons and circles.
    # Probably because these start with a for-loop, which would also affect lines or shapes since they are not separated by a closed circle.

inf


In [40]:
# Building blocks: a line with a 90 degree turn, a triangle, and a pen-up move
shape1 = generateLOGOShape().generate_line(length=2, angle=90)
shape2 = generateLOGOShape().generate_polygon(sides=3, length=2)
space = generateLOGOShape().generate_space(length=2, angle=90)

# Chain them in the order: space, triangle, line
shapes3 = shape_sequence(shape1=space, shape2=shape_sequence(shape1=shape2, shape2=shape1))
print(shapes3)
shapes3

penup()
forward(2)
left(90)
pendown()
for i in range(3):
    forward(2)
    left(120.0)
forward(2)
left(90)


'penup()\nforward(2)\nleft(90)\npendown()\nfor i in range(3):\n    forward(2)\n    left(120.0)\nforward(2)\nleft(90)'

In [44]:
# Wrap the combined shapes in an embed(...) call and repeat it as 4 snowflake arms
snowflake1 = snowflake(sides=4, sub_program=sub_program(shape_sequence=shapes3))
print(snowflake1)
snowflake1

for j in range(4):
    embed("""penup()
forward(2)
left(90)
pendown()
for i in range(3):
    forward(2)
    left(120.0)
forward(2)
left(90)""", locals())
    forward(0)
    left(90.0)


'for j in range(4):\n    embed("""penup()\nforward(2)\nleft(90)\npendown()\nfor i in range(3):\n    forward(2)\n    left(120.0)\nforward(2)\nleft(90)""", locals())\n    forward(0)\n    left(90.0)'