In [1]:
from transformers import AutoTokenizer

# Load the Hugging Face tokenizer that ships with the EleutherAI/pythia-1.4b-deduped
# checkpoint; the checkpoint name passed here decides which vocabulary is used.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-1.4b-deduped")

def count_hf_tokens(text):
    """Tokenize ``text`` with the module-level ``tokenizer`` and return the piece count.

    The token pieces are also printed so the split can be inspected by eye.
    """
    pieces = tokenizer.tokenize(text)
    print(pieces)
    n_pieces = len(pieces)
    return n_pieces

# Sanity check: print how one capitalised word is split and how many tokens it costs.
example = "Tails"
print("Number of tokens:", count_hf_tokens(example))

['T', 'ails']
Number of tokens: 2


#### Original Logic 

In [None]:
import random
import csv

# Pool of participant names; each prompt draws a subset of these without repetition.
NAMES = ["Alice", "Bob", "Charlie", "Dana", "Eve", "Frank", "Grace", "Hank"]
# Direction words that can appear in a "moves it ..." instruction.
DIRECTIONS = ["up", "down", "left", "right"]

def apply_new_move(x, y, direction, steps):
    """Return the position reached from ``(x, y)`` after one move.

    "up" adds ``steps`` to y and "down" subtracts it; "right" adds ``steps``
    to x and "left" subtracts it.  Any other ``direction`` returns the
    position unchanged.
    """
    # Vertical moves only touch y.
    if direction == "up" or direction == "down":
        dy = steps if direction == "up" else -steps
        return x, y + dy
    # Horizontal moves only touch x.
    if direction == "right" or direction == "left":
        dx = steps if direction == "right" else -steps
        return x + dx, y
    # Unrecognised direction: no movement.
    return x, y

def generate_clean_and_corrupted_no_neg_x(participants=4):
    """Build one ``(clean_prompt, corrupted_prompt, answer)`` triple.

    ``participants`` names are sampled from ``NAMES``.  Each one either moves
    the robot (random direction from ``DIRECTIONS``, 1-5 steps, applied with
    ``apply_new_move``) or does nothing.  The question asks for either the
    final x- or the final y-coordinate, chosen at random.

    The clean prompt states the true start ``(10, 10)``; the corrupted prompt
    is identical except that it states ``(30, 30)``.  ``answer`` is always the
    coordinate computed from the ``(10, 10)`` start, as a string.

    Trajectories are redrawn until the final x is non-negative and the
    coordinate that is actually asked about is non-negative, so ``answer``
    never carries a minus sign.

    Returns:
        tuple[str, str, str]: clean prompt, corrupted prompt, answer.
    """
    while True:
        names = random.sample(NAMES, participants)
        instructions = []

        x, y = 10, 10  # must agree with the start quoted in the clean prompt

        for name in names:
            # Coin flip: does this participant move the robot at all?
            if random.choice([True, False]):
                direction = random.choice(DIRECTIONS)
                steps = random.randint(1, 5)
                x, y = apply_new_move(x, y, direction, steps)
                instructions.append(f"{name} moves it {direction} {steps} steps.")
            else:
                instructions.append(f"{name} does nothing.")

        if x < 0:
            continue  # Skip if x is negative

        coord = random.choice(['x', 'y'])
        value = x if coord == 'x' else y
        if value < 0:
            # The x-only check above does not cover the case where y is the
            # coordinate being asked for; reject that too.
            continue

        story = ' '.join(instructions)
        question = f"Final {coord}-coordinate ="
        clean_prompt = f"A robot starts at (10, 10). {story} {question}"
        corrupted_prompt = f"A robot starts at (30, 30). {story} {question}"
        return clean_prompt, corrupted_prompt, str(value)



def generate_csv_train_val(train_file, val_file, num_examples=6000, val_ratio=0.1,
                           example_fn=None):
    """Write unique prompt triples to a training CSV and a validation CSV.

    Args:
        train_file: Path of the training CSV to create (overwritten if present).
        val_file: Path of the validation CSV to create (overwritten if present).
        num_examples: Total number of rows across both files.
        val_ratio: Fraction of ``num_examples`` that goes to the validation
            file (``int(num_examples * val_ratio)`` rows); the rest go to the
            training file.
        example_fn: Zero-argument callable returning
            ``(clean, corrupted, answer)``.  Defaults to the notebook's
            ``generate_clean_and_corrupted_no_neg_x``.

    Both files get the header ``clean,corrupted,answer``.  Examples whose
    clean prompt was already written are discarded, so no clean prompt appears
    twice across the two files.  The training file is filled first, then the
    validation file.

    Note: the loop keeps drawing until both quotas are met, so ``example_fn``
    must be able to produce at least ``num_examples`` distinct clean prompts.
    """
    from pathlib import Path  # local import so the cell's import list is untouched

    if example_fn is None:
        example_fn = generate_clean_and_corrupted_no_neg_x

    val_count = int(num_examples * val_ratio)
    train_count = num_examples - val_count

    # open(..., "w") does not create missing folders; make them up front.
    for target in (train_file, val_file):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    generated_prompts = set()
    header = ["clean", "corrupted", "answer"]

    # Explicit UTF-8 so the files do not depend on the platform's locale encoding.
    with open(train_file, "w", newline='', encoding="utf-8") as train_f, \
            open(val_file, "w", newline='', encoding="utf-8") as val_f:
        train_writer = csv.writer(train_f)
        val_writer = csv.writer(val_f)
        train_writer.writerow(header)
        val_writer.writerow(header)

        train_written = 0
        val_written = 0

        while train_written < train_count or val_written < val_count:
            clean, corrupted, answer = example_fn()
            if clean in generated_prompts:
                continue  # duplicate clean prompt: draw again
            generated_prompts.add(clean)

            if train_written < train_count:
                train_writer.writerow([clean, corrupted, answer])
                train_written += 1
            else:
                val_writer.writerow([clean, corrupted, answer])
                val_written += 1

# Run: 6000 examples with val_ratio=0.1 -> 5400 training rows and 600 validation rows,
# written under robot_original/datasets_csv/.
generate_csv_train_val("robot_original/datasets_csv/train.csv", "robot_original/datasets_csv/validation.csv", num_examples=6000, val_ratio=0.1)
print("✅ Train and validation CSV files created!")

✅ Train and validation CSV files created!


#### Counterlogic

In [12]:
import random
import csv

# Pool of participant names; each prompt draws a subset of these without repetition.
NAMES = ["Alice", "Bob", "Charlie", "Dana", "Eve", "Frank", "Grace", "Hank"]
# Direction words that can appear in a "moves it ..." instruction.
DIRECTIONS = ["up", "down", "left", "right"]

def apply_counter_move(x, y, direction, steps):
    """Return the position reached from ``(x, y)`` under the inverted rules.

    Every direction word acts as its opposite: "up" subtracts ``steps`` from
    y and "down" adds it; "left" adds ``steps`` to x and "right" subtracts
    it.  Any other ``direction`` returns the position unchanged.
    """
    # Vertical words: sign is flipped relative to the usual meaning.
    if direction == "up" or direction == "down":
        dy = -steps if direction == "up" else steps
        return x, y + dy
    # Horizontal words: likewise flipped.
    if direction == "left" or direction == "right":
        dx = steps if direction == "left" else -steps
        return x + dx, y
    # Unrecognised direction: no movement.
    return x, y

def generate_clean_and_corrupted_no_neg_x(participants=4):
    """Build one ``(clean_prompt, corrupted_prompt, answer)`` triple.

    ``participants`` names are sampled from ``NAMES``; each either moves the
    robot (random direction from ``DIRECTIONS``, 1-5 steps, applied with
    ``apply_counter_move``) or does nothing.  The question always asks for
    the final x-coordinate.

    The clean prompt states the start ``(10, 10)``; the corrupted prompt is
    the same text with ``(30, 30)``.  ``answer`` is the x reached from the
    ``(10, 10)`` start, as a string.  Trajectories ending with a negative x
    are discarded and redrawn.
    """
    while True:
        chosen = random.sample(NAMES, participants)
        pos_x, pos_y = 10, 10  # Start position changed to (10, 10)
        sentences = []

        for who in chosen:
            # Coin flip: a participant may leave the robot where it is.
            if not random.choice([True, False]):
                sentences.append(f"{who} does nothing.")
                continue
            heading = random.choice(DIRECTIONS)
            distance = random.randint(1, 5)
            pos_x, pos_y = apply_counter_move(pos_x, pos_y, heading, distance)
            sentences.append(f"{who} moves it {heading} {distance} steps.")

        # Only accept trajectories whose final x is non-negative; otherwise retry.
        if pos_x >= 0:
            story = ' '.join(sentences)
            return (
                f"A robot starts at (10, 10). {story} Final x-coordinate =",
                f"A robot starts at (30, 30). {story} Final x-coordinate =",
                str(pos_x),
            )


def generate_csv_train_val(train_file, val_file, num_examples=6000, val_ratio=0.1,
                           example_fn=None):
    """Write unique prompt triples to a training CSV and a validation CSV.

    Args:
        train_file: Path of the training CSV to create (overwritten if present).
        val_file: Path of the validation CSV to create (overwritten if present).
        num_examples: Total number of rows across both files.
        val_ratio: Fraction of ``num_examples`` that goes to the validation
            file (``int(num_examples * val_ratio)`` rows); the rest go to the
            training file.
        example_fn: Zero-argument callable returning
            ``(clean, corrupted, answer)``.  Defaults to the notebook's
            ``generate_clean_and_corrupted_no_neg_x``.

    Both files get the header ``clean,corrupted,answer``.  Examples whose
    clean prompt was already written are discarded, so no clean prompt appears
    twice across the two files.  The training file is filled first, then the
    validation file.

    Note: the loop keeps drawing until both quotas are met, so ``example_fn``
    must be able to produce at least ``num_examples`` distinct clean prompts.
    """
    from pathlib import Path  # local import so the cell's import list is untouched

    if example_fn is None:
        example_fn = generate_clean_and_corrupted_no_neg_x

    val_count = int(num_examples * val_ratio)
    train_count = num_examples - val_count

    # open(..., "w") does not create missing folders; make them up front.
    for target in (train_file, val_file):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    generated_prompts = set()
    header = ["clean", "corrupted", "answer"]

    # Explicit UTF-8 so the files do not depend on the platform's locale encoding.
    with open(train_file, "w", newline='', encoding="utf-8") as train_f, \
            open(val_file, "w", newline='', encoding="utf-8") as val_f:
        train_writer = csv.writer(train_f)
        val_writer = csv.writer(val_f)
        train_writer.writerow(header)
        val_writer.writerow(header)

        train_written = 0
        val_written = 0

        while train_written < train_count or val_written < val_count:
            clean, corrupted, answer = example_fn()
            if clean in generated_prompts:
                continue  # duplicate clean prompt: draw again
            generated_prompts.add(clean)

            if train_written < train_count:
                train_writer.writerow([clean, corrupted, answer])
                train_written += 1
            else:
                val_writer.writerow([clean, corrupted, answer])
                val_written += 1

# Run: 6000 examples with val_ratio=0.1 -> 5400 training rows and 600 validation rows,
# written under robot_counter/datasets_csv/.
generate_csv_train_val("robot_counter/datasets_csv/train.csv", "robot_counter/datasets_csv/validation.csv", num_examples=6000, val_ratio=0.1)
print("✅ Train and validation CSV files created!")

✅ Train and validation CSV files created!


##### New Game

directions = ["&", "%", "$", "§"]

In [None]:
import random
import csv

# Pool of participant names; each prompt draws a subset of these without repetition.
NAMES = ["Alice", "Bob", "Charlie", "Dana", "Eve", "Frank", "Grace", "Hank"]
# Symbols used in place of direction words; their effects are defined in apply_counter_move.
DIRECTIONS = ["&", "%", "$", "§"]

def apply_counter_move(x, y, direction, steps):
    """Return the position reached from ``(x, y)`` after one symbolic move.

    * ``"%"`` multiplies x by ``steps``; y is untouched.
    * ``"$"`` multiplies y by ``steps``; x is untouched.
    * ``"§"`` adds ``steps`` to both coordinates.
    * ``"&"`` adds ``steps`` to both coordinates and swaps them.

    Any other ``direction`` returns the position unchanged.
    """
    if direction == "%":
        return x * steps, y
    if direction == "$":
        return x, y * steps
    if direction in ("&", "§"):
        shifted_x, shifted_y = x + steps, y + steps
        # "&" additionally exchanges the two shifted coordinates.
        return (shifted_y, shifted_x) if direction == "&" else (shifted_x, shifted_y)
    # Unrecognised symbol: no movement.
    return x, y

def generate_clean_and_corrupted_no_neg_x(participants=4):
    """Build one ``(clean_prompt, corrupted_prompt, answer)`` triple.

    ``participants`` names are sampled from ``NAMES``; each either moves the
    robot (random symbol from ``DIRECTIONS``, 1-5 steps, applied with
    ``apply_counter_move``) or does nothing.  The question always asks for
    the final x-coordinate.

    The clean prompt states the start ``(10, 10)``; the corrupted prompt is
    the same text with ``(30, 30)``.  ``answer`` is the x reached from the
    ``(10, 10)`` start, as a string.  Trajectories ending with a negative x
    are discarded and redrawn.
    """
    while True:
        chosen = random.sample(NAMES, participants)
        pos_x, pos_y = 10, 10  # Start position changed to (10, 10)
        sentences = []

        for who in chosen:
            # Coin flip: a participant may leave the robot where it is.
            if not random.choice([True, False]):
                sentences.append(f"{who} does nothing.")
                continue
            symbol = random.choice(DIRECTIONS)
            amount = random.randint(1, 5)
            pos_x, pos_y = apply_counter_move(pos_x, pos_y, symbol, amount)
            sentences.append(f"{who} moves it {symbol} {amount} steps.")

        # Only accept trajectories whose final x is non-negative; otherwise retry.
        if pos_x >= 0:
            story = ' '.join(sentences)
            return (
                f"A robot starts at (10, 10). {story} Final x-coordinate =",
                f"A robot starts at (30, 30). {story} Final x-coordinate =",
                str(pos_x),
            )


def generate_csv_train_val(train_file, val_file, num_examples=6000, val_ratio=0.1,
                           example_fn=None):
    """Write unique prompt triples to a training CSV and a validation CSV.

    Args:
        train_file: Path of the training CSV to create (overwritten if present).
        val_file: Path of the validation CSV to create (overwritten if present).
        num_examples: Total number of rows across both files.
        val_ratio: Fraction of ``num_examples`` that goes to the validation
            file (``int(num_examples * val_ratio)`` rows); the rest go to the
            training file.
        example_fn: Zero-argument callable returning
            ``(clean, corrupted, answer)``.  Defaults to the notebook's
            ``generate_clean_and_corrupted_no_neg_x``.

    Both files get the header ``clean,corrupted,answer``.  Examples whose
    clean prompt was already written are discarded, so no clean prompt appears
    twice across the two files.  The training file is filled first, then the
    validation file.

    Note: the loop keeps drawing until both quotas are met, so ``example_fn``
    must be able to produce at least ``num_examples`` distinct clean prompts.
    """
    from pathlib import Path  # local import so the cell's import list is untouched

    if example_fn is None:
        example_fn = generate_clean_and_corrupted_no_neg_x

    val_count = int(num_examples * val_ratio)
    train_count = num_examples - val_count

    # open(..., "w") does not create missing folders; make them up front.
    for target in (train_file, val_file):
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    generated_prompts = set()
    header = ["clean", "corrupted", "answer"]

    # The prompts in this cell contain a non-ASCII symbol, so the encoding is
    # pinned to UTF-8 instead of being left to the platform's locale default.
    with open(train_file, "w", newline='', encoding="utf-8") as train_f, \
            open(val_file, "w", newline='', encoding="utf-8") as val_f:
        train_writer = csv.writer(train_f)
        val_writer = csv.writer(val_f)
        train_writer.writerow(header)
        val_writer.writerow(header)

        train_written = 0
        val_written = 0

        while train_written < train_count or val_written < val_count:
            clean, corrupted, answer = example_fn()
            if clean in generated_prompts:
                continue  # duplicate clean prompt: draw again
            generated_prompts.add(clean)

            if train_written < train_count:
                train_writer.writerow([clean, corrupted, answer])
                train_written += 1
            else:
                val_writer.writerow([clean, corrupted, answer])
                val_written += 1

# Run: 6000 examples with val_ratio=0.1 -> 5400 training rows and 600 validation rows,
# written under robot_new/datasets_csv/.
generate_csv_train_val("robot_new/datasets_csv/train.csv", "robot_new/datasets_csv/validation.csv", num_examples=6000, val_ratio=0.1)
print("✅ Train and validation CSV files created!")


✅ Train and validation CSV files created!
