In [1]:
import jax.numpy as jnp
from sklearn.model_selection import train_test_split
import os
import random

In [5]:
# Base directory for the generated dataset files. The trailing slash is kept so
# that code which appends a file name directly to this string still yields a
# valid path.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
folder_path = "D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Easy_Multidigit_Addition_Decimal/"

In [2]:
def generate_final_data_and_save(output_dir=None):
    """Write every pair (a, b) with 0 <= a, b <= 99 and a + b < 100 to test_dataset.txt.

    The pairs are produced in ascending ``a`` and, for equal ``a``, ascending
    ``b``. The file holds the ``str()`` of the resulting list of tuples.

    Args:
        output_dir: Directory to write into. When omitted, the notebook-level
            ``folder_path`` is used, so existing zero-argument calls behave as
            before.

    Returns:
        None. The list size and a confirmation message are printed.
    """
    if output_dir is None:
        output_dir = folder_path

    data = [(a, b) for a in range(100) for b in range(100) if a + b < 100]

    # os.path.join works whether or not the directory ends with a separator;
    # plain string concatenation silently produced a wrong path without one.
    with open(os.path.join(output_dir, 'test_dataset.txt'), 'w') as f_test:
        f_test.write(str(data))

    print(f'Tamaño de test_dataset: {len(data)}')
    print("Conjunto de datos guardado como 'test_dataset.txt'")

In [20]:
# Build the pair list and write it to test_dataset.txt
generate_final_data_and_save()

Tamaño de test_dataset: 5050
Conjunto de datos guardado como 'test_dataset.txt'


In [18]:
# An empty base directory makes os.path.join return the bare file name, so the
# files written by this cell land in the current working directory.
folder_path = ""

# Hand-picked stimulus pairs that are excluded from the training pairs built
# further down in this cell. Every pair (a, b) is immediately followed by its
# mirrored counterpart (b, a).
test_pairs_stimuli = [
    (4, 3), (3, 4), (25, 62), (62, 25), (6, 13), (13, 6), (37, 41), (41, 37),
    (7, 2), (2, 7), (24, 62), (62, 24), (12, 7), (7, 12), (42, 34), (34, 42),
    (5, 14), (14, 5), (21, 74), (74, 21), (12, 13), (13, 12), (24, 45), (45, 24),
    (16, 12), (12, 16), (24, 71), (71, 24), (4, 13), (13, 4), (41, 35), (35, 41),
    (14, 15), (15, 14), (65, 32), (32, 65), (13, 16), (16, 13), (43, 25), (25, 43),
    (5, 3), (3, 5), (65, 21), (21, 65), (12, 15), (15, 12), (42, 32), (32, 42),
    (14, 12), (12, 14), (41, 38), (38, 41), (15, 13), (13, 15), (74, 23), (23, 74),
    (13, 14), (14, 13), (31, 45), (45, 31), (4, 15), (15, 4), (65, 31), (31, 65),
    (2, 13), (13, 2), (24, 43), (43, 24), (2, 17), (17, 2), (61, 32), (32, 61),
    (14, 4), (4, 14), (32, 47), (47, 32), (12, 5), (5, 12), (53, 36), (36, 53),
    (16, 3), (3, 16), (28, 51), (51, 28), (5, 13), (13, 5), (36, 43), (43, 36),
    (14, 3), (3, 14), (67, 32), (32, 67), (17, 12), (12, 17), (26, 43), (43, 26),
    (5, 7), (7, 5), (47, 38), (38, 47), (13, 8), (8, 13), (26, 65), (65, 26),
    (8, 6), (6, 8), (29, 48), (48, 29), (9, 4), (4, 9), (39, 26), (26, 39),
    (8, 7), (7, 8), (37, 46), (46, 37), (17, 6), (6, 17), (46, 35), (35, 46),
    (7, 17), (17, 7), (34, 57), (57, 34), (19, 5), (5, 19), (36, 47), (47, 36),
    (6, 15), (15, 6), (52, 29), (29, 52), (4, 17), (17, 4), (34, 49), (49, 34),
    (7, 6), (6, 7), (27, 45), (45, 27), (13, 18), (18, 13), (38, 25), (25, 38),
    (9, 5), (5, 9), (25, 67), (67, 25), (5, 16), (16, 5), (65, 28), (28, 65),
    (5, 8), (8, 5), (35, 27), (27, 35), (7, 9), (9, 7), (45, 39), (39, 45),
    (18, 7), (7, 18), (46, 36), (36, 46), (17, 19), (19, 17), (29, 67), (67, 29),
    (14, 17), (17, 14), (39, 28), (28, 39), (19, 15), (15, 19), (49, 32), (32, 49),
    (15, 8), (8, 15), (64, 28), (28, 64), (8, 9), (9, 8), (24, 68), (68, 24),
    (9, 14), (14, 9), (29, 38), (38, 29), (7, 4), (4, 7), (28, 69), (69, 28)
]

# Write the held-out stimulus pairs to stimuli.txt
file_path = os.path.join(folder_path, "stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(test_pairs_stimuli))

print(f"Archivo guardado en: {file_path}")

# Training pairs: every (a, b) in 0..99 x 0..99 whose sum is below 100 and that
# is not one of the held-out stimuli. The stimuli are copied into a set so each
# membership test is a hash lookup instead of a scan over the whole list.
held_out_pairs = set(test_pairs_stimuli)
all_combinations = [(a, b) for a in range(100) for b in range(100)]
train_couples_stimuli = [
    pair for pair in all_combinations
    if sum(pair) < 100 and pair not in held_out_pairs
]

# Order by the sum of the two operands. sorted() is stable, so pairs with the
# same sum keep their generation order (ascending first operand).
train_couples_stimuli = sorted(train_couples_stimuli, key=sum)

file_path_train = os.path.join(folder_path, "train_couples_stimuli.txt")
with open(file_path_train, "w") as file:
    file.write(str(train_couples_stimuli))

print(f"Archivo guardado en: {file_path_train}")

Archivo guardado en: stimuli.txt
Archivo guardado en: train_couples_stimuli.txt


In [8]:
def generate_carry_over_data_and_save(output_dir=None):
    """Write the pairs (a, b), 0 <= a, b <= 99, whose addition needs a carry and sums below 100.

    A pair is kept when its units digits or its tens digits add up to 10 or
    more and ``a + b < 100``. The file combinations_with_carry_over.txt holds
    the ``str()`` of the resulting list of tuples.

    Args:
        output_dir: Directory to write into. When omitted, the notebook-level
            ``folder_path`` is used, so existing zero-argument calls behave as
            before.

    Returns:
        None. The list size and a confirmation message are printed.
    """
    if output_dir is None:
        output_dir = folder_path

    data = []
    for a in range(100):
        for b in range(100):
            # divmod(n, 10) returns (tens, units) in that order; the previous
            # code bound the two results to swapped names.
            tens_a, units_a = divmod(a, 10)
            tens_b, units_b = divmod(b, 10)
            carry_from_units = units_a + units_b >= 10
            # Tens digits adding up to >= 10 already means a + b >= 100, so
            # together with the sum filter below only the units carry can
            # actually select a pair. The check is kept to state the intent.
            carry_from_tens = tens_a + tens_b >= 10
            if (carry_from_units or carry_from_tens) and a + b < 100:
                data.append((a, b))

    # os.path.join works whether or not the directory ends with a separator.
    with open(os.path.join(output_dir, 'combinations_with_carry_over.txt'), 'w') as f_test:
        f_test.write(str(data))

    print(f'Tamaño de combinations_with_carry_over: {len(data)}')
    print("Conjunto de datos guardado como 'combinations_with_carry_over.txt'")

In [9]:
# Build the carry pairs and write them to combinations_with_carry_over.txt
generate_carry_over_data_and_save()

Tamaño de combinations_with_carry_over: 2025
Conjunto de datos guardado como 'combinations_with_carry_over.txt'


In [10]:
def generate_small_problem_size_data_and_save(output_dir=None):
    """Write every pair (a, b) with 0 <= a, b <= 99 and a + b < 40.

    The file combinations_small_problem_size.txt holds the ``str()`` of the
    resulting list of tuples, in ascending ``a`` and then ascending ``b``.

    Args:
        output_dir: Directory to write into. When omitted, the notebook-level
            ``folder_path`` is used, so existing zero-argument calls behave as
            before.

    Returns:
        None. The list size and a confirmation message are printed.
    """
    if output_dir is None:
        output_dir = folder_path

    data = [(a, b) for a in range(100) for b in range(100) if a + b < 40]

    # os.path.join works whether or not the directory ends with a separator.
    with open(os.path.join(output_dir, 'combinations_small_problem_size.txt'), 'w') as f_test:
        f_test.write(str(data))

    print(f'Tamaño de combinations_small_problem_size: {len(data)}')
    print("Conjunto de datos guardado como 'combinations_small_problem_size.txt'")

def generate_large_problem_size_data_and_save(output_dir=None):
    """Write every pair (a, b) with 0 <= a, b <= 99 and 60 < a + b < 100.

    The file combinations_large_problem_size.txt holds the ``str()`` of the
    resulting list of tuples, in ascending ``a`` and then ascending ``b``.

    Args:
        output_dir: Directory to write into. When omitted, the notebook-level
            ``folder_path`` is used, so existing zero-argument calls behave as
            before.

    Returns:
        None. The list size and a confirmation message are printed.
    """
    if output_dir is None:
        output_dir = folder_path

    # Keep sums strictly between 60 and 100 (the old inline comment wrongly
    # described this as "sum less than 40").
    data = [(a, b) for a in range(100) for b in range(100) if 60 < a + b < 100]

    # os.path.join works whether or not the directory ends with a separator.
    with open(os.path.join(output_dir, 'combinations_large_problem_size.txt'), 'w') as f_test:
        f_test.write(str(data))

    print(f'Tamaño de combinations_large_problem_size: {len(data)}')
    print("Conjunto de datos guardado como 'combinations_large_problem_size.txt'")

In [11]:
# Write the small-sum (a + b < 40) and large-sum (60 < a + b < 100) pair files
generate_small_problem_size_data_and_save()
generate_large_problem_size_data_and_save()

Tamaño de combinations_small_problem_size: 820
Conjunto de datos guardado como 'combinations_small_problem_size.txt'
Tamaño de combinations_large_problem_size: 3159
Conjunto de datos guardado como 'combinations_large_problem_size.txt'


In [7]:
def load_pairs(filename, path):
    """Load the pairs stored in a text file as a Python literal.

    The file is expected to contain the ``str()`` of a list of tuples, which is
    how the other cells of this notebook write their pair files.

    Args:
        filename: Name of the file to read.
        path: Directory that contains the file.

    Returns:
        The object described by the file content (a list of tuples for files
        written by this notebook).

    Raises:
        OSError: If the file cannot be opened.
        ValueError, SyntaxError: If the content is not a valid Python literal.
    """
    # Imported here so the helper does not depend on the notebook's import cell.
    import ast

    file_path = os.path.join(path, filename)
    with open(file_path, "r") as file:
        content = file.read().strip()
    # literal_eval only accepts literals (lists, tuples, numbers, ...), so a
    # tampered or corrupted file cannot execute code the way eval() would.
    return ast.literal_eval(content)

def repeat_until_size(source_list, target_size):
    """Cycle through ``source_list`` until exactly ``target_size`` items are collected.

    Args:
        source_list: Items to repeat, in order.
        target_size: Length of the returned list.

    Returns:
        A new list holding the items of ``source_list`` repeated in order and
        truncated to ``target_size`` elements. An empty list is returned when
        ``target_size`` is zero or negative.

    Raises:
        ValueError: If ``source_list`` is empty while ``target_size`` is
            positive. The previous implementation looped forever in that case.
    """
    if target_size <= 0:
        return []

    items = list(source_list)
    if not items:
        raise ValueError("source_list is empty; cannot repeat it up to a positive target_size")

    # Ceiling division: the smallest number of full copies that reaches target_size.
    copies = -(-target_size // len(items))
    return (items * copies)[:target_size]

def sum_of_pair(pair):
    """Return the sum of the two elements of ``pair``.

    ``pair`` must unpack into exactly two values; any other length raises
    ``ValueError`` from the unpacking.
    """
    first, second = pair
    return first + second

In [8]:
# Directory that holds the pair files read below
folder_path = "D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal"

# Input file names
train_file, carry_over_file, small_file, large_file = (
    "train_couples_stimuli.txt",
    "combinations_with_carry_over.txt",
    "combinations_small_problem_size.txt",
    "combinations_large_problem_size.txt",
)

# The training pairs stay a list; the three category files are turned into
# sets because later cells only use them for membership tests.
train_couples = load_pairs(train_file, folder_path)
carry_over, small_problems, large_problems = (
    set(load_pairs(category_file, folder_path))
    for category_file in (carry_over_file, small_file, large_file)
)

In [23]:
# Split the training pairs into six buckets: by problem size (listed as small,
# listed as large, or listed as neither) and by whether the pair is listed in
# carry_over.
filtered_1, filtered_2, filtered_3 = [], [], []  # no carry: small, neither, large
filtered_4, filtered_5, filtered_6 = [], [], []  # carry:    small, neither, large

for pair in train_couples:
    has_carry = pair in carry_over
    is_small = pair in small_problems
    is_large = pair in large_problems
    if is_small:
        (filtered_4 if has_carry else filtered_1).append(pair)
    if is_large:
        (filtered_6 if has_carry else filtered_3).append(pair)
    if not (is_small or is_large):
        (filtered_5 if has_carry else filtered_2).append(pair)

# Inside each bucket, order by ascending sum. The sort is stable, so pairs
# with equal sums keep the order they have in train_couples.
for bucket in (filtered_1, filtered_2, filtered_3, filtered_4, filtered_5, filtered_6):
    bucket.sort(key=sum_of_pair)

# Small and in-between problems without carry
sorted_couples_without_carry = filtered_1 + filtered_2

# Small and in-between problems with carry
sorted_couples_with_carry = filtered_4 + filtered_5

# Large problems: first without carry, then with carry
sorted_couples_large = filtered_3 + filtered_6

# Final order: the three groups one after another
final_sorted_couples = (
    sorted_couples_without_carry
    + sorted_couples_with_carry
    + sorted_couples_large
)

# Save to file
file_path = os.path.join(folder_path, "sorted_train_couples_stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(final_sorted_couples))

print(f"Longitud del dataset: {len(final_sorted_couples)}")
print("Archivo guardado en:", file_path)

Longitud del dataset: 9808
Archivo guardado en: D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal\sorted_train_couples_stimuli.txt


In [4]:
# Build a balanced training list: the same number of entries from each of the
# four categories (small/large problem size x without/with carry), written to
# file in shuffled order.
SUBSET_SIZE = 6198   # entries taken from every category, repeating pairs if needed
SHUFFLE_SEED = 0     # fixed seed so the shuffled file can be regenerated exactly

subset_small_not_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair not in carry_over and pair in small_problems],
    SUBSET_SIZE,
)

subset_small_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in small_problems and pair in carry_over],
    SUBSET_SIZE,
)

subset_large_not_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair not in carry_over and pair in large_problems],
    SUBSET_SIZE,
)

subset_large_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in large_problems and pair in carry_over],
    SUBSET_SIZE,
)

# NOTE: despite its name, this list is shuffled, not sorted; the name is kept
# because it is a notebook-level variable.
sorted_couples = subset_small_not_in_carry + subset_small_in_carry + subset_large_not_in_carry + subset_large_in_carry
# A dedicated Random instance keeps the shuffle reproducible without touching
# the state of the module-level generator used by other cells.
random.Random(SHUFFLE_SEED).shuffle(sorted_couples)

# Save to file
file_path = os.path.join(folder_path, "not_sorted_train_couples_stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(sorted_couples))

print("Archivo guardado en:", file_path)

Archivo guardado en: D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal\not_sorted_train_couples_stimuli.txt


In [5]:
# Build one subset per category (small/large problem size x without/with
# carry). Each subset holds 6198 entries: the matching training pairs in file
# order, repeated from the start when there are fewer than 6198 of them.
subset_small_not_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in small_problems and pair not in carry_over],
    6198,
)
subset_small_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in carry_over and pair in small_problems],
    6198,
)
subset_large_not_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in large_problems and pair not in carry_over],
    6198,
)
subset_large_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in carry_over and pair in large_problems],
    6198,
)

# Concatenate the four subsets, then order the whole list by the sum of each
# pair (stable sort: equal sums keep their concatenation order).
sorted_couples = [
    *subset_small_not_in_carry,
    *subset_small_in_carry,
    *subset_large_not_in_carry,
    *subset_large_in_carry,
]
sorted_couples.sort(key=sum)

# Save to file
file_path = os.path.join(folder_path, "sorted_train_couples_stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(sorted_couples))

print("Archivo guardado en:", file_path)

Archivo guardado en: D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal\sorted_train_couples_stimuli.txt


In [10]:
# Training pairs that are listed as small problems and not listed in
# carry_over, in file order, repeated from the start until there are 6198 entries.
subset_small_not_in_carry = repeat_until_size(
    [pair for pair in train_couples if pair in small_problems and pair not in carry_over],
    6198,
)

# Save to file
file_path = os.path.join(folder_path, "small_no_carry_train_couples_stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(subset_small_not_in_carry))

print("Archivo guardado en:", file_path)

Archivo guardado en: D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal\small_no_carry_train_couples_stimuli.txt


In [6]:
# Directory that holds the pair files read below
folder_path = "D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition_Decimal"

# Input file names
train_file, carry_over_file, small_file, large_file = (
    "train_couples_stimuli.txt",
    "combinations_with_carry_over.txt",
    "combinations_small_problem_size.txt",
    "combinations_large_problem_size.txt",
)

# The training pairs stay a list; the three category files are turned into
# sets because the code below only uses them for membership tests.
train_couples = load_pairs(train_file, folder_path)
carry_over, small_problems, large_problems = (
    set(load_pairs(category_file, folder_path))
    for category_file in (carry_over_file, small_file, large_file)
)

# Training pairs per category, each in the order they appear in train_couples.
small_no_carry = [pair for pair in train_couples if pair in small_problems and pair not in carry_over]
small_carry = [pair for pair in train_couples if pair in small_problems and pair in carry_over]
large_no_carry = [pair for pair in train_couples if pair in large_problems and pair not in carry_over]
large_carry = [pair for pair in train_couples if pair in large_problems and pair in carry_over]

# Stage 1: 300 small problems without carry.
subset_1 = repeat_until_size(small_no_carry[:300], 300)

# Stage 2: 700 small problems in random order - the first 150 carry pairs
# cycled up to 200 entries, plus 500 entries drawn from the first 550
# no-carry pairs. Sampling all 700 entries amounts to shuffling them.
subset_2_in_carry = repeat_until_size(small_carry[:150], 200)
subset_2_not_in_carry = repeat_until_size(small_no_carry[:550], 500)
subset_2 = random.sample(subset_2_in_carry + subset_2_not_in_carry, 700)

# Stage 3: 200 large problems without carry.
subset_3 = repeat_until_size(large_no_carry[:200], 200)

# Stage 4: 200 large problems in random order - 50 with carry, 150 without.
subset_4_in_carry = repeat_until_size(large_carry[:50], 50)
subset_4_not_in_carry = repeat_until_size(large_no_carry[:150], 150)
subset_4 = random.sample(subset_4_in_carry + subset_4_not_in_carry, 200)

# Stage 5: 50 entries from each of the four categories, not shuffled.
subset_5 = (
    repeat_until_size(large_carry, 50)
    + repeat_until_size(large_no_carry, 50)
    + repeat_until_size(small_carry, 50)
    + repeat_until_size(small_no_carry, 50)
)

# The five stages make up the first 1600 entries (300 + 700 + 200 + 200 + 200).
sorted_couples = subset_1 + subset_2 + subset_3 + subset_4 + subset_5

# Fill up to 9808 entries with training pairs drawn uniformly at random
# (with replacement).
remaining_couples = [
    random.choice(train_couples)
    for _ in range(max(0, 9808 - len(sorted_couples)))
]

# Trim so the final list has exactly 9808 entries.
sorted_couples = (sorted_couples + remaining_couples)[:9808]

# Save to file
file_path = os.path.join(folder_path, "sorted_train_couples_stimuli.txt")
with open(file_path, "w") as file:
    file.write(str(sorted_couples))

print("Archivo guardado en:", file_path)

NameError: name 'random' is not defined