In [1]:
import pandas as pd
import numpy as np
import math

import jax
import jax.numpy as jnp
from jax import random, grad
from jax.nn import relu, sigmoid
from functools import partial

import matplotlib.pyplot as plt

import re
import time
import pytz
import os
import random
import sys
import pickle
from datetime import datetime

In [2]:
def initialize_random_weights(mean, std, shape = ()):
    """Draw Gaussian weights from NumPy's global random generator.

    Args:
        mean: Centre of the normal distribution.
        std: Standard deviation of the normal distribution.
        shape: Forwarded to ``np.random.normal`` as ``size``; the default ``()``
            requests a single scalar-shaped draw.

    Returns:
        The samples produced by ``np.random.normal``.
    """
    samples = np.random.normal(mean, std, shape)
    return samples

# Smooth surrogate for "round down to the nearest even integer". The cosine bump
# 0.5 * (1 - cos(pi * x)) is 0 at even integers and 1 at odd integers, so an odd
# input is mapped to the even number just below it while the function stays
# differentiable everywhere.
def lower_even(x):
    parity_bump = 0.5 * (1 - jnp.cos(jnp.pi * x))
    return x - parity_bump

# Differentiable parity indicator. Despite the historical description this is a
# cubic polynomial, not a sinusoid: it evaluates to 0, 1, 0, 1 at x = 0, 1, 2, 3,
# i.e. 0 for the even and 1 for the odd integers in that range.
def differentiable_even_or_odd(x):
    cubic_term = (2 * x ** 3) / 3
    quadratic_term = 3 * x ** 2
    linear_term = (10 * x) / 3
    return cubic_term - quadratic_term + linear_term

folder = 'D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition'

# NOTE(review): eval() executes whatever expression each file contains. These are
# local project files, but if they only ever hold literal lists of tuples,
# ast.literal_eval would be the safer loader -- confirm the file format before
# switching.

# Load the training couples from disk
with open(f"{folder}/train_couples.txt", "r") as file:
    train_couples = eval(file.read())

# Couples whose addition produces a carry-over (read and evaluated from the file)
with open(f"{folder}/combinations_with_carry_over.txt", "r") as file:
    combinations_with_carry_over = eval(file.read())

# Held-out test couples
with open(f"{folder}/real_test_dataset.txt", 'r') as file:
    real_test_dataset = eval(file.read())

# Held-out test couples that involve a carry-over
with open(f"{folder}/real_test_dataset_with_carry_over.txt", 'r') as file:
    real_test_carry_over_dataset = eval(file.read())

# Training couples that contain no zero operand
train_without_zeros = [pair for pair in train_couples if 0 not in pair]

# Training couples that produce a carry-over. Membership is tested against a set
# so the filter is linear in the number of couples instead of scanning the whole
# carry-over list once per couple.
_carry_over_lookup = set(map(tuple, combinations_with_carry_over))
train_with_carry_over = [pair for pair in train_couples if tuple(pair) in _carry_over_lookup]

# Sample training couples (zero operands allowed) and label each with its sum
def generate_dataset_with_zeros(size):
    """Build a random addition dataset from ``train_couples``.

    Args:
        size: Number of couples to draw, with replacement, via ``random.choices``.

    Returns:
        DataFrame with Column_1 and Column_2 (the operands) and Column_3 (their sum).
    """
    sampled_couples = random.choices(train_couples, k=size)

    first_operands, second_operands = [], []
    for couple in sampled_couples:
        first_operands.append(couple[0])
        second_operands.append(couple[1])

    frame = pd.DataFrame({'Column_1': first_operands, 'Column_2': second_operands})
    frame['Column_3'] = frame['Column_1'] + frame['Column_2']
    return frame

def generate_dataset_without_zeros(size):
    """Build a random addition dataset from ``train_without_zeros``.

    Args:
        size: Number of couples to draw, with replacement, via ``random.choices``.

    Returns:
        DataFrame with Column_1 and Column_2 (the operands) and Column_3 (their sum).
    """
    sampled_couples = random.choices(train_without_zeros, k=size)

    first_operands, second_operands = [], []
    for couple in sampled_couples:
        first_operands.append(couple[0])
        second_operands.append(couple[1])

    frame = pd.DataFrame({'Column_1': first_operands, 'Column_2': second_operands})
    frame['Column_3'] = frame['Column_1'] + frame['Column_2']
    return frame

def generate_test_dataset(n_max=100):
    """Enumerate every ordered pair of integers in ``[0, n_max)`` with its sum.

    Column_1 cycles fastest (0, 1, ..., n_max - 1, 0, 1, ...) while Column_2 keeps
    each value for ``n_max`` consecutive rows, giving ``n_max ** 2`` rows in total.

    Args:
        n_max: Exclusive upper bound for both operands.

    Returns:
        DataFrame with Column_1, Column_2 and Column_3 (the sum of the first two).
    """
    operands = range(n_max)
    grid = [(inner, outer) for outer in operands for inner in operands]

    dataset = pd.DataFrame({
        'Column_1': [inner for inner, _ in grid],
        'Column_2': [outer for _, outer in grid],
    })

    # The target column is the sum of both operands
    dataset['Column_3'] = dataset['Column_1'] + dataset['Column_2']
    return dataset

def generate_real_test_dataset():
    """Turn the loaded ``real_test_dataset`` couples into an addition DataFrame.

    Returns:
        DataFrame with Column_1 and Column_2 (the operands of each couple) and
        Column_3 (their sum).
    """
    # Transpose the (a_i, b_i) couples into one sequence per operand
    operand_columns = tuple(zip(*real_test_dataset))
    first_operands, second_operands = operand_columns

    operands = pd.DataFrame({'Column_1': first_operands, 'Column_2': second_operands})
    return operands.assign(Column_3=operands['Column_1'] + operands['Column_2'])

def decimal_to_binary(n, bits):
    """Encode a non-negative integer as a fixed-width, most-significant-bit-first vector.

    Args:
        n: Value to encode; must satisfy ``0 <= n < 2**bits``.
        bits: Width of the resulting bit vector.

    Returns:
        ``np.int8`` array of zeros and ones.

    Raises:
        ValueError: If ``n`` does not fit in ``bits`` bits.
    """
    if not (0 <= n < 2**bits):
        raise ValueError("Number out of range")

    # Zero-padded binary string, one character per bit
    digits = format(n, f'0{bits}b')
    return np.array([int(digit) for digit in digits], dtype=np.int8)

# Function to convert binary number to decimal
def binary_to_decimal(binary_vector, bits):
    """Decode a most-significant-bit-first bit vector into its decimal value.

    Args:
        binary_vector: Sequence of ``bits`` entries (for example the ``np.int8``
            arrays returned by ``decimal_to_binary``).
        bits: Expected length of ``binary_vector``.

    Returns:
        The decimal value as a Python number.

    Raises:
        ValueError: If ``binary_vector`` does not have exactly ``bits`` elements.
    """
    # Ensure the vector has the correct number of elements
    if len(binary_vector) != bits:
        raise ValueError(f"The vector must have exactly {bits} elements.")

    decimal = 0
    for position, bit in enumerate(binary_vector):
        # Unwrap NumPy/JAX scalars to Python numbers: under NumPy 2 promotion
        # rules ``np.int8(1) * 128`` raises OverflowError because the Python int
        # does not fit in int8, which broke 8-bit vectors.
        if hasattr(bit, 'item'):
            bit = bit.item()
        decimal += bit * (2 ** (bits - 1 - position))

    return decimal

def transform_to_tridimensional_matrix(dataset, bits_init=7, bits_end=8):
    """Binary-encode each of the three dataset columns into its own matrix.

    Args:
        dataset: DataFrame with exactly three columns; values are read positionally.
        bits_init: Bit width used for the first two columns.
        bits_end: Bit width used for the third column.

    Returns:
        Tuple of three ``np.int8`` matrices, one row per dataset row, with
        ``bits_init``, ``bits_init`` and ``bits_end`` columns respectively.

    Raises:
        ValueError: If the dataset does not have exactly three columns, or if
            ``decimal_to_binary`` rejects a value.
    """
    rows, cols = dataset.shape
    if cols != 3:
        raise ValueError("The dataset must have exactly 3 columns.")

    widths = (bits_init, bits_init, bits_end)
    matrices = tuple(np.zeros((rows, width), dtype=np.int8) for width in widths)

    # Encode row by row, column by column, in the original left-to-right order
    for row in range(rows):
        for col, (matrix, width) in enumerate(zip(matrices, widths)):
            matrix[row] = decimal_to_binary(dataset.iloc[row, col], width)

    return matrices
    
    
def prepare_dataset(level, size=1, couples_included=None):
    """Build an addition dataset for the requested curriculum level.

    Every level returns a DataFrame with Column_1 and Column_2 (the operands) and
    Column_3 (their sum). Levels -3 and 0 previously stored the product, which
    was inconsistent with the other levels and with the adder being trained.

    Args:
        level: Curriculum level.
            -3: sample uniformly from ``couples_included``;
            -2: sample from all training couples;
            -1: sample from training couples without zeros;
             0: random operands in 1..9, rejecting couples listed in
                ``combinations_with_carry_over``;
             1: sample from training couples with carry-over.
        size: Number of rows to generate.
        couples_included: Couples to sample from when ``level == -3``.

    Returns:
        The generated DataFrame, or ``None`` (after printing a message) for an
        unknown level.

    Raises:
        ValueError: If ``level == -3``, ``size > 0`` and no couples were given.
    """
    if level == -3:
        pairs = couples_included if couples_included is not None else []
        if size > 0 and len(pairs) == 0:
            raise ValueError("couples_included must not be empty when level == -3.")
        column_1 = []
        column_2 = []
        while len(column_1) < size:
            choice = pairs[np.random.choice(len(pairs))]
            column_1.append(choice[0])
            column_2.append(choice[1])
        dataset = pd.DataFrame({'Column_1': column_1, 'Column_2': column_2})
        dataset['Column_3'] = dataset['Column_1'] + dataset['Column_2']
        return dataset

    elif level == -2:
        return generate_dataset_with_zeros(size)

    elif level == -1:
        return generate_dataset_without_zeros(size)

    elif level == 0:
        # Rejection sampling: draw batches of candidates and keep those that do
        # not carry over. Accepted chunks are concatenated once at the end
        # instead of growing a DataFrame inside the loop.
        accepted_chunks = []
        accepted_rows = 0
        while accepted_rows < size:
            candidates = pd.DataFrame({
                'Column_1': np.random.randint(1, 10, size),
                'Column_2': np.random.randint(1, 10, size),
            })
            has_carry = candidates[['Column_1', 'Column_2']].apply(tuple, axis=1).isin(combinations_with_carry_over)
            accepted = candidates[~has_carry]
            accepted_chunks.append(accepted)
            accepted_rows += len(accepted)

        if accepted_chunks:
            dataset = pd.concat(accepted_chunks).iloc[:size].reset_index(drop=True)
        else:
            # size <= 0: return an empty frame that still has the operand columns
            dataset = pd.DataFrame({'Column_1': pd.Series(dtype='int64'),
                                    'Column_2': pd.Series(dtype='int64')})
        dataset['Column_3'] = dataset['Column_1'] + dataset['Column_2']
        return dataset

    elif level == 1:
        pairs = random.choices(train_with_carry_over, k=size)
        column_1 = [pair[0] for pair in pairs]
        column_2 = [pair[1] for pair in pairs]
        dataset = pd.DataFrame({'Column_1': column_1, 'Column_2': column_2})
        dataset['Column_3'] = dataset['Column_1'] + dataset['Column_2']
        return dataset

    else:
        print('Bad index for the training stage.')
        return None


def prepare_outputs(stage, x1, x2, outputs_prev):
    """Select the training targets for the given stage.

    Args:
        stage: 1 builds targets from the inputs with ``lower_even``; 2 and 3
            reuse ``outputs_prev`` unchanged.
        x1, x2: Per-sample bit vectors; stage 1 reads indices 0..6 of each.
        outputs_prev: Targets returned as-is for stages 2 and 3.

    Returns:
        For stage 1, an array with one row ``[z8, z7, z6, z5, z4, z3, z2, 0]``
        per sample; for stages 2 and 3, ``outputs_prev``; ``None`` (after
        printing a message) for any other stage.
    """
    if stage in (2, 3):
        return outputs_prev

    if stage != 1:
        print('Bad index for the training stage.')
        return None

    outputs = []
    for vec1, vec2 in zip(x1, x2):
        # Walk from index 6 down to index 0, feeding half of the previous
        # lower_even result into the next position.
        chain = []
        for bit in range(6, -1, -1):
            column_total = vec1[bit] + vec2[bit]
            if chain:
                column_total = column_total + chain[-1] * 1/2
            chain.append(lower_even(column_total))
        # chain is [z2, ..., z8]; the target row is [z8, ..., z2, 0]
        outputs.append(chain[::-1] + [0])
    return np.array(outputs)


# Hand-crafted "perfect" parameters, used by the stages in which part of the
# network is frozen at its ideal values.

# Seven selector vectors of length 14 (two 7-bit inputs concatenated)
(R2_perfect, R3_perfect, R4_perfect, R5_perfect,
 R6_perfect, R7_perfect, R8_perfect) = (np.zeros(14) for _ in range(7))

# Each selector picks the same bit position from both inputs:
# R2 reads index 6, R3 index 5, ..., R8 index 0.
for i in range(2):
    offset = 7 * i
    R2_perfect[offset + 6] = 1
    R3_perfect[offset + 5] = 1
    R4_perfect[offset + 4] = 1
    R5_perfect[offset + 3] = 1
    R6_perfect[offset + 2] = 1
    R7_perfect[offset + 1] = 1
    R8_perfect[offset + 0] = 1

# Scalar factors applied to the previous z value in the chain
v2_perfect = v3_perfect = v4_perfect = v5_perfect = v6_perfect = v7_perfect = 1/2

# Matrix T of dimension (14, 8): input bit i of either operand feeds output column i + 1
T_perfect = np.zeros((14, 8))
for j in range(2):
    for i in range(7):
        T_perfect[7*j + i, i + 1] = 1

# Scalar factor applied to the whole z vector
v_perfect = 1/2

# Stage networks
def neural_network_1(params, x1, x2):
    """Stage-1 network: the chain of ``lower_even`` units only.

    Args:
        params: Exactly 13 entries ``[R2..R8, v2..v7]``; each R is dotted with the
            concatenated input and each v scales the previous chain value.
        x1, x2: Input vectors, concatenated along axis 0.

    Returns:
        Array ``[z8, z7, z6, z5, z4, z3, z2, 0]``.
    """
    R2, R3, R4, R5, R6, R7, R8, v2, v3, v4, v5, v6, v7 = params
    x = jnp.concatenate((x1, x2), axis=0)

    # z2 has no predecessor; every later unit adds the scaled previous value
    chain = [lower_even(jnp.dot(x, R2))]
    for weights, scale in zip((R3, R4, R5, R6, R7, R8), (v2, v3, v4, v5, v6, v7)):
        chain.append(lower_even(jnp.dot(x, weights) + jnp.dot(chain[-1], scale)))

    # chain holds [z2, ..., z8]; the output lists them from z8 down to z2, then 0
    return jnp.array(chain[::-1] + [0])

def neural_network_2(params, x1, x2):
    """Stage-2 network: frozen perfect chain plus a trainable output layer.

    Args:
        params: Exactly two entries ``[T, v]`` for the output layer.
        x1, x2: Input vectors, concatenated along axis 0.

    Returns:
        ``differentiable_even_or_odd(x . T + z * v)``, where ``z`` is the chain
        vector ``[z8, ..., z2, 0]`` computed with the module-level ``*_perfect``
        parameters.
    """
    T, v = params
    x = jnp.concatenate((x1, x2), axis=0)

    frozen_weights = (R3_perfect, R4_perfect, R5_perfect, R6_perfect, R7_perfect, R8_perfect)
    frozen_scales = (v2_perfect, v3_perfect, v4_perfect, v5_perfect, v6_perfect, v7_perfect)

    # z2 has no predecessor; every later unit adds the scaled previous value
    chain = [lower_even(jnp.dot(x, R2_perfect))]
    for weights, scale in zip(frozen_weights, frozen_scales):
        chain.append(lower_even(jnp.dot(x, weights) + jnp.dot(chain[-1], scale)))

    z = jnp.array(chain[::-1] + [0])
    return differentiable_even_or_odd(jnp.dot(x, T) + jnp.dot(z, v))
    
def neural_network_3(params, x1, x2):
    """Stage-3 network: trainable chain and trainable output layer together.

    Args:
        params: Exactly 15 entries ``[R2..R8, v2..v7, T, v]``.
        x1, x2: Input vectors, concatenated along axis 0.

    Returns:
        ``differentiable_even_or_odd(x . T + z * v)``, where ``z`` is the chain
        vector ``[z8, ..., z2, 0]`` computed from the R and v parameters.
    """
    R2, R3, R4, R5, R6, R7, R8, v2, v3, v4, v5, v6, v7, T, v = params
    x = jnp.concatenate((x1, x2), axis=0)

    # z2 has no predecessor; every later unit adds the scaled previous value
    chain = [lower_even(jnp.dot(x, R2))]
    for weights, scale in zip((R3, R4, R5, R6, R7, R8), (v2, v3, v4, v5, v6, v7)):
        chain.append(lower_even(jnp.dot(x, weights) + jnp.dot(chain[-1], scale)))

    z = jnp.array(chain[::-1] + [0])
    return differentiable_even_or_odd(jnp.dot(x, T) + jnp.dot(z, v))

# Mean-squared-error losses, one per stage
def loss_1(params, x1, x2, y):
    """Mean squared error between ``neural_network_1``'s output and ``y``."""
    residual = neural_network_1(params, x1, x2) - y
    return jnp.mean(residual**2)

def loss_2(params, x1, x2, y):
    """Mean squared error between ``neural_network_2``'s output and ``y``."""
    residual = neural_network_2(params, x1, x2) - y
    return jnp.mean(residual**2)

def loss_3(params, x1, x2, y):
    """Mean squared error between ``neural_network_3``'s output and ``y``."""
    residual = neural_network_3(params, x1, x2) - y
    return jnp.mean(residual**2)

# Gradient-descent parameter updates, one per stage
@jax.jit
def update_params_1(params, x1, x2, y, lr):
    """Apply one gradient-descent step for stage 1.

    The loss and its gradient are obtained from a single
    ``jax.value_and_grad`` call rather than two separate evaluations.

    Args:
        params: Parameter list accepted by ``loss_1``.
        x1, x2: Input vectors of one sample.
        y: Target vector of that sample.
        lr: Learning rate.

    Returns:
        ``(new_params, step_loss)`` where ``new_params[i] = params[i] - lr * grad[i]``
        and ``step_loss`` is the loss evaluated at the incoming ``params``.
    """
    step_loss, gradients = jax.value_and_grad(loss_1)(params, x1, x2, y)
    return [(p - lr * g) for p, g in zip(params, gradients)], step_loss

@jax.jit
def update_params_2(params, x1, x2, y, lr):
    """Apply one gradient-descent step for stage 2.

    The loss and its gradient are obtained from a single
    ``jax.value_and_grad`` call rather than two separate evaluations.

    Args:
        params: Parameter list accepted by ``loss_2``.
        x1, x2: Input vectors of one sample.
        y: Target vector of that sample.
        lr: Learning rate.

    Returns:
        ``(new_params, step_loss)`` where ``new_params[i] = params[i] - lr * grad[i]``
        and ``step_loss`` is the loss evaluated at the incoming ``params``.
    """
    step_loss, gradients = jax.value_and_grad(loss_2)(params, x1, x2, y)
    return [(p - lr * g) for p, g in zip(params, gradients)], step_loss
    
@jax.jit
def update_params_3(params, x1, x2, y, lr):
    """Apply one gradient-descent step for stage 3.

    The loss and its gradient are obtained from a single
    ``jax.value_and_grad`` call rather than two separate evaluations.

    Args:
        params: Parameter list accepted by ``loss_3``.
        x1, x2: Input vectors of one sample.
        y: Target vector of that sample.
        lr: Learning rate.

    Returns:
        ``(new_params, step_loss)`` where ``new_params[i] = params[i] - lr * grad[i]``
        and ``step_loss`` is the loss evaluated at the incoming ``params``.
    """
    step_loss, gradients = jax.value_and_grad(loss_3)(params, x1, x2, y)
    return [(p - lr * g) for p, g in zip(params, gradients)], step_loss

def decide_training(params, x1, x2, y, lr, stage):
    """Dispatch one parameter update to the routine of the given stage.

    Args:
        params: Parameters of the stage being trained.
        x1, x2: Input vectors of one sample.
        y: Target vector of that sample.
        lr: Learning rate.
        stage: 1, 2 or 3.

    Returns:
        ``(params, step_loss)`` from the matching ``update_params_*`` function,
        or ``None`` (after printing a message) for any other stage.
    """
    if stage not in (1, 2, 3):
        print('Bad index for the training stage.')
        return None

    if stage == 1:
        update = update_params_1
    elif stage == 2:
        update = update_params_2
    else:
        update = update_params_3

    new_params, step_loss = update(params, x1, x2, y, lr)
    return new_params, step_loss
        
# Main function to train the network
def train_stages_neural_network(params, stage, level, lr=0.01, epochs=100):
    """Run one training pass of per-sample gradient steps.

    A dataset of ``epochs`` rows is generated for ``level`` and the parameters
    are updated once per row, in order.

    Args:
        params: Parameters of the stage being trained.
        stage: Training stage forwarded to ``prepare_outputs`` and ``decide_training``.
        level: Dataset level forwarded to ``prepare_dataset``.
        lr: Learning rate.
        epochs: Number of samples, and therefore of update steps.

    Returns:
        ``(params, mean_loss)`` where ``mean_loss`` averages the per-step losses.
    """
    samples = prepare_dataset(level, epochs)
    first_bits, second_bits, sum_bits = transform_to_tridimensional_matrix(samples)
    targets = prepare_outputs(stage, first_bits, second_bits, sum_bits)

    loss_total = 0
    for step in range(epochs):
        # One gradient step on the step-th sample
        params, step_loss = decide_training(
            params, first_bits[step], second_bits[step], targets[step], lr, stage
        )
        loss_total += step_loss

    return params, loss_total / epochs



def decide_test(params, stage, real_test=0, visualize_errors=0):
    """Evaluate the model and print how many couples were predicted correctly.

    Args:
        params: Parameters of the stage being evaluated.
        stage: Stage whose network is evaluated.
        real_test: 1 evaluates through ``real_test_stages_neural_network``
            (not defined in the visible part of this notebook; it must be
            defined elsewhere before this path is used). Any other value uses
            ``test_stages_neural_network``.
        visualize_errors: Forwarded to the test routine. It used to be
            overridden with a hard-coded 0, so callers could not enable it.
    """
    if real_test == 1:
        (test_size, correct_predictions_tested_count,
         train_size, correct_predictions_trained_count,
         carry_over_size, correct_carry_over_predictions_tested_count) = real_test_stages_neural_network(
            params, stage, visualize_errors=visualize_errors)
        print(f"STAGE {stage}: Out of {train_size}, {correct_predictions_trained_count} trained were predicted correctly in the current model.")
        print(f"STAGE {stage}: Out of {test_size}, {correct_predictions_tested_count} tested were predicted correctly in the current model.")
        print(f"STAGE {stage}: Out of {carry_over_size}, {correct_carry_over_predictions_tested_count} tested with carry-over were predicted correctly in the current model.")

    else:
        (test_size, correct_predictions_tested_count,
         train_size, correct_predictions_trained_count) = test_stages_neural_network(
            params, stage, visualize_errors=visualize_errors)
        print(f"STAGE {stage}: Out of {train_size}, {correct_predictions_trained_count} trained were predicted correctly in the current model.")
        print(f"STAGE {stage}: Out of {test_size}, {correct_predictions_tested_count} tested were predicted correctly in the current model.")
        

# Main function to test the network
def test_stages_neural_network(params, stage, visualize_errors=0):
    """Evaluate the model on the full grid produced by ``generate_test_dataset``.

    Correct predictions are counted separately for couples that appear in
    ``train_couples`` and for all other couples.

    Args:
        params: Parameters of the stage being evaluated.
        stage: Stage whose network and targets are used.
        visualize_errors: When 1, print every couple that is predicted wrongly.

    Returns:
        ``(test_size, correct_tested, train_size, correct_trained)`` where
        ``train_size`` is ``len(train_couples)`` and ``test_size`` is the grid
        size minus ``train_size``.
    """
    decimal_dataset = generate_test_dataset()
    inputs_1, inputs_2, outputs_prev = transform_to_tridimensional_matrix(decimal_dataset)
    outputs = prepare_outputs(stage, inputs_1, inputs_2, outputs_prev)

    # Hash-based lookup built once, instead of scanning the train_couples list
    # for every sample in the grid.
    trained_lookup = set(map(tuple, train_couples))

    correct_predictions_tested_count = 0
    correct_predictions_trained_count = 0  # Counter for trained couples
    set_size = inputs_1.shape[0]
    train_size = len(train_couples)
    test_size = set_size - train_size

    for i in range(set_size):
        prediction, _ = predict(params, inputs_1[i], inputs_2[i], stage)
        # A sample counts as correct only if every rounded output matches its target
        if jnp.all(prediction == outputs[i]):
            couple = (int(decimal_dataset.iloc[i, 0]), int(decimal_dataset.iloc[i, 1]))
            if couple in trained_lookup:
                correct_predictions_trained_count += 1  # Increment for trained couples
            else:
                correct_predictions_tested_count += 1  # Increment for tested couples
        elif visualize_errors == 1:
            print(f'{decimal_dataset.iloc[i, 0]} plus {decimal_dataset.iloc[i, 1]} has failed.')

    return test_size, correct_predictions_tested_count, train_size, correct_predictions_trained_count

# Predict using the trained neural network
def predict(params, x1, x2, stage):
    """Run the network of the given stage and round its output.

    Args:
        params: Parameters accepted by the network of ``stage``.
        x1, x2: Input vectors of one sample.
        stage: 1, 2 or 3.

    Returns:
        ``(rounded_pred, binary_pred)`` - the output rounded with ``np.round`` and
        the raw network output - or ``None`` (after printing a message) for any
        other stage.
    """
    if stage not in (1, 2, 3):
        print('Bad index for the training stage.')
        return None

    if stage == 1:
        network = neural_network_1
    elif stage == 2:
        network = neural_network_2
    else:
        network = neural_network_3

    binary_pred = network(params, x1, x2)
    return np.round(binary_pred), binary_pred

In [3]:
def perfect_model():
    """Build the hand-crafted "perfect" parameter set.

    Returns:
        ``(trainable_model, original_model)``: two lists of 15 entries
        ``[R2..R8, v2..v7, T, v]``. ``original_model`` holds independent copies
        of the arrays, so in-place edits to the trainable parameters no longer
        leak into the reference snapshot (both lists used to share the same
        array objects).
    """
    # Seven selector vectors of length 14: R2 reads index 6 of both 7-bit
    # inputs, R3 index 5, ..., R8 index 0.
    carry_selectors = []
    for bit in range(6, -1, -1):
        selector = np.zeros(14)
        selector[bit] = 1
        selector[7 + bit] = 1
        carry_selectors.append(selector)

    # Scalar factors v2..v7 applied to the previous z value in the chain
    carry_scales = [1/2] * 6

    # Matrix T of dimension (14, 8): input bit i of either operand feeds output column i + 1
    T_perfect = np.zeros((14, 8))
    for i in range(7):
        for j in range(2):
            T_perfect[7*j + i, i + 1] = 1

    # Scalar factor applied to the whole z vector
    v_perfect = 1/2

    trainable_model = [*carry_selectors, *carry_scales, T_perfect, v_perfect]
    original_model = [p.copy() if isinstance(p, np.ndarray) else p for p in trainable_model]
    return trainable_model, original_model

def generate_model_random(mean=0.5, std=1):
    """Build a fully random parameter set drawn from a normal distribution.

    Args:
        mean: Mean of the normal distribution used for every parameter.
        std: Standard deviation of that distribution.

    Returns:
        ``(trainable_model, original_model)``: two lists of 15 entries
        ``[R2..R8, v2..v7, T, v]``. ``original_model`` holds independent copies,
        so in-place edits to the trainable parameters do not alter the snapshot
        (both lists used to share the same objects).
    """
    # R2..R8: seven weight vectors of length 14 (one weight per input bit)
    carry_weights = [initialize_random_weights(mean, std, (14)) for _ in range(7)]
    # v2..v7: six scalars scaling the previous chain value
    carry_scales = [initialize_random_weights(mean, std) for _ in range(6)]
    # T: (14, 8) output weights
    T = initialize_random_weights(mean, std, (14, 8))
    # v: scalar scaling the whole chain vector in the output layer
    v = initialize_random_weights(mean, std)

    trainable_model = [*carry_weights, *carry_scales, T, v]
    original_model = [p.copy() for p in trainable_model]
    return trainable_model, original_model
    
def generate_model_AP(epsilon_non_zeros = 0.01, epsilon_zeros = 0.01):
    """Build an "almost perfect" parameter set: the perfect one plus small noise.

    Noise is ``epsilon * np.random.randint(-10, 10)``. The order of the random
    draws is unchanged, so a seeded run yields the same parameters as before.

    Args:
        epsilon_non_zeros: Noise scale for entries that are non-zero in the
            perfect model (the selected bits, every v, the active entries of T).
        epsilon_zeros: Noise scale for the entries that are zero in the perfect model.

    Returns:
        ``(trainable_model, original_model)``: two lists of 15 entries
        ``[R2..R8, v2..v7, T, v]``. ``original_model`` holds independent copies,
        so in-place edits to the trainable parameters do not alter the snapshot
        (both lists used to share the same array objects).
    """
    def noise(scale):
        # NOTE(review): randint(-10, 10) excludes +10, so the noise range is
        # slightly asymmetric -- confirm this is intended.
        return scale * np.random.randint(-10, 10)

    # NOTE(review): each base array receives ONE shared random offset, not
    # independent noise per entry -- confirm this is intended.
    carry_weights = [np.zeros(14) + noise(epsilon_zeros) for _ in range(7)]  # R2..R8

    # R2 reads index 6 of both 7-bit inputs, R3 index 5, ..., R8 index 0
    for i in range(2):
        for k, weights in enumerate(carry_weights):
            weights[7*i + 6 - k] = 1 + noise(epsilon_non_zeros)

    carry_scales = [1/2 + noise(epsilon_non_zeros) for _ in range(6)]  # v2..v7

    T_almost_perfect = np.zeros((14, 8)) + noise(epsilon_zeros)
    for i in range(7):
        for j in range(2):
            T_almost_perfect[7*j + i, i + 1] = 1 + noise(epsilon_non_zeros)

    v_almost_perfect = 1/2 + noise(epsilon_non_zeros)

    trainable_model = [*carry_weights, *carry_scales, T_almost_perfect, v_almost_perfect]
    original_model = [p.copy() if isinstance(p, np.ndarray) else p for p in trainable_model]
    return trainable_model, original_model

In [4]:
class Tee:
    """File-like object that duplicates everything written to it.

    Each write goes first to the ``sys.stdout`` that was active when the
    instance was created and then to the log file opened by the constructor.
    """

    def __init__(self, file, mode='w'):
        """Open ``file`` with ``mode`` and remember the current ``sys.stdout``."""
        self.file = open(file, mode)
        self.console = sys.stdout

    def write(self, data):
        """Send ``data`` to the console stream, then to the log file."""
        for stream in (self.console, self.file):
            stream.write(data)

    def flush(self):
        """Flush the console stream, then the log file."""
        for stream in (self.console, self.file):
            stream.flush()

    def close(self):
        """Close the log file; the console stream is left open."""
        self.file.close()
        
def load_trainable_model(model, current_time, training_type, stage = 0):
    """Unpickle a saved model and publish it as a module-level global.

    Args:
        model: Base name of the pickle file.
        current_time: Timestamp suffix used in the file name.
        training_type: Sub-folder identifying the training configuration.
        stage: 0 loads an initial parameter set from ``Parameters/`` and stores it
            in the global ``trainable_model``; any other value loads the trained
            model of that stage from ``Trained_models/Stages/`` and stores it in
            the global ``{model}_{stage}``.

    Returns:
        The unpickled object.
    """
    folder = 'D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition'

    if stage == 0:
        model_path = f'{folder}/Parameters/{training_type}/{model}_{current_time}.pkl'
        global_name = 'trainable_model'
        label = f'trainable_model_{current_time}'
    else:
        model_path = f'{folder}/Trained_models/Stages/{training_type}/Stage_{stage}/{model}_{stage}-{current_time}.pkl'
        global_name = f'{model}_{stage}'
        label = f'{model}_{stage}_{current_time}'

    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # produced by this project.
    with open(model_path, 'rb') as f:
        globals()[global_name] = pickle.load(f)

    print(f'Model {label} loaded successfully.')
    return globals()[global_name]
        
    

In [8]:
# Purpose of this cell: generate five "almost perfect" models and pickle each
# one under Parameters/AP_<epsilon_zeros>_<epsilon_non_zeros>/.

# Noise scales passed to generate_model_AP
epsilon_zeros = 0.1
epsilon_non_zeros = 0.1

# Base path
# NOTE(review): absolute, machine-specific Windows path; consider deriving it
# from a single configurable project directory.
base_path = r"D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters"
folder_name = f"AP_{epsilon_zeros}_{epsilon_non_zeros}"
output_path = os.path.join(base_path, folder_name)

# Create the folder if it does not exist
os.makedirs(output_path, exist_ok=True)

# Generate and save five models
for _ in range(5):
    # Only trainable_model is saved; original_model is not used in this cell
    trainable_model, original_model = generate_model_AP(epsilon_non_zeros, epsilon_zeros)
    # The file name is keyed by a timestamp with one-second resolution
    current_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    file_name = f"trainable_model_stage_{current_time}.pkl"
    file_path = os.path.join(output_path, file_name)
    with open(file_path, "wb") as file:
        pickle.dump(trainable_model, file)
    print(f"Modelo guardado como {file_path}")
    # Wait so that the next iteration gets a different timestamp (and file name)
    time.sleep(2)

Modelo guardado como D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters\AP_0.1_0.1\trainable_model_stage_2024_12_04_12_42_00.pkl
Modelo guardado como D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters\AP_0.1_0.1\trainable_model_stage_2024_12_04_12_42_02.pkl
Modelo guardado como D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters\AP_0.1_0.1\trainable_model_stage_2024_12_04_12_42_04.pkl
Modelo guardado como D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters\AP_0.1_0.1\trainable_model_stage_2024_12_04_12_42_06.pkl
Modelo guardado como D:\OneDrive - Universidad Complutense de Madrid (UCM)\Doctorado\Curriculum_Learning\Multidigit_Addition\Parameters\AP_0.1_0.1\trainable_model_stage_2024_12_04_12_42_08.pkl


In [11]:
# Purpose of this cell: for every saved initial model in Parameters/<training_type>,
# train stages 1, 2 and 3 in sequence, logging each stage to a results file and
# pickling the trained parameters of each stage.
model = 'trainable_model_stage'
training_type = 'AP_0.1_0.1'

folder = f'D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition'
folder_path = f'{folder}/Parameters/{training_type}'
# NOTE(review): the '.' before 'pkl' is unescaped, so it matches any character.
date_pattern = r'trainable_model_stage_(\d{4}_\d{2}_\d{2}_\d{2}_\d{2}_\d{2}).pkl'
# Sort the files by the timestamp embedded in their name; names that do not match
# the pattern get the key '' and therefore sort first.
files = sorted(
    (f for f in os.listdir(folder_path) if not f.startswith('.')),  # Skip hidden files
    key=lambda x: re.search(date_pattern, x).group(1) if re.search(date_pattern, x) else ''
)

for filename in files:
    match = re.search(date_pattern, filename)
    if match:
        current_time = match.group(1)
    else:
        # NOTE(review): non-matching names sort first, so a single unrelated file
        # in the folder makes this 'break' skip every model; 'continue' may be
        # what is intended.
        print('Error')
        break
    
    file_path = f"{folder_path}/{model}_{current_time}.pkl"
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(file_path, 'rb') as file:
        trainable_model = pickle.load(file)

    print(f'Loaded {model}_{current_time}.pkl')
    
    training_stages = 3
    trainings_needed = np.zeros(training_stages)    # training passes used per stage
    visualizer = 10        # evaluate and report every this many training passes
    lr_changer = 250       # only referenced by the commented-out block below
    stage_changer = 2500   # give up on a stage after this many training passes
    N = 500                # samples (update steps) per training pass

    for stage in range(1,4):
        # Pick the parameters for this stage. The stage models live in module
        # globals because the loop below accesses them via globals()[f"{model}_{stage}"].
        if stage == 1:
            # Entries 0..12: R2..R8 and v2..v7
            trainable_model_stage_1 = [trainable_model[0],
                                       trainable_model[1],
                                       trainable_model[2],
                                       trainable_model[3],
                                       trainable_model[4],
                                       trainable_model[5],
                                       trainable_model[6],
                                       trainable_model[7],
                                       trainable_model[8],
                                       trainable_model[9],
                                       trainable_model[10],
                                       trainable_model[11],
                                       trainable_model[12]]
        elif stage == 2:
            # Entries 13 and 14: T and v
            trainable_model_stage_2 = [trainable_model[13], 
                                       trainable_model[14]]
        elif stage == 3:
            # Stage 3 starts from the results of stages 1 and 2
            trainable_model_stage_3 = [trainable_model_stage_1[0], 
                                       trainable_model_stage_1[1], 
                                       trainable_model_stage_1[2], 
                                       trainable_model_stage_1[3], 
                                       trainable_model_stage_1[4], 
                                       trainable_model_stage_1[5], 
                                       trainable_model_stage_1[6], 
                                       trainable_model_stage_1[7], 
                                       trainable_model_stage_1[8], 
                                       trainable_model_stage_1[9], 
                                       trainable_model_stage_1[10],
                                       trainable_model_stage_1[11],
                                       trainable_model_stage_1[12],
                                       trainable_model_stage_2[0],
                                       trainable_model_stage_2[1]]
        
        save_dir = f"{folder}/Trained_models/Stages/{training_type}/Stage_{stage}" 
        os.makedirs(save_dir, exist_ok=True) 
        results_file = os.path.join(save_dir, f"Stage_{stage}_results_{current_time}.txt") 
        # Mirror everything printed during this stage into the results file
        tee = Tee(results_file, 'w') 
        sys.stdout = tee
    
        try:
            # Baseline evaluation before any training of this stage
            test_size, correct_predictions_tested_count, train_size, correct_predictions_trained_count = test_stages_neural_network(params=globals()[f"{model}_{stage}"], stage=stage)
            final_loss = 1
            lr = 0.005
            level = -2
            response = "yes"
            while final_loss != 0:
                # Keep the previous parameters so a NaN loss can be rolled back
                prev_model = globals()[f"{model}_{stage}"]
                globals()[f"{model}_{stage}"], final_loss = train_stages_neural_network(params=globals()[f"{model}_{stage}"], stage=stage, level=level, lr=lr, epochs=N)
                trainings_needed[stage-1] += 1
                if math.isnan(final_loss):
                    globals()[f"{model}_{stage}"] = prev_model
                    print('Loss is NaN.')
                    break                
                if trainings_needed[stage-1] % visualizer == 0:
                    if response.lower() == "yes":
                        test_size, correct_predictions_tested_count, train_size, correct_predictions_trained_count = test_stages_neural_network(params=globals()[f"{model}_{stage}"], stage=stage)
                        print(f"STAGE {stage}: Out of {train_size}, {correct_predictions_trained_count} trained were predicted correctly in the current model.")
                        print(f"STAGE {stage}: Out of {test_size}, {correct_predictions_tested_count} tested were predicted correctly in the current model.")
                    elif response.lower() == "no":
                        print(f"STAGE {stage}: Objective completed, all are predicted correctly in the current model.")
                    print(f"STAGE {stage}: Loss is {final_loss}.")
                #if trainings_needed[stage-1] % lr_changer == 0:
                #    new_lr = input(f"Change of learning rate? (Current one is {lr}, press enter if not): ")
                #    if new_lr != "":
                #        lr = float(new_lr)
                if trainings_needed[stage-1] % stage_changer == 0:
                    response_pre = 'yes'
                    if response_pre.lower() == "yes":
                        break
                # The counts compared here come from the most recent evaluation,
                # which is only refreshed every `visualizer` training passes.
                if correct_predictions_trained_count == train_size:
                    # NOTE(review): response is hard-coded to "yes", so the input()
                    # loop and every "no" branch in this cell never run.
                    response = "yes"
                    while response.lower() not in ["yes", "no"]:
                        response = input("Objective completed, skip to next stage? (yes/no): ")
                        if response.lower() not in ["yes", "no"]:
                            print('Incorrect answer')        
                    if response.lower() == "yes":
                        print('Objective completed')
                        break
                    elif response.lower() == "no":
                        train_size = correct_predictions_trained_count + 1
    
            print(f'Stage {stage} completed in {trainings_needed[stage-1]} trainings.')
            save_response = 'yes'
            if save_response.lower() == 'yes':
                save_path = os.path.join(save_dir, f"trainable_model_stage_{stage}-{current_time}.pkl")
                with open(save_path, 'wb') as f:
                    pickle.dump(globals()[f"{model}_{stage}"], f)
                print(f"Model trainable_model_stage_{stage} saved at {save_path}")

        finally:
            # Always restore the real stdout and close the log, even on error
            sys.stdout = tee.console
            tee.close()
        print(f"Results of Stage {stage} saved in {results_file}")

Loaded trainable_model_stage_2024_12_04_12_42_00.pkl
STAGE 1: Out of 8000, 5506 trained were predicted correctly in the current model.
STAGE 1: Out of 2000, 1366 tested were predicted correctly in the current model.
STAGE 1: Loss is 0.05013854801654816.
STAGE 1: Out of 8000, 8000 trained were predicted correctly in the current model.
STAGE 1: Out of 2000, 2000 tested were predicted correctly in the current model.
STAGE 1: Loss is 0.005366088822484016.
Objective completed
Stage 1 completed in 20.0 trainings.
Model trainable_model_stage_1 saved at D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition/Trained_models/Stages/AP_0.1_0.1/Stage_1\trainable_model_stage_1-2024_12_04_12_42_00.pkl
Results of Stage 1 saved in D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Multidigit_Addition/Trained_models/Stages/AP_0.1_0.1/Stage_1\Stage_1_results_2024_12_04_12_42_00.txt
Loss is NaN.
Stage 2 completed in 1.0 trai