# PG_Test_1 :  5 systems and 100 samples each.
* So, we first need to generate the data 
  * Rossler, Lorenz63, Aizawa, Sprott and Dadras
* Then calculate the chaotic features and the tsfresh features
* Normalize (Scale) 
* Variance Filtering
* Visualization using PCA, t-SNE and UMAP

# Step 1: Data Generation

In [8]:
import pandas as pd

def transition(df,freq):
  df = df.copy()
  df['date'] = pd.date_range(start='1970-01-01', periods=len(df), freq=freq)
  df = df.set_index('date')
  df = df.asfreq(freq)
  # Rename the 'Demand' column to 'y' in the DataFrame
  df = df.rename(columns = {'y': 'exo_1', 'z' : 'exo_2'})
  df = df.rename(columns = {'x': 'y'})
  df_transitioned = df

  return df_transitioned

### GENESIS

In [13]:
import json
import numpy as np
# from utils.helpers import sys_params_gen as params_gen
# from utils.helpers import initial_conditions_gen as ic_gen
# from data_generation.binder import Binder  
# from data_generation.tsdg import Sistema  
import os

In [14]:
file_path = '/home/think/Desktop/TESIS/pg_runs/templates/template.json'
destination_path = "/home/think/Desktop/TESIS/pg_runs/pg_test_2"

In [15]:
with open(file_path, 'r') as file:
    config = json.load(file)
    # Control variables
    mode = config['run_mode']
    warden = config['warden']
    models = config['models']
    shared_parameters = config['shared_parameters']

    # Extract the variables we need from the shared_parameters
    test_number = shared_parameters["test_number"]
    number_of_child_systems = shared_parameters["number_of_child_systems"]  
    t_span = tuple(shared_parameters["t_span"])
    num_points = shared_parameters["num_points"]  
    initial_conditions = shared_parameters["initial_conditions"]  
    t_eval = np.linspace(t_span[0], t_span[1], num_points)

In [16]:
print(mode)

global


In [17]:
print(warden)

{'aizawa': True, 'chen': False, 'chua': False, 'dadras': True, 'four_wing': False, 'halvorsen': False, 'lorenz63': True, 'lorenz83': False, 'ravinoch_fabrikant': False, 'rossler': True, 'sprott': True, 'thomas': False, 'tsucs': False}


In [18]:
print(models)

{'aizawa': {'params': {'a': [0.95, 1.1], 'b': [0.7, 0.9], 'c': [0.6, 0.8], 'd': [3.5, 4], 'e': [0.25, 0.3], 'f': [0.1, 0.2]}}, 'chen': {'params': {'alpha': [40, 5.1], 'beta': [-10, -9.99], 'delta': [-0.38, -0.37]}}, 'chua': {'params': {'sigma': [10, 15], 'beta': [2.66666666, 7], 'rho': [28.5, 70]}}, 'dadras': {'params': {'a': [3, 4], 'b': [2.7, 3], 'c': [1.7, 2], 'd': [2, 3], 'e': [9, 10]}}, 'four_wing': {'params': {'a': [0.2, 0.4], 'b': [0.01, 0.03], 'c': [-0.4, -0.3]}}, 'halvorsen': {'params': {'a': [1.89, 1.95]}}, 'lorenz63': {'params': {'sigma': [10, 15], 'beta': [2.66666666, 7], 'rho': [28.5, 70]}}, 'lorenz83': {'params': {'a': [0.95, 1], 'b': [7.91, 8.91], 'f': [4.83, 5.83], 'g': [4.66, 5.66]}}, 'ravinoch_fabrikant': {'params': {'alpha': [0.14, 0.6], 'gamma': [0.1, 0.4]}}, 'rossler': {'params': {'a': [0.2, 0.3], 'b': [0.2, 0.3], 'c': [5.7, 5.8]}}, 'sprott': {'params': {'a': [2.07, 2.3], 'b': [1.79, 2]}}, 'thomas': {'params': {'b': [0.20808186, 0.30808186]}}, 'tsucs': {'params': {

In [19]:
print(shared_parameters)

{'test_number': 2, 'number_of_child_systems': 100, 'kind_step': 'even', 't_span': [0, 105], 'num_points': 10000, 'initial_conditions': {'x': {'min': 0.1, 'max': 1}, 'y': {'min': 0.1, 'max': 1}, 'z': {'min': 0.1, 'max': 1}}}


In [20]:
print(test_number)

2


In [21]:
print(number_of_child_systems)

100


In [22]:
print(t_span)

(0, 105)


In [23]:
print(num_points)

10000


In [24]:
print(initial_conditions)

{'x': {'min': 0.1, 'max': 1}, 'y': {'min': 0.1, 'max': 1}, 'z': {'min': 0.1, 'max': 1}}


In [25]:
print(t_eval)

[0.00000000e+00 1.05010501e-02 2.10021002e-02 ... 1.04978998e+02
 1.04989499e+02 1.05000000e+02]


In [26]:
def frange(start, stop, step):
    while start <= stop:
        yield round(start, 10)  # Avoid floating-point errors
        start += step

In [27]:
def initial_conditions_gen(initial_conditions, number_of_child_systems):

  child_initial_conditions_bag = {}

  for key, value in initial_conditions.items():
    # Retrieve the dynamically created min and max values from the global namespace
    min_value = value["min"]
    max_value = value["max"]
    step = (max_value - min_value) / number_of_child_systems
    gen = frange(min_value, max_value, step)
    child_initial_conditions_bag.update({f"{key}": np.fromiter(gen, dtype=float)})

  systems_initial_dict = {}
  w = []
  for value in child_initial_conditions_bag.values():
    w.append(len(value))
  for i in range(min(w)):
    y = []
    for key,value in child_initial_conditions_bag.items():
      y.append(value[i])
    systems_initial_dict[f"initial_condition_{i}"] = y
  
  return systems_initial_dict

In [28]:
# Initial conditions for each system
systems_initial_dict = initial_conditions_gen(initial_conditions, number_of_child_systems)

In [29]:
systems_initial_dict

{'initial_condition_0': [0.1, 0.1, 0.1],
 'initial_condition_1': [0.109, 0.109, 0.109],
 'initial_condition_2': [0.118, 0.118, 0.118],
 'initial_condition_3': [0.127, 0.127, 0.127],
 'initial_condition_4': [0.136, 0.136, 0.136],
 'initial_condition_5': [0.145, 0.145, 0.145],
 'initial_condition_6': [0.154, 0.154, 0.154],
 'initial_condition_7': [0.163, 0.163, 0.163],
 'initial_condition_8': [0.172, 0.172, 0.172],
 'initial_condition_9': [0.181, 0.181, 0.181],
 'initial_condition_10': [0.19, 0.19, 0.19],
 'initial_condition_11': [0.199, 0.199, 0.199],
 'initial_condition_12': [0.208, 0.208, 0.208],
 'initial_condition_13': [0.217, 0.217, 0.217],
 'initial_condition_14': [0.226, 0.226, 0.226],
 'initial_condition_15': [0.235, 0.235, 0.235],
 'initial_condition_16': [0.244, 0.244, 0.244],
 'initial_condition_17': [0.253, 0.253, 0.253],
 'initial_condition_18': [0.262, 0.262, 0.262],
 'initial_condition_19': [0.271, 0.271, 0.271],
 'initial_condition_20': [0.28, 0.28, 0.28],
 'initial_cond

In [30]:
def system_params_gen(params, number_of_child_systems):

  child_parameters_bag = {}

  for key, value in params.items():
      min_value = value["min"]
      max_value = value["max"]
      step = (max_value - min_value) / number_of_child_systems
      child_parameters_bag[key] = list(frange(min_value, max_value, step))

  systems_params_dict = {}

  y = []
  for value in child_parameters_bag.values():
    y.append(len(value))
  for i in range(min(y)):
    y = []
    for key,value in child_parameters_bag.items():
      y.append(value[i])
    systems_params_dict[f"system_{i}_params"] = tuple(y) 
  
  return systems_params_dict  

In [31]:
import numpy as np
import importlib
from functools import partial

class Binder:
    def __init__(self, module_name, function_name, params):
    
        self.module_name = module_name
        self.function_name = function_name
        self.params = params
        self.module = None
        self.prepared_function = None

    def import_module(self):
        """Imports the module and assigns it to self.module."""
        self.module = importlib.import_module(self.module_name)
        #print(f"Module {self.module_name} imported successfully.")
     
    def fixer(self):
        """Fixes parameters to the function and returns a partially applied function."""
        if self.module is None:
            print("You have to import_module first :)")
            return None
        
        func = getattr(self.module, self.function_name, None)
        if func is None:
            print(f"Function {self.function_name} not found in module {self.module_name}.")
            return None

        self.prepared_function = partial(func, *self.params)
        return self.prepared_function

In [32]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.integrate import solve_ivp
import pandas as pd
import os
import matplotlib.animation as animation


class Sistema:

    """
    Clase para resolver ecuaciones diferenciales ordinarias (EDOs).

    Parámetros:
      - f: función que define el sistema de ecuaciones diferenciales (dy/dt = f(t, y)).
      - y0: condición inicial (puede ser un número o un array para sistemas).
      - t: array de tiempos donde se evaluará la solución.
      - metodo: método numérico para la integración (por defecto "RK45").
    """


    def __init__(self, f, y0, t, metodo = "RK45"):
        self.f = f
        self.y0 = np.atleast_1d(y0)
        self.t = t
        self.metodo = metodo
        self.solucion = None

    def set_metodo(self, nuevo_metodo):

        """
        Permite cambiar el método de integración numérica.
        """

        self.metodo = nuevo_metodo
        print(f"Método cambiado a {self.metodo}")

    def resolver(self):

        """Resuelve la EDO utilizando el método numérico definido."""

        self.solucion = solve_ivp(self.f, [self.t[0], self.t[-1]], self.y0,
                                    t_eval=self.t, method=self.metodo)
        return self.solucion
    
    def atractor_animation(self):
        
        sol = self.solucion  
        x, y, z = sol.y  # Extract trajectory data

        # Set up the figure and 3D axis
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.set_xlim((np.min(x), np.max(x)))
        ax.set_ylim((np.min(y), np.max(y)))
        ax.set_zlim((np.min(z), np.max(z)))
        ax.set_title("Attractor Animation")

        # Initialize the plot elements
        trail_length = 150  # Number of points in fading trace
        line, = ax.plot([], [], [], 'r-', lw=1)  # Main trajectory
        trace, = ax.plot([], [], [], 'g-', lw=2, alpha=0.7)  # Fading trace
        point, = ax.plot([], [], [], 'bo', markersize=6)  # Moving point

        def update(i):
            if i < trail_length:
                trace_x = x[:i]
                trace_y = y[:i]
                trace_z = z[:i]
            else:
                trace_x = x[i - trail_length:i]
                trace_y = y[i - trail_length:i]
                trace_z = z[i - trail_length:i]

            # 🛠 Fix: Only apply fading if the trace has points
            if len(trace_x) > 0:
                fade_alpha = np.linspace(0.1, 1.0, len(trace_x))  # Gradient fade
                trace.set_alpha(fade_alpha[0])  # Apply fading effect

            # Update the trace
            trace.set_data(trace_x, trace_y)
            trace.set_3d_properties(trace_z)

            # Update main trajectory and moving point
            line.set_data(x[:i], y[:i])
            line.set_3d_properties(z[:i])
            point.set_data(x[i], y[i])
            point.set_3d_properties(z[i])

            return line, trace, point
        # Run animation
        ani = animation.FuncAnimation(fig, update, frames=len(self.t), interval=10, blit=False)

        plt.show()


        return
    def graficar(self, tipo='3d', guardar=False, show_plot=True, filename='plot.png'):
        """
        Genera la gráfica de la solución de la EDO.
        
        Parámetros:
        - tipo: '3d' para la trayectoria en 3D o 'series' para la serie de tiempo.
        - guardar: si True, guarda la gráfica en un archivo.
        - show_plot: si True, muestra la gráfica en pantalla.
        - filename: nombre del archivo donde se guardará la gráfica.
        
        Retorna el objeto figura (fig).
        """
        if self.solucion is None:
            raise ValueError("Primero debes resolver la ecuación.")

        if tipo == '3d':
            fig = plt.figure(figsize=(10, 8))
            ax = fig.add_subplot(111, projection='3d')
            ax.plot(self.solucion.y[0], self.solucion.y[1], self.solucion.y[2],
                    color='purple', lw=0.5)
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.set_zlabel('z')
            ax.set_title(f'Attractor (3D)')  # {self.f.func.__name__}
            ax.view_init(elev=30, azim=60)

        elif tipo == 'series':
            fig, axs = plt.subplots(self.solucion.y.shape[0], 1, figsize=(10, 8), sharex=True)
            labels = ['x', 'y', 'z']
            colors = ['r', 'g', 'b']

            for i in range(self.solucion.y.shape[0]):
                axs[i].plot(self.solucion.t, self.solucion.y[i], color=colors[i])
                axs[i].set_ylabel(labels[i])
                axs[i].grid()

            axs[-1].set_xlabel("Time")
            fig.suptitle(f"Series de Tiempo de {getattr(self.f, 'func', self.f).__name__}")

        else:
            raise ValueError("Tipo de gráfica no reconocido. Usa '3d' o 'series'.")

        plt.tight_layout()

        if guardar:
            directory = "/path/to/save/directory" 
            os.makedirs(directory, exist_ok=True)  
            full_path = os.path.join(directory, filename)
            fig.savefig(full_path)
            print(f"Gráfica guardada en {full_path}")

        if show_plot:
            plt.show()

        return fig
       
    # def csv_or_dataframe(self, filename = None):

    #     """Devuelve el dataframe de la series de tiempo, o un csv
    #      si es que le damos un nombre """
        
    #     if self.solucion is None:
    #         raise ValueError("Primero debes resolver la ecuación.")
        
    #     X = self.solucion.y[0]
    #     Y = self.solucion.y[1]
    #     Z = self.solucion.y[2]
     
    #     df = pd.DataFrame({'x':X,'y':Y,'z':Z })

    #     if filename:
    #         df.to_csv(filename, index=False)

    #     return df

    def csv_or_dataframe(self, filename=None, overwrite=True):
        """
        Devuelve el dataframe de la serie de tiempo, o lo guarda como CSV si se proporciona un nombre de archivo.
        Crea directorios si no existen. Puede sobrescribir el archivo si overwrite=True.

        Parameters
        ----------
        filename : str, optional
            Ruta completa del archivo donde se desea guardar el CSV.
        overwrite : bool, default=True
            Si True, sobrescribe el archivo CSV si ya existe. Si False, lanza una excepción si ya existe.
        """

        if self.solucion is None:
            raise ValueError("Primero debes resolver la ecuación.")

        X = self.solucion.y[0]
        Y = self.solucion.y[1]
        Z = self.solucion.y[2]

        df = pd.DataFrame({'x': X, 'y': Y, 'z': Z})

        if filename:
            # Crear directorios si no existen
            os.makedirs(os.path.dirname(filename), exist_ok=True)

            # Si no se permite sobrescribir y el archivo ya existe, lanzar error
            if not overwrite and os.path.exists(filename):
                raise FileExistsError(f"El archivo '{filename}' ya existe y overwrite=False.")

            df.to_csv(filename, index=False)

        return df


In [None]:
# Here we decide which model is allowed to be muahahaha
for key, value in warden.items():
    if value == True:
        parent_model = key
        # Extracting parameters (and them ranges)
        params = {
            key: {"min": value[0], "max": value[1]} 
            for key, value in models[key]["params"].items()
            }
        # Parameters for each system
        systems_params_dict = sys_params_gen(params, number_of_child_systems)
        print(systems_params_dict)

        for i, ((_, v1), (_, v2)) in enumerate(zip(systems_params_dict.items(), systems_initial_dict.items())):
            # Initialize the Binder object for dynamic function import
            binder = Binder(module_name=f"systems.{parent_model}", 
                            function_name=parent_model, 
                            params=v1)

            # Import the module
            binder.import_module()

            # Prepare the function
            fixed_function = binder.fixer()

            if fixed_function:  
                # Now we have the fixed function ready, so we can pass it to Sistema
                sistema = Sistema(f=fixed_function, 
                                    y0=v2, 
                                    t=t_eval, 
                                    metodo='RK45')
                
                # Solve the system
                sistema.resolver()
                # Get the DataFrame for the solution
                ruta = os.path.join(f"{destination_path}",f"test_{test_number}",f"{parent_model}" ,f"{parent_model}_{i}.csv")
                print(f'{parent_model}_{i} has been generated \n at {ruta}')
                # Display a cutie plot
                #sistema.graficar(tipo='series', guardar=False, show_plot=True)
                # Display a cutie animation
                #sistema.atractor_animation()
                # Save dataframe in said route
                sistema.csv_or_dataframe(filename=ruta, overwrite=False)

{'system_0_params': (0.95, 0.7, 0.6, 3.5, 0.25, 0.1), 'system_1_params': (0.9515, 0.702, 0.602, 3.505, 0.2505, 0.101), 'system_2_params': (0.953, 0.704, 0.604, 3.51, 0.251, 0.102), 'system_3_params': (0.9545, 0.706, 0.606, 3.515, 0.2515, 0.103), 'system_4_params': (0.956, 0.708, 0.608, 3.52, 0.252, 0.104), 'system_5_params': (0.9575, 0.71, 0.61, 3.525, 0.2525, 0.105), 'system_6_params': (0.959, 0.712, 0.612, 3.53, 0.253, 0.106), 'system_7_params': (0.9605, 0.714, 0.614, 3.535, 0.2535, 0.107), 'system_8_params': (0.962, 0.716, 0.616, 3.54, 0.254, 0.108), 'system_9_params': (0.9635, 0.718, 0.618, 3.545, 0.2545, 0.109), 'system_10_params': (0.965, 0.72, 0.62, 3.55, 0.255, 0.11), 'system_11_params': (0.9665, 0.722, 0.622, 3.555, 0.2555, 0.111), 'system_12_params': (0.968, 0.724, 0.624, 3.56, 0.256, 0.112), 'system_13_params': (0.9695, 0.726, 0.626, 3.565, 0.2565, 0.113), 'system_14_params': (0.971, 0.728, 0.628, 3.57, 0.257, 0.114), 'system_15_params': (0.9725, 0.73, 0.63, 3.575, 0.2575, 0

# STEP 2: DATA PREPARATION

In [41]:
import os
import re
import pandas as pd

def crawl_csvs_with_ids(destination_path):
    """
    Lee todos los CSVs recursivamente en destination_path, asigna un ID único por archivo,
    reordena las columnas para poner 'id' primero, y devuelve:
    1. Un DataFrame concatenado.
    2. Un DataFrame de referencia con ['id', 'sample_name'].
    """

    all_files = []

    # Walk through files and gather full paths
    for root, _, files in os.walk(destination_path):
        for file in files:
            if file.endswith(".csv"):
                full_path = os.path.join(root, file)
                all_files.append(full_path)

    # Regex function to extract numeric suffix
    def extract_number(path):
        filename = os.path.splitext(os.path.basename(path))[0]
        match = re.search(r'(\d+)$', filename)
        return int(match.group(1)) if match else float('inf')  # inf pushes unsortables to end

    # Sort files by numeric suffix
    sorted_files = sorted(all_files, key=extract_number)

    all_data = []
    id_lookup = []

    for current_id, full_path in enumerate(sorted_files, start=1):
        try:
            df = pd.read_csv(full_path)
        except Exception as e:
            print(f"Error loading {full_path}: {e}")
            continue

        df.insert(0, 'id', current_id)  # Put 'id' as the first column
        all_data.append(df)

        file_stem = os.path.splitext(os.path.basename(full_path))[0]
        id_lookup.append({'id': current_id, 'sample_name': file_stem})

    if not all_data:
        raise ValueError(f"No se encontraron archivos CSV en {destination_path}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    id_lookup_df = pd.DataFrame(id_lookup)

    return all_data_df, id_lookup_df


In [36]:
import os
import re
import pandas as pd

def crawl_csvs_with_ids(destination_path):
    """
    Lee todos los CSVs en destination_path recursivamente, ordenados naturalmente,
    les asigna un ID único, y devuelve:
    1. Un DataFrame concatenado con 'id' como primera columna.
    2. Un DataFrame de referencia con ['id', 'sample_name'].
    """

    all_files = []

    for root, _, files in os.walk(destination_path):
        for file in files:
            if file.endswith(".csv"):
                full_path = os.path.join(root, file)
                all_files.append(full_path)

    # Natural sort key: split text and numbers
    def natural_key(path):
        filename = os.path.splitext(os.path.basename(path))[0]
        return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', filename)]

    sorted_files = sorted(all_files, key=natural_key)

    all_data = []
    id_lookup = []

    for current_id, full_path in enumerate(sorted_files, start=1):
        try:
            df = pd.read_csv(full_path)
        except Exception as e:
            print(f"Error loading {full_path}: {e}")
            continue

        df.insert(0, 'id', current_id)
        all_data.append(df)

        file_stem = os.path.splitext(os.path.basename(full_path))[0]
        id_lookup.append({'id': current_id, 'sample_name': file_stem})

    if not all_data:
        raise ValueError(f"No se encontraron archivos CSV en {destination_path}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    id_lookup_df = pd.DataFrame(id_lookup)

    return all_data_df, id_lookup_df



In [44]:
import os
import re
import pandas as pd

def crawl_csvs_with_ids(destination_path):
    """
    Lee todos los CSVs en destination_path recursivamente, ordenados naturalmente,
    les asigna un ID único, y devuelve:
    1. Un DataFrame concatenado con 'id' como primera columna.
    2. Un DataFrame de referencia con ['id', 'sample_name'].
    """

    all_files = []

    for root, _, files in os.walk(destination_path):
        for file in files:
            if file.endswith(".csv"):
                full_path = os.path.join(root, file)
                all_files.append(full_path)

    # Natural sort key: split text and numbers
    def natural_key(path):
        filename = os.path.splitext(os.path.basename(path))[0]
        return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', filename)]

    sorted_files = sorted(all_files, key=natural_key)

    all_data = []
    id_lookup = []

    for current_id, full_path in enumerate(sorted_files, start=1):
        try:
            df = pd.read_csv(full_path)
        except Exception as e:
            print(f"Error loading {full_path}: {e}")
            continue

        df.insert(0, 'id', current_id)
        all_data.append(df)

        file_stem = os.path.splitext(os.path.basename(full_path))[0]
        id_lookup.append({'id': current_id, 'sample_name': file_stem})

    if not all_data:
        raise ValueError(f"No se encontraron archivos CSV en {destination_path}")

    all_data_df = pd.concat(all_data, ignore_index=True)
    id_lookup_df = pd.DataFrame(id_lookup)

    return all_data_df, id_lookup_df


In [45]:
file_path = '/home/think/Desktop/TESIS/pg_runs/templates/template.json'
destination_path = "/home/think/Desktop/TESIS/pg_runs/test_2"

In [47]:
all_df, df_id= crawl_csvs_with_ids(destination_path)

In [48]:
all_df.head()

Unnamed: 0,id,x,y,z
0,1,0.1,0.1,0.1
1,1,0.095657,0.10296,0.107116
2,1,0.091243,0.105749,0.114304
3,1,0.086763,0.108366,0.121566
4,1,0.082225,0.11081,0.128902


In [49]:
df_id.head()

Unnamed: 0,id,sample_name
0,1,aizawa_0
1,2,aizawa_1
2,3,aizawa_2
3,4,aizawa_3
4,5,aizawa_4


In [50]:
df_id['sample_name']

0       aizawa_0
1       aizawa_1
2       aizawa_2
3       aizawa_3
4       aizawa_4
         ...    
495    sprott_95
496    sprott_96
497    sprott_97
498    sprott_98
499    sprott_99
Name: sample_name, Length: 500, dtype: object

# Data Treatment

In [51]:
all_df

Unnamed: 0,id,x,y,z
0,1,0.100000,0.100000,0.100000
1,1,0.095657,0.102960,0.107116
2,1,0.091243,0.105749,0.114304
3,1,0.086763,0.108366,0.121566
4,1,0.082225,0.110810,0.128902
...,...,...,...,...
4999995,500,0.697298,-0.201389,0.617136
4999996,500,0.696305,-0.202385,0.618926
4999997,500,0.695297,-0.203362,0.620717
4999998,500,0.694274,-0.204320,0.622507


In [52]:
df_id

Unnamed: 0,id,sample_name
0,1,aizawa_0
1,2,aizawa_1
2,3,aizawa_2
3,4,aizawa_3
4,5,aizawa_4
...,...,...
495,496,sprott_95
496,497,sprott_96
497,498,sprott_97
498,499,sprott_98


In [163]:
all_df.to_csv("/home/think/Desktop/TESIS/pg_runs/test_2/all_df.csv")

In [53]:
df_id.to_csv("/home/think/Desktop/TESIS/pg_runs/test_2/df_id.csv")

### Mano, esta wea me crashea tanto que tengo que importar las librerias de nuevo

In [31]:
import pandas as pd

In [32]:
all_df = pd.read_csv("/home/think/Desktop/all_df.csv")

In [33]:
all_df.drop(all_df.columns[[0]], axis=1, inplace=True)

In [34]:
all_df.head()

Unnamed: 0,id,x,y,z
0,1,0.1,0.1,0.1
1,1,0.095657,0.10296,0.107116
2,1,0.091243,0.105749,0.114304
3,1,0.086763,0.108366,0.121566
4,1,0.082225,0.11081,0.128902


In [35]:
df_id = pd.read_csv("/home/think/Desktop/df_id.csv")

In [36]:
df_id.drop(df_id.columns[[0]], axis=1, inplace=True)

In [37]:
df_id.head()

Unnamed: 0,id,sample_name
0,1,aizawa_0
1,2,aizawa_1
2,3,aizawa_2
3,4,aizawa_3
4,5,aizawa_4


In [38]:
# 1. Extract initial conditions (first row for each id)
initial_conditions_df = all_df.groupby('id').first().reset_index()

# 2. Remove those rows from the main dataframe
# We'll do it by dropping the first timestamp for each id
all_df_cleaned = all_df.groupby('id').apply(lambda group: group.iloc[1:]).reset_index(drop=True)

  all_df_cleaned = all_df.groupby('id').apply(lambda group: group.iloc[1:]).reset_index(drop=True)


In [39]:
all_df_cleaned.head()

Unnamed: 0,id,x,y,z
0,1,0.095657,0.10296,0.107116
1,1,0.091243,0.105749,0.114304
2,1,0.086763,0.108366,0.121566
3,1,0.082225,0.11081,0.128902
4,1,0.077634,0.113081,0.136312


In [40]:
initial_conditions_df.head()

Unnamed: 0,id,x,y,z
0,1,0.1,0.1,0.1
1,2,0.109,0.109,0.109
2,3,0.118,0.118,0.118
3,4,0.127,0.127,0.127
4,5,0.136,0.136,0.136


In [41]:
# Optionally, rename x/y/z to ic_x/ic_y/ic_z for clarity
initial_conditions_df = initial_conditions_df.rename(
    columns={'x': 'ic_x', 'y': 'ic_y', 'z': 'ic_z'}
)

In [42]:
initial_conditions_df.head()

Unnamed: 0,id,ic_x,ic_y,ic_z
0,1,0.1,0.1,0.1
1,2,0.109,0.109,0.109
2,3,0.118,0.118,0.118
3,4,0.127,0.127,0.127
4,5,0.136,0.136,0.136


In [43]:
# Merge based on 'id'
df_lookup = pd.merge(
    df_id,
    initial_conditions_df[['id', 'ic_x', 'ic_y', 'ic_z']],
    on='id',
    how='inner'  # or 'left' if you want all df_id even if missing ICs
)

In [44]:
df_lookup

Unnamed: 0,id,sample_name,ic_x,ic_y,ic_z
0,1,aizawa_0,0.100,0.100,0.100
1,2,aizawa_1,0.109,0.109,0.109
2,3,aizawa_2,0.118,0.118,0.118
3,4,aizawa_3,0.127,0.127,0.127
4,5,aizawa_4,0.136,0.136,0.136
...,...,...,...,...,...
495,496,sprott_95,0.955,0.955,0.955
496,497,sprott_96,0.964,0.964,0.964
497,498,sprott_97,0.973,0.973,0.973
498,499,sprott_98,0.982,0.982,0.982


In [45]:
all_df_cleaned

Unnamed: 0,id,x,y,z
0,1,0.095657,0.102960,0.107116
1,1,0.091243,0.105749,0.114304
2,1,0.086763,0.108366,0.121566
3,1,0.082225,0.110810,0.128902
4,1,0.077634,0.113081,0.136312
...,...,...,...,...
4999495,500,0.697298,-0.201389,0.617136
4999496,500,0.696305,-0.202385,0.618926
4999497,500,0.695297,-0.203362,0.620717
4999498,500,0.694274,-0.204320,0.622507


In [46]:
import pandas as pd

def process_samples(df_lookup, all_df, select_ids=None, variables=None, calc_funcs=None):
    """
    Combines df_lookup and all_df_cleaned, and applies calculations to selected samples and variables.

    Parameters
    ----------
    df_lookup : pd.DataFrame
        Contains ['id', 'sample_name', 'ic_x', 'ic_y', 'ic_z', ...].
    all_df : pd.DataFrame
        Contains cleaned time series with columns ['id', 'x', 'y', 'z'].
    select_ids : list[int] or None
        If provided, only process these sample IDs. If None, process all.
    variables : list[str] or None
        Which variables ('x', 'y', 'z') to include. None means all present in all_df.
    calc_funcs : dict or None
        Mapping: {'func_name': func}, where func(series) returns a scalar.

    Returns
    -------
    result_df : pd.DataFrame
        Each row: id, sample_name, initial conditions, and calculated columns.
    """

    # Default values
    if select_ids is None:
        select_ids = df_lookup['id'].tolist()
    if variables is None:
        variables = [c for c in ['x', 'y', 'z'] if c in all_df.columns]
    if calc_funcs is None:
        calc_funcs = {
            'mean': pd.Series.mean,
            'std': pd.Series.std,
            'max': pd.Series.max,
            'min': pd.Series.min
        }

    records = []

    for sid in select_ids:
        lookup_row = df_lookup.loc[df_lookup['id'] == sid]
        if lookup_row.empty:
            print(f"⚠️ id {sid} not found in df_lookup — skipping.")
            continue

        entry = lookup_row.iloc[0].to_dict()
        ts = all_df[all_df['id'] == sid]

        if ts.empty:
            print(f"⚠️ id {sid} has no time series data — skipping.")
            continue

        for var in variables:
            if var not in ts.columns:
                print(f"⚠️ Column {var} not in time series — skipping for id {sid}.")
                continue
            series = ts[var].dropna()

            for func_name, func in calc_funcs.items():
                col = f"{var}_{func_name}"
                entry[col] = func(series)

        records.append(entry)

    result_df = pd.DataFrame(records)
    return result_df


In [47]:
ids = [1, 5, 42]
variables = ['x']
df_res = process_samples(df_lookup, all_df_cleaned, select_ids=ids, variables = variables)

In [52]:
import nolds

funcs = {
    'lyap': lambda s: nolds.lyap_r(s.values),
    'corr_dim': lambda s: nolds.corr_dim(s.values, emb_dim=10),
    'ap_entropy': lambda s: nolds.sampen(s.values),
}
ids = [1, 5, 42]

df_res = process_samples(df_lookup, 
                         all_df_cleaned,
                         select_ids=ids,
                         variables=['x','y','z'], 
                         calc_funcs=funcs)

In [53]:
df_res

Unnamed: 0,id,sample_name,ic_x,ic_y,ic_z,x_lyap,x_corr_dim,x_ap_entropy,y_lyap,y_corr_dim,y_ap_entropy,z_lyap,z_corr_dim,z_ap_entropy
0,1,aizawa_0,0.1,0.1,0.1,0.000676,1.31414,0.067119,0.000575,1.359338,0.068571,0.00076,0.887677,0.016531
1,5,aizawa_4,0.136,0.136,0.136,0.000679,1.582371,0.088806,0.000601,1.544334,0.088104,0.000613,1.114768,0.020834
2,42,aizawa_41,0.469,0.469,0.469,0.000318,1.464312,0.117789,0.000363,1.455167,0.122042,0.000638,1.128703,0.02961


### ?


In [None]:
import nolds
import pandas as pd

def extract_chaos_features(series):
    """Extract chaos features from a 1D time series."""
    return {
        'lyap': nolds.lyap_r(series),
        'corr_dim': nolds.corr_dim(series, emb_dim=10),
        'ap_entropy': nolds.sampen(series),
    }

def compute_features_by_id(df_all):
    """
    Agrupa por 'id' y calcula las features para cada grupo.

    Parameters
    ----------
    df_all : pd.DataFrame
        DataFramids = [1, 5, 42]
df_res = process_samples(df_lookup, all_df_cleaned, select_ids=ids)
e con columnas ['id', 'x', 'y', 'z']

    Returns
    -------
    pd.DataFrame
        DataFrame con columnas ['id', 'lyap', 'corr_dim', 'ap_entropy']
    """

    feature_rows = []

    for id_value, group in df_all.groupby("id"):
        try:
            x_series = group["x"].values
            features = extract_chaos_features(x_series)
            features["id"] = id_value
            feature_rows.append(features)
        except Exception as e:
            print(f"Error calculating features for id {id_value}: {e}")

    return pd.DataFrame(feature_rows)


In [5]:
# Assuming you already have:
# df_all, df_lookup = crawl_csvs_with_ids(destination_path)

features_df = compute_features_by_id(all_df)
features_df = features_df.sort_values("id").reset_index(drop=True)

print(features_df.head())


  """
  """


KeyboardInterrupt: 

In [None]:
features_df.to_csv('/home/think/Desktop/TESIS/csv_files/pg_2_chaotic_features')

# STEP 4: VISUALIZATION

In [None]:
from matplotlib.colors import ListedColormap

my_colors = ['red', 'blue', 'green', 'purple', 'orange','black','cyan']
my_cmap = ListedColormap(my_colors)

In [None]:
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
import umap.umap_ as umap


def higher_visualization(df, cmap):
    # Separate labels and features
    y_raw = df.iloc[:, 0].values  # labels (first column)
    X = df.iloc[:, 1:].values     # features (rest columns)

    # Convert string labels to integers
    le = LabelEncoder()
    y = le.fit_transform(y_raw)

    # Prepare figure with 1 row and 3 subplots
    fig, axs = plt.subplots(1, 3, figsize=(18, 5))

    # PCA
    X_pca = PCA(n_components=2).fit_transform(X)
    axs[0].scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=cmap)
    axs[0].set_title("PCA")

    # t-SNE
    X_tsne = TSNE(n_components=2, perplexity=5, random_state=0).fit_transform(X)
    axs[1].scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap=cmap)
    axs[1].set_title("t-SNE")

    # UMAP
    X_umap = umap.UMAP(random_state=0).fit_transform(X)
    axs[2].scatter(X_umap[:, 0], X_umap[:, 1], c=y, cmap=cmap)
    axs[2].set_title("UMAP")

    plt.tight_layout()
    plt.show()