<h2 style="font-size:24pt"> Proyecto DESI</h2>

<h2 style="font-size:24pt"> Julio 11, 2025</h2>

<p style="font-size:16pt">
ID connections of each ID according to the triangulation

In [1]:
import numpy as np
import matplotlib.tri as mtri
from scipy.spatial import Delaunay
import matplotlib.pyplot as plt
from astropy.table import Table
import pandas as pd
import networkx as nx
import scipy
import seaborn as sns
from scipy.stats import norm
from mpl_toolkits.mplot3d import Axes3D 
from sklearn.decomposition import PCA
from functools import reduce
import glob
from concurrent.futures import ThreadPoolExecutor, as_completed
import re
from astropy.io import ascii
from itertools import combinations
from concurrent.futures import ProcessPoolExecutor
from functools import partial

In [2]:
%%time
rosettes = list(range(20))
data = {}

for number_rosette in range(20):
    file = f"data_rosette/LRG_{number_rosette}_clustering_data.ecsv"
    table = Table.read(file, format="ascii.ecsv") 
    subset = table[['TARGETID','RA', 'DEC', 'Z','x','y','z']].to_pandas()
    data[f"data_{number_rosette}"] = subset
    data[f"data_{number_rosette}"]['type'] = 'data'

CPU times: total: 8.12 s
Wall time: 8.7 s


In [None]:
def compute_r(df, verbose=False, return_as_string=True):
    coords = df[['x', 'y', 'z']].values
    ids = df['TARGETID'].values
    types = df['type'].values
    is_data = (types == 'data')

    tri = Delaunay(coords)

    neighbors = {i: set() for i in range(len(coords))}
    for simplex in tri.simplices:
        for i, j in combinations(simplex, 2):
            neighbors[i].add(j)
            neighbors[j].add(i)

    r = np.zeros(len(coords), dtype=float)
    neighbor_ids_data = []
    neighbor_ids_rand = []

    for i, nbrs in neighbors.items():
        nbrs = list(nbrs)

        n_data = int(np.sum(is_data[nbrs]))
        n_rand = len(nbrs) - n_data

        if (n_data + n_rand) > 0:
            r[i] = (n_data - n_rand) / (n_data + n_rand)
        else:
            raise ValueError(f'No neighbors for point {i} en la triangulación.')

        data_ids = ids[nbrs][is_data[nbrs]]
        rand_ids = ids[nbrs][~is_data[nbrs]]

        if return_as_string:
            neighbor_ids_data.append(','.join(map(str, data_ids)))
            neighbor_ids_rand.append(','.join(map(str, rand_ids)))
        else:
            neighbor_ids_data.append(data_ids.tolist())
            neighbor_ids_rand.append(rand_ids.tolist())

        if verbose:
            print(f"TID {ids[i]} → data: {data_ids}, rand: {rand_ids}")

    out = df.copy()
    out['r'] = r
    out['neighbor_ids_data'] = neighbor_ids_data
    out['neighbor_ids_rand'] = neighbor_ids_rand
    
    return out

In [None]:
def parameter_r(data_all):
    df_tri = data_all[['x', 'y', 'z']].values
    tri = Delaunay(df_tri)

    G = nx.Graph()
    ids = data_all['TARGETID'].values
    types = data_all['type'].values

    for coords, tipo, id_ in zip(df_tri, types, ids):
        G.add_node(id_, pos=tuple(coords), type=tipo)

    G.add_edges_from(
        (ids[simplex[i]], ids[simplex[j]])
        for simplex in tri.simplices
        for i in range(3)
        for j in range(i + 1, 4)
    )

    degree_dict = dict(G.degree())
    data_all['degree'] = data_all['TARGETID'].map(degree_dict)

    n_data_dict = {}
    n_random_dict = {}
    data_neighbors_dict = {}
    rand_neighbors_dict = {}

    for node in G.nodes:
        neighbors = list(G.neighbors(node))
        neighbor_types = [G.nodes[n]['type'] for n in neighbors]

        data_ids = [n for n in neighbors if G.nodes[n]['type'] == 'data']
        rand_ids = [n for n in neighbors if G.nodes[n]['type'] == 'rand']

        n_data_dict[node] = len(data_ids)
        n_random_dict[node] = len(rand_ids)

        data_neighbors_dict[node] = ",".join(map(str, data_ids))
        rand_neighbors_dict[node] = ",".join(map(str, rand_ids))

    data_all['N_data'] = data_all['TARGETID'].map(n_data_dict)
    data_all['N_random'] = data_all['TARGETID'].map(n_random_dict)
    data_all['neighbor_ids_data'] = data_all['TARGETID'].map(data_neighbors_dict)
    data_all['neighbor_ids_rand'] = data_all['TARGETID'].map(rand_neighbors_dict)

    total = data_all['N_data'] + data_all['N_random']
    with np.errstate(divide='ignore', invalid='ignore'):
        r = np.where(total > 0, (data_all['N_data'] - data_all['N_random']) / total, 0)

    data_all['r'] = r

    return data_all

In [5]:
def classification(data,number_rand):

    data.loc[(data['r'] >= -1.0) & (data['r'] <= -0.9), f'class_{number_rand}'] = 'void'
    data.loc[(data['r'] >  -0.9) & (data['r'] <=  0.0), f'class_{number_rand}'] = 'sheet'
    data.loc[(data['r'] >   0.0) & (data['r'] <=  0.9), f'class_{number_rand}'] = 'filament'
    data.loc[(data['r'] >   0.9) & (data['r'] <=  1.0), f'class_{number_rand}'] = 'knot'

    data.sort_values('z', inplace=True)

    return data

In [10]:
%%time
rand = {}
rand_list = {}
df_merge = {}
df_entropy = {}
number_random = 100

for i in range(20):
    rand_list[i] = []
    print(f'Rosette {i}')
    for j in range(number_random):

        #Read files
        file = f'rand_rosette/LRG_rosette_{i}_random_{j}.ecsv'
        table = Table.read(file, format="ascii.ecsv") 
        subset = table[['TARGETID', 'RA', 'DEC', 'Z', 'x', 'y', 'z']].to_pandas()
        subset['type'] = 'rand'

        #Concat real and random data
        df_concat = pd.concat([subset, data[f'data_{i}'].copy().assign(type='data')], ignore_index=True)

        #Parameter r and classification
        #data_with_r = parameter_r(df_concat)
        data_with_r = compute_r(df_concat)
        #print(data_with_r)
        data_with_class = classification(data_with_r, j)
        #print(data_with_class)
        
        rand_list[i].append(data_with_class)
        
        #Neighbor connections
        table_connection = Table.from_pandas(data_with_class)
        filename_connection = f"connections_rosette/LRG_connection_rosette_{i}_rand_{j}.ecsv"
        ascii.write(table_connection, filename_connection, format='ecsv', overwrite=True)

Rosette 0
Rosette 1
Rosette 2
Rosette 3
Rosette 4
Rosette 5


ValueError: unable to convert data to Column for Table