<style>
body {
    font-size: 20pt !important;
}

.rendered_html {
    font-size: 20pt !important;
}

.CodeMirror pre {
    font-size: 20pt !important;
}

.output pre {
    font-size: 20pt !important;
}
</style>


<h2 style="font-size:24pt"> Proyecto DESI</h2>

<h2 style="font-size:24pt"> Julio 14, 2025</h2>

<p style="font-size:16pt">
Calculation of the count fraction for 100 random catalogs per rosette
</p>

In [1]:
import numpy as np
import matplotlib.tri as mtri
from scipy.spatial import Delaunay
import matplotlib.pyplot as plt
from astropy.table import Table
from astropy.io import ascii
import pandas as pd
import networkx as nx
import scipy
import seaborn as sns
from scipy.stats import norm
from mpl_toolkits.mplot3d import Axes3D 
from sklearn.decomposition import PCA

## Data

In [2]:
# Read the clustering catalogue of each of the 20 rosettes into a dict
# keyed as 'data_<i>'.
data = {
    f'data_{i}': Table.read(
        f"data_rosette/LRG_{i}_clustering_data.ecsv", format="ascii.ecsv"
    )
    for i in range(20)
}

In [3]:
def parameter_r(data_all):
    """Compute the Delaunay-neighbour ratio ``r`` for every row of a catalogue.

    A Delaunay tessellation is built from the 'x', 'y', 'z' columns and turned
    into a graph whose edges are the edges of the tetrahedra. For every point
    the neighbours labelled 'data' and 'rand' are counted and

        r = (N_data - N_random) / (N_data + N_random)

    is stored; ``r`` is set to 0 where the denominator is 0.

    Parameters
    ----------
    data_all : pandas.DataFrame
        Needs the columns 'x', 'y', 'z', 'TARGETID' and 'type'. Only neighbours
        whose 'type' is exactly 'data' or 'rand' enter the two counts.
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data_all`` object with the added columns 'degree',
        'N_data', 'N_random', 'neighbor_ids_data', 'neighbor_ids_rand'
        (comma-separated TARGETIDs of the neighbours) and 'r'.
    """
    df_tri = data_all[['x', 'y', 'z']].values
    tri = Delaunay(df_tri)

    ids = data_all['TARGETID'].values
    types = data_all['type'].values
    n_points = len(ids)

    # Nodes are keyed by row position, not by TARGETID. Delaunay simplices are
    # row indices, and keying by TARGETID would silently merge two rows into a
    # single node whenever an ID is repeated (e.g. a random point sharing an ID
    # with a data point), corrupting the neighbour counts of both rows.
    G = nx.Graph()
    G.add_nodes_from(range(n_points))

    # Each simplex is a tetrahedron (4 vertices): add its 6 vertex pairs.
    G.add_edges_from(
        (simplex[i], simplex[j])
        for simplex in tri.simplices.tolist()
        for i in range(3)
        for j in range(i + 1, 4)
    )

    degree = []
    n_data = []
    n_random = []
    data_neighbors = []
    rand_neighbors = []

    for node in range(n_points):
        neighbors = list(G.neighbors(node))

        # Neighbours are row positions; report them by their TARGETID.
        data_ids = [ids[n] for n in neighbors if types[n] == 'data']
        rand_ids = [ids[n] for n in neighbors if types[n] == 'rand']

        # No self-loops can occur (simplex vertices are distinct), so the
        # degree is simply the number of neighbours.
        degree.append(len(neighbors))
        n_data.append(len(data_ids))
        n_random.append(len(rand_ids))
        data_neighbors.append(",".join(map(str, data_ids)))
        rand_neighbors.append(",".join(map(str, rand_ids)))

    # Lists are in row order, so positional assignment lines up with the rows.
    data_all['degree'] = degree
    data_all['N_data'] = n_data
    data_all['N_random'] = n_random
    data_all['neighbor_ids_data'] = data_neighbors
    data_all['neighbor_ids_rand'] = rand_neighbors

    total = data_all['N_data'] + data_all['N_random']
    # np.where evaluates the division everywhere, including total == 0;
    # silence the resulting warnings and substitute 0 there.
    with np.errstate(divide='ignore', invalid='ignore'):
        r = np.where(total > 0, (data_all['N_data'] - data_all['N_random']) / total, 0)

    data_all['r'] = r

    return data_all


In [4]:
def classification(data):
    """Label every row with a class derived from its 'r' value.

    Bins (a 'classification' column is written to ``data`` in place):
        -1.0 <= r <= -0.9  -> 'void'
        -0.9 <  r <=  0.0  -> 'sheet'
         0.0 <  r <=  0.9  -> 'filament'
         0.9 <  r <=  1.0  -> 'knot'
    Rows matching no bin (outside [-1, 1] or NaN) stay 'unclassified'.

    Returns the same ``data`` object.
    """
    r_values = data['r']

    # (label, boolean mask) pairs; only the first bin is closed on the left.
    labelled_masks = (
        ('void',     r_values.ge(-1.0) & r_values.le(-0.9)),
        ('sheet',    r_values.gt(-0.9) & r_values.le(0.0)),
        ('filament', r_values.gt(0.0) & r_values.le(0.9)),
        ('knot',     r_values.gt(0.9) & r_values.le(1.0)),
    )

    data['classification'] = 'unclassified'
    for label, mask in labelled_masks:
        data.loc[mask, 'classification'] = label

    return data

In [5]:
def mask_classification(data):
    """Split ``data`` by its 'classification' column.

    Returns a 4-tuple of sub-frames in the fixed order
    (void, sheet, filament, knot); rows with any other label are dropped.
    """
    labels = ('void', 'sheet', 'filament', 'knot')
    return tuple(data[data['classification'] == label] for label in labels)

In [6]:
def count_fraction(data,type_data):
    """Percentage of rows of one 'type' that fall in each class.

    Rows whose 'type' equals ``type_data`` are selected, split with
    ``mask_classification`` and each part's size is expressed as a percentage
    of the selected rows.

    Returns a tuple (void_perc, sheet_perc, filament_perc, knot_perc).
    Raises ZeroDivisionError when no row has the requested type.
    """
    selected = data[data['type'] == type_data]
    n_selected = len(selected)

    return tuple(
        len(part) / n_selected * 100
        for part in mask_classification(selected)
    )

In [7]:
from pathlib import Path

number_random = 100
num_rosettes = 20
base_path = Path("rand_rosette")

# Columns kept from both the data and the random catalogues.
catalog_columns = ['TARGETID', 'RA', 'DEC', 'Z', 'x', 'y', 'z']

# Per-rosette mean / std of the class percentages over the random realisations.
mean_values_data, std_values_data = {}, {}
mean_values_rand, std_values_rand = {}, {}

for i in range(num_rosettes):
    print(f'Rosette {i}')

    # The data catalogue is the same for every random realisation of rosette i.
    df_data = data[f'data_{i}'][catalog_columns].to_pandas().copy()
    df_data['type'] = 'data'

    perc_data_list = []
    perc_rand_list = []

    for j in range(number_random):
        file = base_path / f"LRG_rosette_{i}_random_{j}.ecsv"
        subset = Table.read(file, format="ascii.ecsv")[catalog_columns].to_pandas()
        subset['type'] = 'rand'

        # Randoms first, then data, re-indexed from 0.
        df_concat = pd.concat([subset, df_data], ignore_index=True)
        data_with_class = classification(parameter_r(df_concat))

        # Each entry is [void, sheet, filament, knot] in percent.
        perc_data_list.append(list(count_fraction(data_with_class, 'data')))
        perc_rand_list.append(list(count_fraction(data_with_class, 'rand')))

    perc_data_arr = np.array(perc_data_list)
    perc_rand_arr = np.array(perc_rand_list)

    # Statistics across realisations (axis 0), one value per class.
    mean_values_data[i], std_values_data[i] = perc_data_arr.mean(axis=0), perc_data_arr.std(axis=0)
    mean_values_rand[i], std_values_rand[i] = perc_rand_arr.mean(axis=0), perc_rand_arr.std(axis=0)


Rosette 0
Rosette 1
Rosette 2
Rosette 3
Rosette 4
Rosette 5
Rosette 6
Rosette 7
Rosette 8
Rosette 9
Rosette 10
Rosette 11
Rosette 12
Rosette 13
Rosette 14
Rosette 15
Rosette 16
Rosette 17
Rosette 18
Rosette 19


In [None]:
columnas = ["Void (%)", "Sheet (%)", "Filament (%)", "Knot (%)"]

# One row per rosette, each cell formatted as "mean ± std" for the data points.
filas = [
    [f"{m:.2f} ± {s:.2f}" for m, s in zip(mean_values_data[key], std_values_data[key])]
    for key in mean_values_data
]
indices = [f"Rosette {key}" for key in mean_values_data]

tabla_final = pd.DataFrame(filas, columns=columnas, index=indices)

print("===== DATOS REALES =====")
print(tabla_final)

===== DATOS REALES =====
               Void (%)     Sheet (%)  Filament (%)     Knot (%)
Rosette 0   0.18 ± 0.06  41.54 ± 0.56  56.03 ± 0.59  2.24 ± 0.26
Rosette 1   0.16 ± 0.04  44.21 ± 0.60  53.96 ± 0.62  1.68 ± 0.18
Rosette 2   0.26 ± 0.06  41.40 ± 0.60  56.67 ± 0.62  1.66 ± 0.18
Rosette 3   0.17 ± 0.05  41.99 ± 0.62  56.49 ± 0.63  1.35 ± 0.15
Rosette 4   0.18 ± 0.05  42.44 ± 0.64  55.70 ± 0.64  1.69 ± 0.17
Rosette 5   0.20 ± 0.05  43.06 ± 0.60  55.28 ± 0.61  1.46 ± 0.18
Rosette 6   0.18 ± 0.05  43.49 ± 0.55  55.13 ± 0.56  1.20 ± 0.15
Rosette 7   0.19 ± 0.05  43.45 ± 0.60  54.65 ± 0.60  1.70 ± 0.18
Rosette 8   0.17 ± 0.05  43.18 ± 0.62  54.90 ± 0.63  1.76 ± 0.17
Rosette 9   0.21 ± 0.05  42.20 ± 0.54  55.84 ± 0.57  1.75 ± 0.18
Rosette 10  0.20 ± 0.05  42.36 ± 0.59  55.58 ± 0.61  1.87 ± 0.22
Rosette 11  0.16 ± 0.05  43.22 ± 0.55  54.94 ± 0.58  1.68 ± 0.21
Rosette 12  0.18 ± 0.04  42.30 ± 0.52  55.93 ± 0.55  1.59 ± 0.22
Rosette 13  0.17 ± 0.06  44.60 ± 0.62  53.85 ± 0.61  1.39 ± 0.20


In [None]:
columnas = ["Void (%)", "Sheet (%)", "Filament (%)", "Knot (%)"]

# One row per rosette, each cell formatted as "mean ± std" for the random points.
filas = [
    [f"{m:.2f} ± {s:.2f}" for m, s in zip(mean_values_rand[key], std_values_rand[key])]
    for key in mean_values_rand
]
indices = [f"Rosette {key}" for key in mean_values_rand]

tabla_final = pd.DataFrame(filas, columns=columnas, index=indices)

print("===== DATOS RANDOM =====")
print(tabla_final)

===== DATOS RANDOM =====
               Void (%)     Sheet (%)  Filament (%)     Knot (%)
Rosette 0   1.66 ± 0.18  73.41 ± 0.48  24.81 ± 0.48  0.11 ± 0.04
Rosette 1   1.21 ± 0.14  72.68 ± 0.46  26.03 ± 0.45  0.09 ± 0.04
Rosette 2   1.72 ± 0.17  72.29 ± 0.54  25.89 ± 0.51  0.10 ± 0.04
Rosette 3   1.99 ± 0.22  71.09 ± 0.63  26.83 ± 0.55  0.09 ± 0.04
Rosette 4   1.78 ± 0.20  72.50 ± 0.57  25.63 ± 0.49  0.10 ± 0.05
Rosette 5   2.10 ± 0.19  71.40 ± 0.51  26.42 ± 0.51  0.08 ± 0.04
Rosette 6   1.22 ± 0.16  72.57 ± 0.47  26.13 ± 0.44  0.09 ± 0.04
Rosette 7   1.16 ± 0.16  72.96 ± 0.48  25.79 ± 0.47  0.10 ± 0.04
Rosette 8   1.54 ± 0.18  73.49 ± 0.50  24.88 ± 0.44  0.10 ± 0.04
Rosette 9   1.74 ± 0.18  72.74 ± 0.45  25.43 ± 0.41  0.10 ± 0.03
Rosette 10  1.56 ± 0.19  73.40 ± 0.50  24.93 ± 0.46  0.11 ± 0.04
Rosette 11  1.41 ± 0.17  72.89 ± 0.56  25.59 ± 0.50  0.10 ± 0.04
Rosette 12  1.74 ± 0.18  72.58 ± 0.55  25.57 ± 0.50  0.10 ± 0.04
Rosette 13  1.28 ± 0.17  72.70 ± 0.48  25.94 ± 0.46  0.08 ± 0.03


In [23]:
# Stack the per-rosette mean percentages into one (rosettes x classes) array.
all_means = np.vstack([mean_values_data[key] for key in mean_values_data])

# Mean and spread across rosettes, one value per class.
global_mean, global_std = all_means.mean(axis=0), all_means.std(axis=0)

columnas = ["Void (%)", "Sheet (%)", "Filament (%)", "Knot (%)"]
print('Promedio Datos reales')
for col, avg, spread in zip(columnas, global_mean, global_std):
    print(f"{col}: {avg:.2f} ± {spread:.2f}")

Promedio Datos reales
Void (%): 0.19 ± 0.03
Sheet (%): 42.99 ± 0.92
Filament (%): 55.24 ± 0.85
Knot (%): 1.58 ± 0.26


In [26]:
# Stack the per-rosette mean percentages into one (rosettes x classes) array.
all_means = np.vstack([mean_values_rand[key] for key in mean_values_rand])

# Mean and spread across rosettes, one value per class.
global_mean, global_std = all_means.mean(axis=0), all_means.std(axis=0)

columnas = ["Void (%)", "Sheet (%)", "Filament (%)", "Knot (%)"]
print('Promedio Datos random')
for col, avg, spread in zip(columnas, global_mean, global_std):
    print(f"{col}: {avg:.2f} ± {spread:.2f}")

Promedio Datos random
Void (%): 1.52 ± 0.26
Sheet (%): 72.62 ± 0.69
Filament (%): 25.77 ± 0.59
Knot (%): 0.09 ± 0.01
