<h2 style="font-size:24pt"> Proyecto DESI</h2>

<h2 style="font-size:24pt"> Julio 11, 2025</h2>

<p style="font-size:16pt">
Calculation of the classification entropy of each galaxy across the random-catalogue realizations.
</p>

In [57]:
import numpy as np
import matplotlib.tri as mtri
from scipy.spatial import Delaunay
import matplotlib.pyplot as plt
from astropy.table import Table
import pandas as pd
import networkx as nx
import scipy
import seaborn as sns
from scipy.stats import norm
from mpl_toolkits.mplot3d import Axes3D 
from sklearn.decomposition import PCA
from functools import reduce
import glob
from concurrent.futures import ThreadPoolExecutor, as_completed
import re
from astropy.io import ascii

In [11]:
%%time
# Load the clustering catalogue of every rosette, keep only the columns used
# downstream, and tag every row as real data. Each frame is stored in `data`
# under the key "data_<rosette number>".
rosettes = list(range(20))
data = {}

for number_rosette in rosettes:
    file = f"data_rosette/LRG_{number_rosette}_clustering_data.ecsv"
    table = Table.read(file, format="ascii.ecsv")
    subset = table[['TARGETID','RA', 'DEC', 'Z','x','y','z']].to_pandas()
    # Tag the rows before storing; `subset` and the dict entry are the same object.
    subset['type'] = 'data'
    data[f"data_{number_rosette}"] = subset

CPU times: total: 7.38 s
Wall time: 7.86 s


In [9]:
def parameter_r(data_all):
    """Compute the Delaunay-neighbour contrast parameter ``r`` for every point.

    The points given by the columns ``x``, ``y``, ``z`` are triangulated with
    ``scipy.spatial.Delaunay``. Two points are neighbours when they share a
    simplex. For each point the neighbours are counted by the value of their
    ``type`` column and

        r = (N_data - N_random) / (N_data + N_random)

    is evaluated, with ``r = 0`` when a point has no ``'data'`` or ``'rand'``
    neighbour (for example a point that is not part of any simplex).

    All new columns are assigned by row position, so the result does not
    depend on the labels of ``data_all.index``.

    Parameters
    ----------
    data_all : pandas.DataFrame
        Must contain the columns ``x``, ``y``, ``z`` and ``type``. Only the
        ``type`` values ``'data'`` and ``'rand'`` are counted.

    Returns
    -------
    pandas.DataFrame
        The same object as ``data_all`` (it is modified in place) with the
        added columns ``degree`` (number of neighbours), ``N_data``,
        ``N_random`` and ``r``.
    """
    coords = data_all[['x', 'y', 'z']].values
    tri = Delaunay(coords)
    n_points = len(data_all)

    # CSR adjacency provided by the triangulation: the neighbours of point k
    # are indices[indptr[k]:indptr[k + 1]], each neighbour listed once.
    indptr, indices = tri.vertex_neighbor_vertices
    degree = np.diff(indptr).astype(np.int64)

    # owner[e] is the point whose neighbour list contains adjacency entry e.
    owner = np.repeat(np.arange(n_points), degree)

    # Boolean flags per point; missing type values count as neither kind.
    is_data = (data_all['type'] == 'data').to_numpy(dtype=bool, na_value=False)
    is_rand = (data_all['type'] == 'rand').to_numpy(dtype=bool, na_value=False)

    # Count, for every point, how many of its neighbours are data / random.
    n_data = np.bincount(owner[is_data[indices]], minlength=n_points)
    n_random = np.bincount(owner[is_rand[indices]], minlength=n_points)

    data_all['degree'] = degree
    data_all['N_data'] = n_data
    data_all['N_random'] = n_random

    # Vectorised r; the 0/0 case is masked out by np.where and set to 0.
    total = n_data + n_random
    with np.errstate(divide='ignore', invalid='ignore'):
        r = np.where(total > 0, (n_data - n_random) / total, 0)
    data_all['r'] = r

    return data_all

In [29]:
def classification(data,number_rand):
    """Label every row by the interval its ``r`` value falls in.

    The label is written to the column ``class_<number_rand>``:

        -1.0 <= r <= -0.9  -> 'void'
        -0.9 <  r <=  0.0  -> 'sheet'
         0.0 <  r <=  0.9  -> 'filament'
         0.9 <  r <=  1.0  -> 'knot'

    Rows whose ``r`` lies outside [-1, 1] (or is NaN) receive no label.
    Afterwards the frame is sorted in place by the column ``z``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``r`` and ``z``. Modified in place.
    number_rand : int
        Suffix used to build the name of the class column.

    Returns
    -------
    pandas.DataFrame
        The same object as ``data``, labelled and sorted.
    """
    target_column = f'class_{number_rand}'
    r_values = data['r']

    # (row mask, label) pairs; only the first interval includes its lower edge.
    labelled_masks = [
        (r_values.ge(-1.0) & r_values.le(-0.9), 'void'),
        (r_values.gt(-0.9) & r_values.le(0.0), 'sheet'),
        (r_values.gt(0.0) & r_values.le(0.9), 'filament'),
        (r_values.gt(0.9) & r_values.le(1.0), 'knot'),
    ]
    for row_mask, label in labelled_masks:
        data.loc[row_mask, target_column] = label

    data.sort_values('z', inplace=True)

    return data

In [51]:
def entropy(data, n_realizations=None):
    """Row-wise normalised Shannon entropy of the class labels.

    For every row the occurrences of each distinct label are counted over all
    columns, divided by ``n_realizations`` to obtain probabilities ``p``, and
    combined as

        H = | -(1 / log2(4)) * sum(p * log2(p)) |

    so that four equally likely labels give ``H = 1``. Missing labels (NaN)
    are not counted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per object and one column per realisation; the cells hold the
        class labels.
    n_realizations : int, optional
        Denominator used to turn counts into probabilities. Defaults to the
        number of columns of ``data``, so the function no longer depends on a
        notebook-level global variable.

    Returns
    -------
    tuple of (pandas.Series, pandas.DataFrame)
        The entropy per row (indexed like ``data``) and the table of
        probabilities (one column per label, NaN where a label does not occur
        in that row).

    Raises
    ------
    ValueError
        If ``n_realizations`` is not positive.
    """
    if n_realizations is None:
        n_realizations = data.shape[1]
    if n_realizations <= 0:
        raise ValueError("n_realizations must be a positive integer")

    # Occurrences of each label per row; labels absent from a row become NaN.
    counts = data.apply(lambda row: row.value_counts(), axis=1)
    probabilidades = counts.div(n_realizations)

    # Only strictly positive probabilities contribute; NaN terms are skipped
    # by the sum.
    positive = probabilidades.where(probabilidades > 0)
    sum_entropy = (positive * np.log2(positive)).sum(axis=1)

    # abs() turns the -0.0 of single-label rows into 0.0.
    normalised_entropy = np.abs(-(1 / np.log2(4)) * sum_entropy)
    return normalised_entropy, probabilidades

In [61]:
%%time
# For every rosette: combine the real catalogue with each random realisation,
# classify every point, collect the class of each real galaxy across all
# realisations, compute its entropy and write the result to disk.
rand = {}
rand_list = {}
df_merge = {}
df_entropy = {}
number_random = 100

for i in range(20):
    rand_list[i] = []

    for j in range(number_random):
        print(f"data_{i}_rand_{j}")

        # Read the j-th random catalogue of rosette i
        file = f'rand_rosette/LRG_rosette_{i}_random_{j}.ecsv'
        table = Table.read(file, format="ascii.ecsv")
        subset = table[['TARGETID', 'RA', 'DEC', 'Z', 'x', 'y', 'z']].to_pandas()
        subset['type'] = 'rand'

        # Stack random and real points. assign() already returns a new frame,
        # so the cached entry in `data` is never modified and no extra copy
        # is needed.
        df_concat = pd.concat([subset, data[f'data_{i}'].assign(type='data')], ignore_index=True)

        # Parameter r and classification
        data_with_r = parameter_r(df_concat)
        data_with_class = classification(data_with_r, j)

        rand_list[i].append(data_with_class)

    # Merge the classes of all realisations of the same rosette
    df_merge[i] = data[f'data_{i}'][['TARGETID']].copy()
    for j in range(number_random):
        df_j = rand_list[i][j]
        # Keep only the real galaxies: the frame also holds the random points,
        # and a random point sharing a TARGETID with a galaxy would otherwise
        # duplicate rows or contribute its own class to the merge.
        df_j = df_j.loc[df_j['type'] == 'data', ['TARGETID', f'class_{j}']]
        df_merge[i] = df_merge[i].merge(df_j, on='TARGETID', how='left')

    # Entropy of the class labels of every galaxy
    reset = entropy(df_merge[i].set_index('TARGETID'))
    reset_entropy = reset[0].rename('entropy').reset_index()
    df_merge[i] = df_merge[i].merge(reset_entropy, on='TARGETID', how='left')
    df_entropy[i] = df_merge[i][['TARGETID','entropy']].copy()

    # Save the entropy table of this rosette
    table_entropy = Table.from_pandas(df_entropy[i])
    filename = f"entropy_rosette/LRG_entropy_rosette_{i}.ecsv"
    ascii.write(table_entropy, filename, format='ecsv', overwrite=True)

data_0_rand_0
data_0_rand_1
data_0_rand_2
data_0_rand_3
data_0_rand_4
data_0_rand_5
data_0_rand_6
data_0_rand_7
data_0_rand_8
data_0_rand_9
data_0_rand_10
data_0_rand_11
data_0_rand_12
data_0_rand_13
data_0_rand_14
data_0_rand_15
data_0_rand_16
data_0_rand_17
data_0_rand_18
data_0_rand_19
data_0_rand_20
data_0_rand_21
data_0_rand_22
data_0_rand_23
data_0_rand_24
data_0_rand_25
data_0_rand_26
data_0_rand_27
data_0_rand_28
data_0_rand_29
data_0_rand_30
data_0_rand_31
data_0_rand_32
data_0_rand_33
data_0_rand_34
data_0_rand_35
data_0_rand_36
data_0_rand_37
data_0_rand_38
data_0_rand_39
data_0_rand_40
data_0_rand_41
data_0_rand_42
data_0_rand_43
data_0_rand_44
data_0_rand_45
data_0_rand_46
data_0_rand_47
data_0_rand_48
data_0_rand_49
data_0_rand_50
data_0_rand_51
data_0_rand_52
data_0_rand_53
data_0_rand_54
data_0_rand_55
data_0_rand_56
data_0_rand_57
data_0_rand_58
data_0_rand_59
data_0_rand_60
data_0_rand_61
data_0_rand_62
data_0_rand_63
data_0_rand_64
data_0_rand_65
data_0_rand_66
data_