In [5]:
import numpy as np
import pandas as pd
import pyvista as pv
import duckdb

In [6]:
input_df = pd.read_csv("data_storage/12/12_2.csv")
input_df

Unnamed: 0,gal_tag,fof_halo_tag,gal_com_x,gal_com_y,gal_com_z,gal_mass,gal_count_star,gal_count_gas,gal_count_dm,gal_radius,simulation,time_step,object_type
0,246113781.0,251375070.0,120.370415,62.416222,61.06839,2849811000000.0,773.0,58.0,2381.0,0.042319,0,498,galaxyproperties
1,258198961.0,251375070.0,120.4832,62.552166,61.679855,825659200000.0,262.0,17.0,686.0,0.042319,0,498,galaxyproperties
2,260313583.0,251375070.0,119.522667,62.514469,61.27454,753617100000.0,202.0,16.0,633.0,0.042319,0,498,galaxyproperties
3,231475645.0,251375070.0,121.796036,62.378002,59.835892,700809100000.0,224.0,0.0,586.0,0.042319,0,498,galaxyproperties
4,260826585.0,251375070.0,119.717148,62.368248,61.303833,684652800000.0,181.0,35.0,571.0,0.042319,0,498,galaxyproperties
5,241390095.0,251375070.0,119.82682,62.436985,61.462391,665783300000.0,173.0,23.0,559.0,0.042319,0,498,galaxyproperties
6,263456285.0,251375070.0,121.962769,62.680302,60.777687,623511500000.0,147.0,9.0,527.0,0.042319,0,498,galaxyproperties
7,8111575.0,251375070.0,121.912338,63.46817,60.885403,555464900000.0,205.0,2.0,459.0,0.042319,0,498,galaxyproperties
8,266061361.0,251375070.0,121.958061,62.573231,60.939518,502596300000.0,129.0,5.0,424.0,0.042319,0,498,galaxyproperties
9,227230231.0,251375070.0,121.661934,62.963623,61.560192,430645300000.0,150.0,2.0,358.0,0.042319,0,498,galaxyproperties


In [7]:
# Filter for galaxyproperties rows
galaxy_df = input_df[input_df['object_type'] == 'galaxyproperties']

# Sort by galaxy mass in descending order and select top 10
top_10_galaxies = galaxy_df.sort_values('gal_mass', ascending=False).head(10)

# Calculate additional properties
top_10_galaxies['star_fraction'] = top_10_galaxies['gal_count_star'] / (top_10_galaxies['gal_count_star'] + top_10_galaxies['gal_count_gas'] + top_10_galaxies['gal_count_dm'])
top_10_galaxies['gas_fraction'] = top_10_galaxies['gal_count_gas'] / (top_10_galaxies['gal_count_star'] + top_10_galaxies['gal_count_gas'] + top_10_galaxies['gal_count_dm'])
top_10_galaxies['dm_fraction'] = top_10_galaxies['gal_count_dm'] / (top_10_galaxies['gal_count_star'] + top_10_galaxies['gal_count_gas'] + top_10_galaxies['gal_count_dm'])

# Calculate distance from halo center
top_10_galaxies['distance_from_halo_center'] = np.sqrt(
    (top_10_galaxies['gal_com_x'] - top_10_galaxies['sod_halo_center_x'])**2 +
    (top_10_galaxies['gal_com_y'] - top_10_galaxies['sod_halo_center_y'])**2 +
    (top_10_galaxies['gal_com_z'] - top_10_galaxies['sod_halo_center_z'])**2
)

# Select relevant columns for the result
result = top_10_galaxies[['gal_tag', 'gal_mass', 'gal_radius', 'star_fraction', 'gas_fraction', 'dm_fraction', 'distance_from_halo_center',
                          'gal_com_x', 'gal_com_y', 'gal_com_z', 'sod_halo_mass', 'sod_halo_R200m']]

# Add rank column
result['rank'] = range(1, 11)

# Reorder columns to put rank first
result = result[['rank'] + [col for col in result.columns if col != 'rank']]

KeyError: 'sod_halo_center_x'

In [7]:
# Get the reference halo coordinates
ref_halo = input_df[input_df['fof_halo_tag'] == 251375070].iloc[0]

# Calculate Euclidean distance for all halos
input_df['distance'] = ((input_df['fof_halo_center_x'] - ref_halo['fof_halo_center_x'])**2 +
                        (input_df['fof_halo_center_y'] - ref_halo['fof_halo_center_y'])**2 +
                        (input_df['fof_halo_center_z'] - ref_halo['fof_halo_center_z'])**2).pow(0.5)

# Sort by distance and select the 10 closest halos (excluding the reference halo)
result_df = input_df[input_df['fof_halo_tag'] != '251375070'].nsmallest(10, 'distance')
result_df

Unnamed: 0,fof_halo_tag,fof_halo_center_x,fof_halo_center_y,fof_halo_center_z,distance
0,251375070.0,120.370285,62.418175,61.067566,0.0
225448,257621504.0,120.634964,61.661469,60.639606,0.90874
216976,247737798.0,121.082611,61.841316,60.971027,0.92168
217770,248726076.0,120.846283,61.665443,61.370796,0.940812
221888,253467068.0,120.734375,61.692284,61.571365,0.955664
231657,265026024.0,120.875916,61.895432,61.695572,0.960892
226815,259228238.0,120.713974,61.644142,61.523071,0.961631
234342,268173774.0,121.187424,61.820312,61.233467,1.026001
225452,257625634.0,119.93766,63.341309,60.815285,1.050231
233366,267068976.0,119.875992,63.234344,60.543144,1.088796


In [4]:
reference_halo = input_df[input_df['fof_halo_tag'] == 251375070][['fof_halo_center_x', 'fof_halo_center_y', 'fof_halo_center_z']].iloc[0]
distances = ((input_df[['fof_halo_center_x', 'fof_halo_center_y', 'fof_halo_center_z']] - reference_halo) ** 2).sum(axis=1) ** 0.5
result_df = input_df.loc[distances.nsmallest(10).index]
result_df

Unnamed: 0,fof_halo_tag,fof_halo_center_x,fof_halo_center_y,fof_halo_center_z
0,251375070.0,120.370285,62.418175,61.067566
225448,257621504.0,120.634964,61.661469,60.639606
216976,247737798.0,121.082611,61.841316,60.971027
217770,248726076.0,120.846283,61.665443,61.370796
221888,253467068.0,120.734375,61.692284,61.571365
231657,265026024.0,120.875916,61.895432,61.695572
226815,259228238.0,120.713974,61.644142,61.523071
234342,268173774.0,121.187424,61.820312,61.233467
225452,257625634.0,119.93766,63.341309,60.815285
233366,267068976.0,119.875992,63.234344,60.543144


In [7]:
input_df = pd.read_csv("./data_storage/120_1.csv")
input_df

Unnamed: 0,fof_halo_tag,sod_halo_center_x,sod_halo_center_y,sod_halo_center_z,sod_halo_mass,distance
0,251375070.0,120.370285,62.418175,61.067566,523622900000000.0,0.0


In [None]:
# Extract coordinates and create PolyData object
points = input_df[['sod_halo_center_x', 'sod_halo_center_y', 'sod_halo_center_z']].to_numpy()
pdata = pv.PolyData(points)

# Add scalar fields
pdata['mass'] = input_df['sod_halo_mass'].to_numpy()
pdata['distance'] = input_df['distance'].to_numpy()

# Identify the largest halo (first row, as it's sorted)
largest_halo = pdata.points[0]

# Create a sphere to represent the largest halo
sphere = pv.Sphere(radius=0.5, center=largest_halo)

# Create a plotter
plotter = pv.Plotter(off_screen=True)

# Add the points (halos) to the plot, colored by mass
plotter.add_mesh(pdata, scalars='mass', cmap='viridis', point_size=10, render_points_as_spheres=True)

# Add the largest halo sphere
plotter.add_mesh(sphere, color='red')

# Set up the camera and other properties
plotter.camera_position = 'xy'
plotter.camera.zoom(1.5)
plotter.show_bounds(grid='front', location='outer', all_edges=True)

# # Save the plot as a VTK file
# plotter.export_vtksz('data_storage/halo_visualization.vtk')

# # Also save as a static image for quick viewing
# plotter.screenshot('data_storage/halo_visualization.png')

# # Close the plotter
# plotter.close()

In [23]:
db = duckdb.connect("data_storage/132.duckdb")
result = db.execute("SHOW ALL TABLES")
result.fetch_df()

Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,132,main,galaxyproperties,"[gal_tag, fof_halo_tag, gal_count, simulation,...","[BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, VARCHAR]",False


Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,130,main,data,"[fof_halo_count, fof_halo_max_cir_vel, fof_hal...","[DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBL...",False
1,130,main,galaxyproperties,"[gal_tag, fof_halo_tag, gal_count_dm, gal_ke, ...","[DOUBLE, DOUBLE, DOUBLE, DOUBLE, BIGINT, BIGIN...",False
2,130,main,haloproperties,"[fof_halo_tag, sod_halo_MStar200m, sod_halo_co...","[DOUBLE, DOUBLE, DOUBLE, BIGINT, BIGINT, VARCHAR]",False


In [20]:
sql = "SELECT fof_halo_tag, sod_halo_MStar200m FROM haloproperties WHERE simulation = 0 AND time_step = 498 AND object_type = 'halo' ORDER BY sod_halo_MStar200m DESC LIMIT 1;"
db.sql(sql).show()

CatalogException: Catalog Error: Table with name haloproperties does not exist!
Did you mean "galaxyproperties"?