In [41]:
from astropy.coordinates import SkyCoord, Angle
from astropy import units as u
from astropy.table import Table
import pandas as pd
from collections import Counter
import numpy as np
import re


# Function to convert string to integer for sorting
def convert_to_int(x):
    """Build a sort key that orders integer-like values before other values.

    Args:
        x: Value to convert; anything accepted by ``int()`` counts as numeric.

    Returns:
        ``(0, int(x))`` when ``int(x)`` succeeds, otherwise ``(1, x)`` with
        the value left unchanged. The leading 0/1 makes numeric keys sort
        ahead of non-numeric ones.
    """
    try:
        number = int(x)
    except ValueError:
        # Not parseable as an integer: keep the raw value, ranked after numbers
        return (1, x)
    return (0, number)
# Load Hipparcos catalog data (with RA, Dec)

hip_data = Table.read('hipparcos_catalog.fits')

# Load exoplanet host data with coordinates
exo_hosts = Table.read('exo_hosts.csv', format='csv', comment='#')

# Get all hip_name values
hip_names = exo_hosts['hip_name']

# Count occurrences of each hip_name value, skipping falsy entries
hip_name_counts = Counter(str(name) for name in hip_names if name)

# Create mask for non-null hip_names.
# np.ma.getmaskarray is used instead of reading `.mask` directly because it
# yields an all-False mask for input that carries no mask at all.
# NOTE(review): presumably the column is unmasked when the CSV has no empty
# hip_name cells, in which case `.mask` would not be available -- confirm.
# np.ma.filled then converts a possibly-masked comparison result into a plain
# boolean array in which masked entries count as "not valid".
mask = np.ma.filled((hip_names != '') & ~np.ma.getmaskarray(hip_names), False)
valid_hip_names = exo_hosts[mask]

# Matches the digits following 'HIP' (optional whitespace in between); any
# trailing suffix after the digits is ignored. Compiled once, outside the loop.
HIP_ID_PATTERN = re.compile(r'HIP\s*(\d+)')

# Clean names by removing 'HIP' prefix
clean_names = set()
for name in valid_hip_names['hip_name']:
    # Defensive guard: skip empty or masked entries
    if not name or isinstance(name, np.ma.core.MaskedConstant):
        continue
    match = HIP_ID_PATTERN.search(name)
    if match:
        clean_names.add(match.group(1))  # Just the number as string

# Print results
print(f"Total number of distinct HIP IDs: {len(clean_names)}")

# Print sorted list (numeric order, via the convert_to_int sort key)
print("\nSorted HIP IDs (without 'HIP' prefix):")
for name in sorted(clean_names, key=convert_to_int):
    print(name)
# # Print results
# print("Distinct hip_names and their counts:")
# for name, count in hip_name_counts.most_common():
#     print(f"{name}: {count}")

# # Print total number of distinct values in hip_name_counts
# print(f"\nTotal number of distinct hip_names: {len(hip_name_counts)}")
# # exo_hosts_df = pd.read_csv('exo_hosts.csv')
# # exo_hosts = Table.from_pandas(exo_hosts_df)

# # Create SkyCoord objects for both catalogs

# # Convert sexagesimal strings to Angle objects (hours/minutes/seconds for RA, degrees/arcmin/arcsec for Dec)
# # RA is in hours:minutes:seconds, so specify unit='hourangle'
# ra_angles = Angle(hip_data['RAhms'], unit='hourangle')

# # Dec is in degrees:arcmin:arcsec, so specify unit='deg'
# dec_angles = Angle(hip_data['DEdms'], unit='deg')

# # Create SkyCoord object
# hip_coords = SkyCoord(ra=ra_angles, dec=dec_angles, frame='icrs')

# print(hip_coords[:5])
# exo_coords = SkyCoord(ra=exo_hosts['ra']*u.degree, dec=exo_hosts['dec']*u.degree)

# # Cross-match with a 2 arcsecond tolerance
# idx, d2d, _ = exo_coords.match_to_catalog_sky(hip_coords)
# max_sep = 2 * u.arcsec
# matches = d2d < max_sep

# # Matched Hipparcos entries
# matched_hip = hip_data[idx[matches]]
# matched_exo = exo_hosts[matches]
# print(f"Found {len(matched_hip)} matches within {max_sep.to(u.arcsec)}")
# Show records from exo_hosts where hip_name is '43587'
# print("Exoplanet hosts with HIP 43587:")
# mask = exo_hosts['hip_name'] == 'HIP 43587'
# print(exo_hosts[mask]['pl_name','soltype'])

# print("Detailed records for 55 Cnc b:")
# mask = exo_hosts['hip_name'] == 'HIP 43587'
# # Show more columns to identify differences
# print(exo_hosts[mask]['pl_name', 'soltype', 'disc_year'])
#Create mask for non-null hip_names
# Print sorted list with proper type handling

# Get valid hip names and remove 'HIP' prefix



Total number of distinct HIP IDs: 759

Sorted HIP IDs (without 'HIP' prefix):
65
522
738
801
948
1292
1419
1475
1499
1547
1640
1666
1692
1931
2247
2350
2611
2736
3093
3143
3206
3391
3479
3497
3502
3574
3834
3869
3911
3975
4297
4552
4715
4739
4845
4872
5054
5158
5301
5447
5529
5643
5763
5806
5812
5957
6069
6365
6379
6390
6511
6643
6702
6993
7245
7513
7562
7599
7978
8102
8152
8159
8501
8541
8770
8928
9094
9242
9519
9618
9683
9884
9960
10037
10085
10117
10138
10301
10337
10626
10657
10743
10800
11048
11130
11397
11433
11791
12048
12184
12186
12191
12247
12436
12493
12653
12684
12961
13192
13224
13291
13363
13467
13642
13754
13993
14101
14417
14530
14810
14954
15249
15510
15526
15527
15578
15683
15799
16012
16038
16069
16085
16537
16711
17047
17054
17096
17187
17264
17515
17540
17747
17960
18387
18451
18527
18841
18893
19011
19165
19176
19207
19394
19428
19921
19950
19976
20199
20277
20528
20606
20723
20753
20889
21109
21152
21421
21547
21571
21850
21932
22320
22336
22491
22627
22762
22826