In [23]:
import os
import numpy as np
import pandas as pd
from iinuclear import isit
from iinuclear.utils import check_nuclear, get_closest_match
from astropy.coordinates import SkyCoord
import astropy.units as u

In [2]:
# Load the public TNS object table and pick out the entries without a
# classification, i.e. rows whose 'type' is missing or contains only whitespace.
csv_file = "data/tns_public_objects.csv"
df = pd.read_csv(csv_file)

# Column labels may carry stray whitespace; normalise them before indexing by name.
df.columns = df.columns.str.strip()

type_is_missing = df["type"].isna()
type_is_blank = df["type"].astype(str).str.strip().eq("")
mask = type_is_missing | type_is_blank
empty_type_rows = df.loc[mask]

print(
    f"Found {len(empty_type_rows)} rows with empty or missing 'type'",
    empty_type_rows,
    sep="\n",
)

Found 146457 rows with empty or missing 'type'
         objid name_prefix     name          ra  declination  redshift  \
0       180387          AT  2025lvs  322.597227   -12.525196       NaN   
1       180846          AT  2025mng  194.865098    16.938878       NaN   
2       181046          AT  2025muw  207.701299    22.237928       NaN   
3       181045          AT  2025muv  209.127453    20.202770       NaN   
4       180299          AT  2025lsi  211.089473    27.990580       NaN   
...        ...         ...      ...         ...          ...       ...   
165574   10033          AT   2016ae  115.381542    36.653389       NaN   
165577   10030          AT   2016ab   32.875375   -35.832136       NaN   
165580   10027          AT    2016Y  208.322708     5.764472       NaN   
165583   10024          AT    2016V  349.104958    24.496611       NaN   
165592   10015          AT    2016N   71.338658    73.394747       NaN   

        typeid type  reporting_groupid reporting_group  ...  dis

In [3]:
# Rebuild the untyped subset from the full table using the boolean `mask`.
empty_type_rows = df.loc[mask]

In [None]:
# Screen the untyped TNS objects for nuclear positions and persist the hits.
#
# Each object is looked up with `get_closest_match`; the object's position and
# the matched position are then handed to `check_nuclear`. Objects it flags as
# nuclear are appended to NUCLEAR_CSV immediately, so an interrupted run keeps
# everything found so far.
NUCLEAR_START_ROW = 118000  # positional offset into empty_type_rows; earlier rows are skipped
POSITION_ERROR = 0.2  # final argument of check_nuclear -- presumably a positional uncertainty; TODO confirm meaning/units
# A single path is used for the existence check and for every write. Writing the
# first hit to a differently named file would mean the checked file never comes
# into existence and each new hit overwrites the previous one.
NUCLEAR_CSV = "data/nuclear_galaxies.csv"

pending = empty_type_rows.iloc[NUCLEAR_START_ROW:]
failed = []  # (name, error) for objects whose lookup or nuclear test raised

for name, ra, dec in zip(pending["name"], pending["ra"], pending["declination"]):
    try:
        # NOTE(review): `result` is expected to offer `.keys()` and columns with a
        # `.data` array (it looks like an astropy Table) -- confirm against iinuclear.
        result = get_closest_match(ra, dec)
        # The match exposes its position either as ra/dec or as raMean/decMean.
        if 'ra' in result.keys() and 'dec' in result.keys():
            match_ra, match_dec = result['ra'].data[0], result['dec'].data[0]
        else:
            match_ra, match_dec = result['raMean'].data[0], result['decMean'].data[0]
        _sigma, _chi2_val, _p_value, is_nuclear = check_nuclear(
            [ra], [dec], match_ra, match_dec, POSITION_ERROR
        )
    except Exception as e:
        # Best effort: one bad object must not abort a long run, but keep a
        # record so the failures are visible afterwards.
        failed.append((name, repr(e)))
        continue

    if is_nuclear:
        # Append just this row (writing the header only when the file is new)
        # instead of re-reading and re-writing the whole CSV for every hit.
        # A local name is used so the notebook-level `df` is left untouched.
        hit = pd.DataFrame([{"name": name, "ra": ra, "dec": dec}])
        hit.to_csv(
            NUCLEAR_CSV,
            mode="a",
            header=not os.path.exists(NUCLEAR_CSV),
            index=False,
        )

if failed:
    print(f"Skipped {len(failed)} objects that raised errors; first few: {failed[:5]}")

In [None]:
# Cross-match the candidate list against the redshift catalogue blocks.
#
# Every ``*.txt`` file in ``blocks/`` is read as a whitespace-delimited,
# header-less table whose columns 6, 7 and 10 are used as RA [deg], Dec [deg]
# and redshift. Each candidate (from row MATCH_START_ROW onward) is paired with
# its nearest catalogue point; pairs within MATCH_RADIUS_ARCSEC are written to
# MATCHED_CSV together with the matched position, redshift and provenance.
CANDIDATES_CSV = "data/copy.csv"
MATCHED_CSV = "data/objects_galaxy_nucleus_with_redshift.csv"
MATCH_START_ROW = 30000      # positional offset into the candidate table; earlier rows are skipped
MATCH_RADIUS_ARCSEC = 3.0    # maximum separation accepted as a match

df = pd.read_csv(CANDIDATES_CSV)

# Read all block data from txt files
block_dir = "blocks"
all_block_coords = []
all_redshifts = []
all_sources = []  # (filename, row index within that file) for every catalogue point

# Sorted so the catalogue order (and therefore the output) is reproducible.
for filename in sorted(os.listdir(block_dir)):
    if not filename.endswith(".txt"):
        continue
    filepath = os.path.join(block_dir, filename)
    try:
        # sep=r"\s+" replaces the deprecated delim_whitespace=True.
        block_df = pd.read_csv(filepath, sep=r"\s+", header=None, dtype=str)
        for col in (6, 7, 10):
            block_df[col] = pd.to_numeric(block_df[col], errors="coerce")
        # Rows with an unparsable position or redshift are discarded.
        block_df = block_df.dropna(subset=[6, 7, 10])

        coords = SkyCoord(
            ra=block_df[6].to_numpy(dtype=float) * u.deg,
            dec=block_df[7].to_numpy(dtype=float) * u.deg,
        )
        z_block = block_df[10].to_numpy(dtype=float)
    except Exception as e:
        print(f"Skipping {filename} due to error: {e}")
        continue

    # Append only after the whole file parsed, so the three lists stay aligned.
    all_block_coords.append(coords)
    all_redshifts.append(z_block)
    all_sources.extend((filename, idx) for idx in block_df.index)

if not all_sources:
    raise RuntimeError(f"No usable catalogue rows found in *.txt files under {block_dir!r}")

# Combine block data
all_block_coords = SkyCoord(
    ra=np.concatenate([c.ra.deg for c in all_block_coords]) * u.deg,
    dec=np.concatenate([c.dec.deg for c in all_block_coords]) * u.deg,
)
all_redshifts = np.concatenate(all_redshifts)

# Sanity check
assert len(all_sources) == len(all_redshifts) == len(all_block_coords)

# Match each CSV row to closest block point
candidates = df.iloc[MATCH_START_ROW:]
cand_ra = candidates["ra"].to_numpy(dtype=float)
cand_dec = candidates["dec"].to_numpy(dtype=float)
# Candidates without a finite position can never match; drop them up front
# because the nearest-neighbour search does not accept NaN coordinates.
finite = np.isfinite(cand_ra) & np.isfinite(cand_dec)
candidates = candidates[finite]

if len(candidates) > 0:
    sources = SkyCoord(ra=cand_ra[finite] * u.deg, dec=cand_dec[finite] * u.deg)
    # One nearest-neighbour query for all candidates (astropy uses SciPy here),
    # instead of computing a full separation array per candidate.
    nearest_idx, sep2d, _ = sources.match_to_catalog_sky(all_block_coords)
    within = sep2d.to_value(u.arcsec) <= MATCH_RADIUS_ARCSEC
else:
    nearest_idx = np.empty(0, dtype=int)
    within = np.empty(0, dtype=bool)

hit_idx = nearest_idx[within]

# Save matched rows only. Building the frame from the candidate table keeps the
# column header even when nothing matched, so the file stays readable.
matched_df = candidates[within].assign(
    match_ra=all_block_coords.ra.deg[hit_idx],
    match_dec=all_block_coords.dec.deg[hit_idx],
    redshift=all_redshifts[hit_idx],
    source_file=[all_sources[i][0] for i in hit_idx],
    source_row=[all_sources[i][1] for i in hit_idx],
)
matched_df.to_csv(MATCHED_CSV, index=False)


In [42]:
# Keep only the matched objects whose redshift is at most 0.02 and store them
# in a separate file.
df = (
    pd.read_csv("data/objects_galaxy_nucleus_with_redshift.csv")
    .loc[lambda frame: frame["redshift"].le(0.02)]
)
df.to_csv("data/filtered_objects_galaxy_nucleus_with_redshift.csv", index=False)