In [2]:
#| default_exp tycho2_main

Import dependencies.

In [6]:
import polars as pl
import numpy as np
import torch
import math
import time
import heapq
from typing import List, Any
import tqdm
import pickle
import pyarrow as pa
from dataclasses import dataclass, field

Read the simplified Tycho-2 catalog.

In [7]:
# Load the simplified Tycho-2 catalog into the module-level `df`. The summary
# printed below shows three float columns: RAmdeg, DEmdeg and Vmag.
df = pl.read_parquet('../support/tyc2-3.parquet')
print(df.describe())

shape: (9, 4)
┌────────────┬────────────┬────────────┬────────────┐
│ describe   ┆ RAmdeg     ┆ DEmdeg     ┆ Vmag       │
│ ---        ┆ ---        ┆ ---        ┆ ---        │
│ str        ┆ f64        ┆ f64        ┆ f64        │
╞════════════╪════════════╪════════════╪════════════╡
│ count      ┆ 2.430468e6 ┆ 2.430468e6 ┆ 2.430468e6 │
│ null_count ┆ 0.0        ┆ 0.0        ┆ 0.0        │
│ mean       ┆ 188.530502 ┆ -3.479685  ┆ 11.071772  │
│ std        ┆ 100.340485 ┆ 41.43108   ┆ 1.108483   │
│ min        ┆ 0.000339   ┆ -89.889664 ┆ 1.89558    │
│ 25%        ┆ 105.471535 ┆ -39.00515  ┆ 10.54628   │
│ 50%        ┆ 186.052124 ┆ -4.889748  ┆ 11.292219  │
│ 75%        ┆ 280.987732 ┆ 31.3225    ┆ 11.82646   │
│ max        ┆ 359.999878 ┆ 89.832336  ┆ 15.37354   │
└────────────┴────────────┴────────────┴────────────┘


In [8]:
#| export

@dataclass(order=True)
class ScoreItem:
    """A scored candidate that is ordered and compared by ``score`` only.

    ``region`` and ``item`` are carried along as payload; both are excluded
    from equality and ordering comparisons.
    """

    # The only field that takes part in ==, <, <=, > and >=.
    score: float
    # Payload: identifier of the region the item belongs to.
    region: int = field(compare=False)
    # Payload: arbitrary object associated with the score.
    item: Any = field(compare=False)
    
@dataclass(order=True)
class Points:
    """Thin wrapper around a collection of points.

    The single field is excluded from comparisons, so any two instances
    compare as equal regardless of their content.
    """

    # Wrapped payload; ignored by ==, <, <=, > and >=.
    points: Any = field(compare=False)
    

def convert_to_cartesian(distances, ra, dec):
    """Convert spherical coordinates to Cartesian x/y/z.

    ``ra`` and ``dec`` are passed straight to ``torch.cos``/``torch.sin``, so
    they are interpreted as radians.

    Returns a tensor with x, y and z stacked along dimension 1.
    """
    # Projection of the radius onto the equatorial (x/y) plane.
    planar = distances * torch.cos(dec)
    components = (
        planar * torch.cos(ra),
        planar * torch.sin(ra),
        distances * torch.sin(dec),
    )
    return torch.stack(components, dim=1)

def calculate_distances(coords):
    """Return the sorted, distinct, strictly positive pairwise distances.

    Distances are computed between every pair of rows in ``coords``. Because
    ``torch.unique`` is applied, equal distances collapse into one entry, and
    the zero self-distances are filtered out.
    """
    pairwise = torch.cdist(coords, coords)
    distinct = torch.unique(pairwise)
    positive = distinct > 0
    return distinct[positive]

def distance_from_magnitude(m, M):
    """Evaluate the distance-modulus relation ``10 ** ((m - M + 5) / 5)``.

    ``m`` is the first (apparent) magnitude and ``M`` the second (absolute)
    magnitude in the formula.
    """
    exponent = (m - M + 5) / 5
    return 10 ** exponent

def distance_from_magnitude_tensor(m: torch.Tensor, M: torch.Tensor) -> torch.Tensor:
    """Tensor version of the distance-modulus relation ``10 ** ((m - M + 5) / 5)``.

    The formula is applied element-wise to ``m`` and ``M``.
    """
    exponent = (m - M + 5) / 5
    return 10 ** exponent

# one of the possibilities
def tetrahedron_score(coords):
    """Score how uniform the pairwise distances between ``coords`` are.

    The score is ``exp(-std / mean)`` over the distinct positive pairwise
    distances returned by ``calculate_distances``: close to 1 when the
    distances are nearly equal, closer to 0 as their relative spread grows.

    Returns a Python float. When there is no positive distance at all
    (for example a single point) the result is NaN.
    """
    distances = calculate_distances(coords)

    # calculate_distances de-duplicates its output, so a perfectly regular
    # shape yields exactly one value. torch.std of a single element is NaN
    # (Bessel's correction), which would turn the best possible input into NaN;
    # the spread is zero in that case, so the score is exactly 1.
    if distances.numel() == 1:
        return 1.0

    mean_distance = torch.mean(distances)
    std_dev = torch.std(distances)

    # Relative spread mapped into (0, 1]: small spread -> close to 1.
    return math.exp(-std_dev.item() / mean_distance.item())

def square_score(tensor):
    """Return the standard deviation of the four shortest pairwise distances.

    Pairwise Euclidean distances are computed over *all* columns of ``tensor``
    (one row per point). For four points forming a square the four shortest
    distances are the equal-length sides, so the result is 0; larger values
    mean the shortest distances differ more.

    Returns a Python float.
    """
    pairwise = torch.pdist(tensor, p=2)

    # Ascending order, then keep the four shortest distances.
    shortest_four = torch.sort(pairwise)[0][:4]

    return shortest_four.std().item()

def measure_squareness_old(tensor):
    """Measure how far the diagonal/side ratio is from that of a square.

    :param tensor: one point per row; pairwise distances use all columns.
        The four shortest distances are treated as sides and the remaining
        ones as diagonals, which matches a 4-point input.
    :return: a 0-dim tensor ``|1 - (mean_diagonal / mean_side) / sqrt(2)|``.
        A value close to 0 means the ratio matches a square.
    """
    pairwise = torch.pdist(tensor, p=2)
    ordered = torch.sort(pairwise)[0]

    # Split the ascending distances into "sides" and "diagonals".
    side_mean = torch.mean(ordered[:4])
    diagonal_mean = torch.mean(ordered[4:])

    # A square has diagonal / side == sqrt(2); dividing by it maps a square to 1.
    root_two = torch.sqrt(torch.tensor(2.0)).item()
    normalized_ratio = (diagonal_mean / side_mean) / root_two

    return abs(1 - normalized_ratio)

def measure_squareness(tensor):
    """Return a deviation-from-square score; 0 means a perfect square.

    Pairwise Euclidean distances are computed over all columns of ``tensor``
    (one row per point) and sorted. The four shortest are treated as sides and
    the rest as diagonals, which matches a 4-point input.

    Three factors are multiplied into a "squareness" value that equals 1 for a
    perfect square:

    * ``exp(-std(sides))``      - 1 when all sides are equal,
    * ``exp(-std(diagonals))``  - 1 when both diagonals are equal,
    * ``mean(sides) / mean(diagonals) * sqrt(2)`` - 1 for the square ratio.

    The function returns ``abs(1 - squareness)`` as a Python float, so lower
    is more square.
    """
    pairwise = torch.pdist(tensor, p=2)
    distances = torch.sort(pairwise)[0]

    sides = distances[:4]
    diagonals = distances[4:]

    # exp(-std) is 1 for perfectly uniform lengths and decays towards 0.
    side_uniformity = torch.exp(-torch.std(sides))
    diagonal_uniformity = torch.exp(-torch.std(diagonals))

    # For a square side / diagonal == 1 / sqrt(2); multiplying by sqrt(2)
    # normalizes that to 1. (Dividing, as was done before, gave 0.5 for a
    # perfect square, so the function returned 0.5 instead of 0.)
    squareness_ratio = torch.mean(sides) / torch.mean(diagonals)
    normalized_ratio = squareness_ratio * (2.0 ** 0.5)

    final_squareness = normalized_ratio * side_uniformity * diagonal_uniformity

    return abs(1 - final_squareness.item())

def mass_score_triangle_torch(points_tensor, device='cpu'):
    """Score every 3-point combination of ``points_tensor`` in one batch.

    For each triple the three side lengths are computed and the score is their
    population standard deviation divided by their mean (0 for an equilateral
    triangle). Triples whose mean side length is 0 get a score of 1.

    Returns ``(scores, points_combined)`` where ``scores`` has one entry per
    triple and ``points_combined`` stacks the three corner points of each
    triple along dimension 1.
    """
    points_tensor = points_tensor.to(device)  # move the input to the requested device
    n_points = points_tensor.shape[0]
    triples = torch.combinations(torch.arange(n_points), r=3)

    corner_a = points_tensor[triples[:, 0]]
    corner_b = points_tensor[triples[:, 1]]
    corner_c = points_tensor[triples[:, 2]]

    # Side lengths of every triangle.
    side_ab = torch.linalg.norm(corner_b - corner_a, dim=1)
    side_bc = torch.linalg.norm(corner_c - corner_b, dim=1)
    side_ca = torch.linalg.norm(corner_a - corner_c, dim=1)

    mean = (side_ab + side_bc + side_ca) / 3
    variance = ((side_ab - mean)**2 + (side_bc - mean)**2 + (side_ca - mean)**2) / 3
    std_dev = torch.sqrt(variance)

    # Degenerate triangles (all corners coincide) would divide by zero.
    fallback = torch.tensor([1.], device=mean.device)
    scores = torch.where(mean != 0, std_dev / mean, fallback)

    # Keep the corners grouped per triangle rather than flattened.
    points_combined = torch.stack([corner_a, corner_b, corner_c], dim=1)

    return scores, points_combined

# go from tycho2 to xyz coords
def transform_radecmag_from_numpy(stars):
    """Convert ``(ra, dec, magnitude)`` numpy arrays to Cartesian coordinates.

    ``stars`` is a sequence of three numpy arrays. The third one is treated as
    a magnitude and converted to a distance with
    ``distance_from_magnitude_tensor`` (against an all-zero second magnitude);
    the first two are passed on as ``ra`` and ``dec``.

    Returns the tensor produced by ``convert_to_cartesian``.

    NOTE(review): ``convert_to_cartesian`` feeds ``ra``/``dec`` to
    ``torch.cos``/``torch.sin``, i.e. treats them as radians. If the arrays
    come from the catalog's degree columns they need converting first -
    confirm against the caller.
    """
    ra, dec, magnitude = (torch.from_numpy(star) for star in stars)
    zero_reference = torch.zeros(len(magnitude))
    distances = distance_from_magnitude_tensor(magnitude, zero_reference)

    # convert_to_cartesian expects (distances, ra, dec). Splatting the list in
    # its (ra, dec, distance) order, as was done before, put every value in the
    # wrong slot.
    return convert_to_cartesian(distances, ra, dec)

def global_normalize_tensor(tensor):
    """Min-max scale ``tensor`` using the min and max over all its elements.

    The smallest element maps to 0 and the largest to 1; every column shares
    the same offset and scale.
    """
    lowest = tensor.min()
    highest = tensor.max()
    span = highest - lowest
    return (tensor - lowest) / span
    
def radec_normalize_tensor(tensors):
    """Min-max scale the first two columns of ``tensors``; drop the rest.

    The min and max are taken over both kept columns together, so the two
    columns share one offset and one scale. The returned tensor has exactly
    two columns.
    """
    # Keep only the first two columns.
    first_two = tensors[:, :2]

    lowest = first_two.min()
    highest = first_two.max()

    return (first_two - lowest) / (highest - lowest)

def mag_score(tensor):
    """Return the range (max minus min) of the third column of ``tensor``.

    The result is a 0-dim tensor.
    """
    third_column = tensor[:, 2]
    brightest = third_column.max()
    faintest = third_column.min()
    return brightest - faintest

def score_triangle(tensor):
    """Return the standard deviation of all pairwise distances in ``tensor``.

    Distances are Euclidean over all columns (one row per point). Equal
    pairwise distances give 0. The result is a 0-dim tensor.
    """
    pairwise = torch.pdist(tensor, p=2)
    return pairwise.std()

def stars_for_point_and_radius(df, point, radius, max_mag):
    """Select stars in the square box whose lower corner is ``point``.

    ``point`` is the (RA, Dec) corner of the box, not its centre. The box
    spans ``radius`` in both RAmdeg and DEmdeg; its edges are excluded.
    Only rows with ``Vmag <= max_mag`` are kept.
    """
    ra_low, dec_low = point
    ra_high = ra_low + radius
    dec_high = dec_low + radius

    ra_col = pl.col("RAmdeg")
    dec_col = pl.col("DEmdeg")
    inside_box = (ra_col < ra_high) & (ra_col > ra_low) & (dec_col < dec_high) & (dec_col > dec_low)

    return df.filter(inside_box & (pl.col("Vmag") <= max_mag))

def stars_for_center_and_radius(df, center, radius, max_mag):
    """Select stars in the square box centred on ``center``.

    The box extends ``radius / 2`` on each side of the (RA, Dec) centre, so
    ``radius`` is the full side length. The edges are excluded. Only rows
    with ``Vmag <= max_mag`` are kept.
    """
    ra_mid, dec_mid = center
    ra_low, ra_high = ra_mid - radius/2, ra_mid + radius/2
    dec_low, dec_high = dec_mid - radius/2, dec_mid + radius/2

    ra_col = pl.col("RAmdeg")
    dec_col = pl.col("DEmdeg")
    inside_box = (ra_col < ra_high) & (ra_col > ra_low) & (dec_col < dec_high) & (dec_col > dec_low)

    return df.filter(inside_box & (pl.col("Vmag") <= max_mag))


def get_grid_points(min_dec=-90, max_dec=90):
    """Build the list of ``(ra, dec)`` grid points on a 4-degree lattice.

    RA runs from 0 to 360 inclusive and Dec from ``min_dec`` up to at most
    ``max_dec``, both in steps of 4. Points are ordered RA-major: all Dec
    values for the first RA, then all for the next RA, and so on.
    """
    grid = []
    for ra in range(0, 361, 4):
        for dec in range(min_dec, max_dec + 1, 4):
            grid.append((ra, dec))
    return grid

def get_grid_point_by_idx(idx, min_dec=-90, max_dec=90):
    """Return the ``(ra, dec)`` grid point stored at position ``idx``.

    ``min_dec`` and ``max_dec`` select which grid is indexed and are forwarded
    to ``get_grid_points``. They must match the bounds used when the index was
    produced: an index taken from a grid built with other bounds (for example
    ``get_grid_points(-63, 63)``) refers to a different point in the default
    full-sky grid.

    Raises ``IndexError`` when ``idx`` is outside the grid.
    """
    return get_grid_points(min_dec, max_dec)[idx]

def get_region(df, idx, radius, max_mag, min_dec=-90, max_dec=90):
    """Return the stars of the grid region with index ``idx``.

    The grid point at ``idx`` is used as the centre of the box selected by
    ``stars_for_center_and_radius``.

    ``min_dec`` and ``max_dec`` choose the grid that ``idx`` refers to. They
    default to the full-sky grid and must match the bounds of the grid the
    index came from, otherwise the index maps to a different point.

    Raises ``IndexError`` when ``idx`` is outside the grid.
    """
    center = get_grid_points(min_dec, max_dec)[idx]
    return stars_for_center_and_radius(df, center, radius, max_mag)

# Empty placeholder frame for results.
# NOTE(review): the dtypes (pl.Float64, pl.Int64) are passed here as column
# *data*, not through the `schema=` argument - confirm this builds the intended
# empty, typed frame. No use of this module-level `resultdf` is visible in this
# part of the notebook.
resultdf = pl.DataFrame({
    "score": pl.Float64,
    "region": pl.Int64,
    "item": []
})


    


## Code to process each region

In [9]:
from itertools import combinations
from heapq import heappush, heappushpop

In [None]:

def first(h):
    """Index of the first slot on level ``h`` of an array-backed binary heap (root is level 0)."""
    return 2**h - 1


def last(h):
    """Index one past the last slot on level ``h``."""
    return first(h + 1)


def level(heap, h):
    """Return the elements of ``heap`` that sit on level ``h``."""
    return heap[first(h):last(h)]


def prepare(e, field):
    """Render ``e`` as text centred in a field of ``field`` characters."""
    return str(e).center(field)


def hprint(heap, width=None):
    """Print an array-backed binary heap as a tree, one level per line.

    ``width`` is the character width reserved per element; by default it is
    the longest ``str()`` representation in ``heap``. An empty heap prints
    nothing.
    """
    if len(heap) == 0:
        # Nothing to draw; max() and log() below are undefined for an empty heap.
        return
    if width is None:
        width = max(len(str(e)) for e in heap)
    # Number of levels = floor(log2(n)) + 1, computed exactly with integers.
    height = len(heap).bit_length()
    gap = ' ' * width
    for h in range(height):
        # Widest possible number of leaves under one node of this level.
        below = 2 ** (height - h - 1)
        field = (2 * below - 1) * width
        print(gap.join(prepare(e, field) for e in level(heap, h)))
        
def process(stars, region, point, nr_stars) -> "pl.DataFrame":
    """Score all star triples of one region and return a selection of them.

    ``stars`` is converted with ``torch.tensor`` and every 3-row combination is
    scored by ``mass_score_triangle_torch``. The scores, the region id and the
    corner points of each triple are put into a DataFrame with the columns
    ``score``, ``region`` and ``points``, from which
    ``top_k(5, by="score", descending=True)`` is returned.

    ``point`` is only used in the progress message; ``nr_stars`` is currently
    unused.

    Returns a polars DataFrame (not a list of ``ScoreItem``).
    """
    scores, points = mass_score_triangle_torch(torch.tensor(stars), device='cpu')
    region_df = pl.DataFrame({
        "score": scores.cpu().numpy(),
        "region": [region] * len(scores),
        "points": points.cpu().numpy(),
    })
    selected_df = region_df.top_k(5, by="score", descending=True)
    print(f"Processed {region=} - {point} for length {len(stars)} with {len(scores)=}")
    return selected_df
        

## Code to save progress

In [24]:
def add_to_result_and_save(resultdf, df: pl.DataFrame, filename):
    """Append the rows of ``df`` to ``resultdf`` and write the result to ``filename``.

    Each row of ``df`` is read positionally as ``(score, region, points)``;
    the third value is converted with ``.tolist()`` and stored in a column
    named ``stars``. When ``resultdf`` is empty the new rows replace it,
    otherwise they are concatenated below it. When ``df`` has no rows,
    ``resultdf`` is written unchanged.

    Returns the combined DataFrame that was written as parquet.
    """
    print("Saving result to", filename)
    rows = df.rows()
    if rows:
        # Build the new rows in a single frame instead of one frame per row
        # followed by a concat per row, which grows quadratically.
        new_rows = pl.DataFrame({
            "score": [row[0] for row in rows],
            "region": [row[1] for row in rows],
            "stars": [row[2].tolist() for row in rows],
        })
        if resultdf.is_empty():
            resultdf = new_rows
        else:
            resultdf = pl.concat([resultdf, new_rows])
    resultdf.write_parquet(filename)
    return resultdf
# Column dtypes for the saved result frame.
# NOTE(review): `region` is Float64 here but Int64 in the module-level
# `resultdf` definition - confirm which is intended. No use of `schema` is
# visible in this part of the notebook.
schema = {
    "score": pl.Float64,
    "region": pl.Float64,
    "stars": pl.List(pl.List(pl.Float64))  # Nested list type
}

## Generate a grid of regions and process each region

In [25]:
# Build the grid restricted to declinations between -63 and 63; region indices
# used below are positions in this list.
grid_points = get_grid_points(-63,63)
print("Total grid points is:", len(grid_points))

# Parquet file that process_regions writes its results to.
result_filename = 'result_triangle2.parquet'
# Disabled: resume from a previously saved result file.
# resultdf = pl.DataFrame()
# try:
#     resultdf = pl.read_parquet(result_filename)
# except:
#     print("no previous results")
#     pass
# print(f"Loaded results: {resultdf.head()}")

def process_regions(grid, start=0, end=0):
    """Process the grid points ``grid[start:end]`` and save the combined result.

    For every selected grid point the stars in its box are looked up in the
    module-level ``df`` (``radius=2``, ``max_mag=10``) and handed to
    ``process``. The per-region frames are stacked, the five lowest-scoring
    rows are printed, and everything is written to the module-level
    ``result_filename`` through ``add_to_result_and_save``.

    ``end=0`` means "up to the end of the grid". Region ids are positions in
    the full ``grid``, independent of ``start``. Nothing is saved when the
    selected slice is empty.

    Returns ``None``.
    """
    if end == 0:
        end = len(grid)

    # Pair each point with its index in the *full* grid before slicing, so the
    # region ids stay stable whichever slice is processed.
    selected = list(enumerate(grid))[start:end]
    global_result = pl.DataFrame()

    for idx, point in selected:
        stars = stars_for_point_and_radius(df, point=point, radius=2, max_mag=10)
        if len(stars) > 0:
            result = process(stars.rows(), idx, point, 3)
            if result is not None and not result.is_empty():
                if global_result.is_empty():
                    global_result = result
                else:
                    global_result = global_result.vstack(result)
            else:
                print("no results")
            print("************************")

    if not selected:
        return

    # Save once, after the last selected region. The previous check compared
    # the full-grid index with the length of the slice, which only lines up
    # when start == 0; with start > 0 the save fired at the wrong region or
    # never.
    print(global_result.sort("score").head(5) if not global_result.is_empty() else "no score")
    add_to_result_and_save(pl.DataFrame(), global_result, result_filename)
%time process_regions(grid_points, start=0)

Total grid points is: 2912
Processed region=0 - (0, -63) for length 11 with len(scores)=165
************************
Processed region=1 - (0, -59) for length 9 with len(scores)=84
************************
Processed region=2 - (0, -55) for length 6 with len(scores)=20
************************
Processed region=3 - (0, -51) for length 8 with len(scores)=56
************************
Processed region=4 - (0, -47) for length 9 with len(scores)=84
************************
Processed region=5 - (0, -43) for length 14 with len(scores)=364
************************
Processed region=6 - (0, -39) for length 15 with len(scores)=455
************************
Processed region=7 - (0, -35) for length 13 with len(scores)=286
************************
Processed region=8 - (0, -31) for length 21 with len(scores)=1330
************************
Processed region=9 - (0, -27) for length 14 with len(scores)=364
************************
Processed region=10 - (0, -23) for length 13 with len(scores)=286
**************

In [None]:
# Four sample rows of three values each; presumably [RA, Dec, Vmag] of four
# catalog stars - TODO confirm.
a = [[352.9835510253906, -61.950279235839844, 9.794389724731445], [352.44195556640625, -61.28193664550781, 9.73677921295166], [352.8981628417969, -62.92292022705078, 9.376359939575195], [353.7196960449219, -62.507789611816406, 9.891069412231445]]
# radec_normalize_tensor keeps only the first two columns, so the score is
# computed from those two alone.
square_score(radec_normalize_tensor(torch.Tensor(a)))

In [None]:



# Manual check of measure_squareness. It returns abs(1 - squareness), so lower
# means more square.
points = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
print(measure_squareness(points))  # unit square: the reference "ideal" input
print(measure_squareness(torch.tensor(a)))  # the four sample rows; all three columns enter the distances



In [None]:
# importing mplot3d toolkits
from mpl_toolkits import mplot3d
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import torch

In [None]:
# Draw the rows of `a` as a 3D scatter plot, seen from two azimuth angles.
fig = plt.figure(figsize=(12, 6))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])  # two equally wide panels
coords = torch.tensor(a)

# (azimuth, title) per panel; the elevation is 30 for both.
view_settings = [(30, 'View 1'), (120, 'View 2')]
axes = []
for panel, (azimuth, title) in enumerate(view_settings):
    ax = fig.add_subplot(gs[panel], projection='3d')
    ax.scatter(coords[:, 0], coords[:, 1], coords[:, 2], c='green')
    ax.view_init(30, azimuth)  # elevation, azimuth
    ax.set_title(title)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    axes.append(ax)

# Keep the original per-panel names available to later cells.
ax1, ax2 = axes

plt.suptitle('xyz projection for stars from different views')
plt.tight_layout()
plt.show()

In [61]:
# Small toy frame for experimenting with `select`.
# NOTE(review): this rebinds the module-level name `df`, which earlier held the
# catalog and is read as a global inside process_regions; calling
# process_regions after this cell would use this toy frame instead.
df = pl.DataFrame({"foo": [1, 2, 3], "bar": [-1, 5, 8]})

In [62]:
# Experiment: pass a ScoreItem wrapping a column expression to `select`.
# The recorded output shows this raises TypeError ("did not expect value ...
# of type 'ScoreItem'").
df.select(ScoreItem(1,2,pl.col("foo")))

TypeError: did not expect value ScoreItem(score=1, region=2, item=<polars.expr.expr.Expr object at 0x16844a410>) of type 'ScoreItem'

Try disambiguating with `lit` or `col`.