In [1]:
import numpy as np
import os

In [2]:
errs_path = 'orbits/detections'
err_files = os.listdir(errs_path)
err_files = [f for f in err_files if f.endswith('_errs.npy')]

detections_path = 'orbits/detections'
detections_files = os.listdir(detections_path)
detections_files = [f for f in detections_files if f.endswith('_detections.npy')]

In [3]:
def load_errs_and_detections(err_file):
    detections_file = err_file.replace('_errs', '_all_detections')
    if detections_file in detections_files:
        errs = np.load(os.path.join(errs_path, err_file))
        detections = np.load(os.path.join(detections_path, detections_file))
        return errs, detections, detections_file
    else:
        return None, None, None

In [4]:
def get_errs_float(errs, detections):
    detections = np.array(detections)
    ts = np.unique(detections[:,0])
    regions = errs[:,4]
    errs_left = errs[:, :4]
    errs_right = errs[:, 5:]
    errs_float = np.hstack([errs_left, errs_right])
    errs_float = np.float64(errs_float)
    return ts, regions, errs_float

In [5]:
import numpy as np
def remove_timesteps_with_few_detections(errs_float, detections, ts, threshold, regions):
    # Find the timesteps with less than 5 detections
    timesteps_to_remove = []
    for t in ts:
        err_t = errs_float[errs_float[:,0] == t]
        dets_t = detections[detections[:,0] == t]
        if len(dets_t) < threshold:
            timesteps_to_remove.append(t)

    indexes_to_keep = ~np.isin(detections[:,0], timesteps_to_remove)
    
    # Remove the timesteps from err_t, dets_t, and ts
    err_t = errs_float[indexes_to_keep]
    dets_t = detections[indexes_to_keep]
    ts = np.unique(dets_t[:,0])
    regions = regions[indexes_to_keep]
    return err_t, dets_t, ts, regions
    # err_t = np.array([err for err in err_t if err[0] not in timesteps_to_remove])
    # dets_t = np.array([det for det in dets_t if det[0] not in timesteps_to_remove])
    # ts = np.array([t for t in ts if t not in timesteps_to_remove])
    # return err_t, dets_t, ts

In [6]:
def remove_detections_outside_image_frame(detections):   
    frame_width = 4608
    frame_height = 2592

    # Get the x and y coordinates from the detections
    x_coords = detections[:, 3]
    y_coords = detections[:, 4]

    # Find the indices of detections outside of the image frame
    outside_indices = np.where((x_coords < 0) | (x_coords >= frame_width) | (y_coords < 0) | (y_coords >= frame_height))[0]

    return outside_indices


In [7]:
def delete_detections_outside_image_frame(err_t, dets_t, regions, outside_indices):
    # Remove the detections outside from err_t, dets_t
    err_t = np.delete(err_t, outside_indices, axis=0)
    dets_t = np.delete(dets_t, outside_indices, axis=0)
    regions = np.delete(regions, outside_indices, axis=0)

    return err_t, dets_t, regions


In [8]:
def remove_bad_classes(err_t, dets_t, regions, errs_float, detections):
    err_t_bc = err_t
    dets_t_bc = dets_t
    regions_bc = regions
    for region in np.unique(regions):
        #region_indices = np.where(regions_bc == region)
        #region_errs = errs_float[region_indices]
        #region_dets = detections[region_indices]
        #region_ts = np.unique(region_dets[:,0])

        cls = err_t_bc[:,3] * (regions == region)
        bad_classes = np.load('bad_classes/' + region + '_bad_classes.npy')
        
        if len(bad_classes) > 0:
            bad_class_indices = np.isin(cls, bad_classes)

            # Remove bad classes from err_t_bc
            err_t_bc = err_t_bc[~bad_class_indices]

            # Remove corresponding detections from dets_t_bc
            dets_t_bc = dets_t_bc[~bad_class_indices]

            # Remove corresponding regions from regions_bc
            regions_bc = regions_bc[~bad_class_indices]

            regions = regions[~bad_class_indices]

    if len(err_t_bc) == 0:
        #print('No detections left after removing bad classes')
        return None, None, None    
    else:
        #print('Detections left after removing bad classes')
        return err_t_bc, dets_t_bc, regions_bc

In [9]:
def get_outlier_lists(ts, dets_t_bc):
    outlier_lists = {}
    for t in ts:
        dets_t2 = dets_t_bc[dets_t_bc[:,0] == t]
        dets_t2_x_y_lon_lat = dets_t2[:, [3, 4, 1, 2]]
        outlier_lists[t] = dets_t2_x_y_lon_lat.tolist()
    return outlier_lists

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
def remove_outliers(outlier_list, residual_threshold=0.0005):
    outlier_arr = np.array(outlier_list)
    xs = outlier_arr[:,0]
    ys = outlier_arr[:,1]
    lons = outlier_arr[:,2]
    lats = outlier_arr[:,3]
    # Call the function to fit the model
    predicted_lon = fit_lon(xs, ys, lons)
    predicted_lat = fit_lat(xs, ys, lats)
    residuals = np.linalg.norm(np.column_stack((predicted_lon, predicted_lat)) - np.column_stack((lons, lats)), axis=1)
    inlier_indexes = np.where(residuals < residual_threshold)
    return inlier_indexes

# Define a function to fit the model and predict lon
def fit_lon(xs, ys, lons):
    # Create polynomial features
    poly_features = PolynomialFeatures(degree=2)
    X_poly = poly_features.fit_transform(np.column_stack((xs, ys)))

    # Fit the linear regression model
    lin_reg = LinearRegression()
    lin_reg.fit(X_poly, lons)

    # Predict lon based on x and y
    predicted_lon = lin_reg.predict(poly_features.transform(np.column_stack((xs, ys))))

    return predicted_lon

 # Define a function to fit the model and predict lat
def fit_lat(xs, ys, lats):
    # Create polynomial features
    poly_features = PolynomialFeatures(degree=2)
    X_poly = poly_features.fit_transform(np.column_stack((xs, ys)))

    # Fit the linear regression model
    lin_reg = LinearRegression()
    lin_reg.fit(X_poly, lats)

    # Predict lat based on x and y
    predicted_lat = lin_reg.predict(poly_features.transform(np.column_stack((xs, ys))))

    return predicted_lat

    # # Call the function to fit the model
    # degree = 2  # Set the degree of the polynomial
    # model = fit_lon_lat(xs, ys, lons, lats, degree)

    # # Use the model to predict inliers
    # predicted_lonlats = model.predict(np.column_stack((xs,ys), axis=1))
    # residuals = np.linalg.norm(predicted_lonlats - np.column_stack((lons, lats)), axis=1)
    # inlier_indexes = np.where(residuals < 0.0005)
    # return inlier_indexes

In [11]:
def get_inliers(ts, dets_t2, errs_t2, outlier_lists, threshold=0.0005):
    inlier_dets = []
    inlier_errs = []
    for t in ts:
        outlier_list = outlier_lists[t]
        inlier_indexes = remove_outliers(outlier_list, threshold)
        dets3 = np.array(dets_t2)[inlier_indexes]
        errs3 = np.array(errs_t2)[inlier_indexes]
        if len(dets3) > 0:
            for det, err in zip(dets3, errs3):
                if len(det) > 0:
                    inlier_dets.append(det)
                    inlier_errs.append(err)
        else:
            continue
    if len(inlier_dets) < 0:
        return None, None
    return np.array(inlier_dets), np.array(inlier_errs)

In [18]:
def save_dets(dets3, detection_file, thresh):
    if not os.path.exists('orbits/detections/newdets/thresh' + str(thresh)):
        os.makedirs('orbits/detections/newdets/thresh' + str(thresh))
    np.save('orbits/detections/newdets/' + 'thresh' + str(thresh) + '/' + detection_file, dets3)

In [17]:
def get_errs_for_file(thresh, err_file):
    errs, detections, detection_file = load_errs_and_detections(err_file)
    if len(detections) == 0:
        save_dets(detections, detection_file, thresh)
        return None
        
    ts, regions, errs_float = get_errs_float(errs, detections)
    err_t, dets_t, ts, regions_t = remove_timesteps_with_few_detections(errs_float, detections, ts, 5, regions)
    outside_indices = remove_detections_outside_image_frame(dets_t)
    err_t, dets_t, regions_t = delete_detections_outside_image_frame(err_t, dets_t, regions_t, outside_indices)
    err_t_bc, dets_t_bc, regions_t_bc = remove_bad_classes(err_t, dets_t, regions_t, errs_float, detections)
    if err_t_bc is None:
        save_dets(np.array([]), detection_file, thresh)
        return None
    err_t2, dets_t2, ts2, regions_t2 = remove_timesteps_with_few_detections(err_t_bc, dets_t_bc, ts, 3, regions_t_bc)
    outlier_lists = get_outlier_lists(ts2, dets_t2)
    inlier_dets, inlier_errs = get_inliers(ts2, dets_t2, err_t2, outlier_lists, thresh)
    #print(len(inlier_dets))
    if inlier_dets is None or len(inlier_dets) < 2:
        save_dets(np.array([]), detection_file, thresh)
        return None
    # print('Mean x error:', np.mean(inlier_errs[:, 1]))
    # print('Mean y error:', np.mean(inlier_errs[:, 2]))
    # print('Median x error:', np.median(inlier_errs[:, 1]))
    # print('Median y error:', np.median(inlier_errs[:, 2]))
    # print('Max x error:', np.max(inlier_errs[:, 1]))
    # print('Max y error:', np.max(inlier_errs[:, 2]))
    else:
        save_dets(inlier_dets, detection_file, thresh)
    
    return [np.mean(inlier_errs[:, 1]), np.mean(inlier_errs[:, 2]), np.median(inlier_errs[:, 1]), np.median(inlier_errs[:, 2]), np.max(inlier_errs[:, 1]), np.max(inlier_errs[:, 2]), len(inlier_dets)]

In [22]:
from tqdm import tqdm
from multiprocessing import Pool
errors_per_thresh = {}

#p = Pool(8)

for thresh in tqdm(np.linspace(0.0001, 0.0001, 1)):
    thresh = 10000
    errors_per_file = []
    for err_file in err_files:
        err = get_errs_for_file(thresh, err_file)
        if err is not None:
            errors_per_file.append(err)
    # errors_per_file = p.map(get_errs_for_file, err_files[:50])
    # p.close()
    # p.join()
    errors_per_thresh[thresh] = [np.mean([errors[0] for errors in errors_per_file]), np.mean([errors[1] for errors in errors_per_file]), np.median([errors[2] for errors in errors_per_file]), np.median([errors[3] for errors in errors_per_file]), np.max([errors[4] for errors in errors_per_file]), np.max([errors[5] for errors in errors_per_file]), np.sum([errors[6] for errors in errors_per_file])]
    

    # for err_file in err_files[:50]:
    #     errs, detections, detection_file = load_errs_and_detections(err_file)
    #     if len(detections) == 0:
    #         save_dets(detections, detection_file)
    #         continue
            
    #     ts, regions, errs_float = get_errs_float(errs, detections)
    #     err_t, dets_t, ts, regions_t = remove_timesteps_with_few_detections(errs_float, detections, ts, 5, regions)
    #     outside_indices = remove_detections_outside_image_frame(dets_t)
    #     err_t, dets_t, regions_t = delete_detections_outside_image_frame(err_t, dets_t, regions_t, outside_indices)
    #     err_t_bc, dets_t_bc, regions_t_bc = remove_bad_classes(err_t, dets_t, regions_t, errs_float, detections)
    #     if err_t_bc is None:
    #         save_dets(np.array([]), detection_file)
    #         continue
    #     err_t2, dets_t2, ts2, regions_t2 = remove_timesteps_with_few_detections(err_t_bc, dets_t_bc, ts, 3, regions_t_bc)
    #     outlier_lists = get_outlier_lists(ts2, dets_t2)
    #     inlier_dets, inlier_errs = get_inliers(ts2, dets_t2, err_t2, outlier_lists, thresh)
    #     #print(len(inlier_dets))
    #     if inlier_dets is None or len(inlier_dets) < 2:
    #         save_dets(np.array([]), detection_file)
    #         continue
    #     # print('Mean x error:', np.mean(inlier_errs[:, 1]))
    #     # print('Mean y error:', np.mean(inlier_errs[:, 2]))
    #     # print('Median x error:', np.median(inlier_errs[:, 1]))
    #     # print('Median y error:', np.median(inlier_errs[:, 2]))
    #     # print('Max x error:', np.max(inlier_errs[:, 1]))
    #     # print('Max y error:', np.max(inlier_errs[:, 2]))
    #     else:
    #         save_dets(inlier_dets, detection_file)
        
    #     errors_per_file[err_file] = [np.mean(inlier_errs[:, 1]), np.mean(inlier_errs[:, 2]), np.median(inlier_errs[:, 1]), np.median(inlier_errs[:, 2]), np.max(inlier_errs[:, 1]), np.max(inlier_errs[:, 2]), len(inlier_dets)]
    #     #errors_per_thresh[thresh] = [np.mean(inlier_errs[:, 1]), np.mean(inlier_errs[:, 2]), np.median(inlier_errs[:, 1]), np.median(inlier_errs[:, 2]), np.max(inlier_errs[:, 1]), np.max(inlier_errs[:, 2]), len(inlier_dets)]
    # errors_per_thresh[thresh] = [np.mean([errors_per_file[err_file][0] for err_file in errors_per_file]), np.mean([errors_per_file[err_file][1] for err_file in errors_per_file]), np.median([errors_per_file[err_file][2] for err_file in errors_per_file]), np.median([errors_per_file[err_file][3] for err_file in errors_per_file]), np.max([errors_per_file[err_file][4] for err_file in errors_per_file]), np.max([errors_per_file[err_file][5] for err_file in errors_per_file]), np.sum([errors_per_file[err_file][6] for err_file in errors_per_file])]
    # errors_per_file = {}
#print(errors_per_thresh)
for thresh in errors_per_thresh:
    print('Threshold:', thresh)
    print('Number of detections:', errors_per_thresh[thresh][6])
    print('Mean x error:', errors_per_thresh[thresh][0])
    print('Mean y error:', errors_per_thresh[thresh][1])
    print('Median x error:', errors_per_thresh[thresh][2])
    print('Median y error:', errors_per_thresh[thresh][3])
    print('Max x error:', errors_per_thresh[thresh][4])
    print('Max y error:', errors_per_thresh[thresh][5])
    print('\n')
    

100%|██████████| 1/1 [01:04<00:00, 64.99s/it]

Threshold: 10000
Number of detections: 1815916
Mean x error: 15.00580063970503
Mean y error: 14.633367070577341
Median x error: 5.666517899601331
Median y error: 6.252723480057881
Max x error: 1913.5657886683775
Max y error: 2398.5742477296853





