In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sortedcontainers import SortedKeyList

In [2]:
    # Load the probability table and split it into the highest-ranked rows
    # (by the 'yes_hp_en' column, descending) and the remainder.
    data_file = "hp_ret_pg_probs.csv"
    data = pd.read_csv(data_file)
    data['UNIQUE_ID'] = data['UNIQUE_ID'].astype('int')

    # Named `data_sorted` rather than `sorted` so the builtin sorted() is not
    # shadowed for the rest of the kernel session.
    data_sorted = data.sort_values('yes_hp_en', ascending=False).reset_index(drop=True)

    # Number of top-ranked rows kept in `top_hp`; everything after goes to `bot_hp`.
    top_hp_count = 1125
    top_hp = data_sorted.iloc[:top_hp_count]
    bot_hp = data_sorted.iloc[top_hp_count:]

In [3]:
class RatchetNode:
    """A data point (integer id plus feature vector) used by the sort-and-scan pass.

    The node caches the magnitude of its feature vector in `mag` and carries
    two bookkeeping slots, `enclosed` and `extra`, that start out as None.
    """

    def __init__(self, id: int, features: np.ndarray):
        """Store the id and a copy of the features, and precompute the magnitude.

        Args:
            id: Identifier of the point; coerced with int().
            features: Feature values; copied into a new numpy array.
        """
        self.id = int(id)
        self.features = np.array(features)
        # Relies on the module-level `magnitude` function being defined
        # before the first node is constructed.
        self.mag = magnitude(self.features)
        # Filled in later with the number of nodes this node encloses.
        self.enclosed = None
        # Initialised to None; no active code in the visible cells assigns it.
        self.extra = None

    def __repr__(self):
        return f'RatchetNode({self.id}, {self.features}, {self.enclosed})'

    def __str__(self):
        return self.__repr__()

    def get_feature(self, idx: int):
        """Return the feature value at position `idx`."""
        return self.features[idx]

    def get_mag(self):
        """Return the magnitude cached in `mag` by __init__."""
        # The attribute is `mag`; the previous `self._mag` never existed and
        # raised AttributeError on every call.
        return self.mag

Chebyshev distance from the origin

In [4]:
def magnitude(arr: np.ndarray):
    """Return the largest element of `arr`.

    For a vector with no negative components this equals its Chebyshev
    (L-infinity) distance from the origin.
    """
    largest = arr.max()
    return largest

In [5]:
# Container that keeps its nodes ordered by their precomputed `mag` attribute.
queue = SortedKeyList(key=lambda entry: entry.mag)

In [6]:
# Wrap every row of `top_hp` in a RatchetNode and insert it into the sorted queue.
# iterrows() yields (index label, row Series); the label is not needed here.
# NOTE(review): re-running this cell adds the same nodes to `queue` again;
# re-create `queue` first when re-executing.
for _, row in top_hp.iterrows():
    # Explicit positional access: plain `row[0]` on a string-labelled Series
    # is deprecated in pandas. Assumes the first column holds the id and all
    # remaining columns are features - TODO confirm against the CSV layout.
    node_id = row.iloc[0]
    features = list(row.iloc[1:])
    rn = RatchetNode(node_id, np.array(features))
    queue.add(rn)

# Sort and scan method

1. Create a list sorted by Chebyshev distance
1. For each item, compute the size of its "contained" set by scanning earlier items in the list in reverse order
    1. If a "candidate" or "extra" item is encountered, stop
1. If an item contains exactly k points, mark it as a "candidate"
1. If an item contains more than k points, mark it as "extra"

In [7]:
def encloses(p1: np.ndarray, p2: np.ndarray):
    """Return True when no component of `p1` is smaller than the matching one in `p2`."""
    falls_short = p1 < p2
    return np.logical_not(np.any(falls_short))

def encloses_node(n1: RatchetNode, n2: RatchetNode):
    """Apply `encloses` to the feature vectors of two nodes (does `n1` enclose `n2`?)."""
    outer, inner = n1.features, n2.features
    return encloses(outer, inner)

In [8]:
def scan_left(i):
    """Yield queue[i], queue[i - 1], ..., queue[0], one node at a time.

    Nothing is yielded when `i` is negative.
    """
    for position in range(i, -1, -1):
        yield queue[position]


In [9]:
# Enclosed-count a node must match exactly to be selected as a candidate.
candidate_limit = 18


def process_node(i):
    """Count the nodes that queue[i] encloses and store the total in its `enclosed`.

    Only positions i, i - 1, ..., 0 of `queue` are examined. The scan starts
    at i itself, and a node always encloses itself, so the stored count is at
    least 1. The early stop on "candidate"/"extra" nodes is not applied:
    every earlier node is checked.

    Args:
        i: Position of the node in `queue`.
    """
    rn = queue[i]
    # NOTE(review): a node with the same `mag` that sits to the right of i is
    # never visited, even if queue[i] encloses it - confirm ties are acceptable.
    rn.enclosed = sum(1 for other in scan_left(i) if encloses_node(rn, other))
     

In [10]:
# Fill in the enclosed count for every queue position, in ascending queue order.
for i in range(len(queue)):
    process_node(i)

In [11]:
# Keep only the nodes whose enclosed count equals `candidate_limit` exactly.
candidates = list(filter(lambda node: node.enclosed == candidate_limit, queue))

In [15]:
def list_enclosed(rn):
    """Return, in queue order, every node in `queue` that `rn` encloses."""
    matches = []
    for node in queue:
        if encloses_node(rn, node):
            matches.append(node)
    return matches
def list_enclosed_point(pt):
    """Return, in queue order, every node in `queue` whose features `pt` encloses."""
    matches = []
    for node in queue:
        if encloses(pt, node.features):
            matches.append(node)
    return matches

Point from the competition: (0.415, 0.715, 0.230)

In [13]:
# Display the nodes whose enclosed count equals `candidate_limit`.
candidates

[RatchetNode(245190863, [0.4896092  0.44540571 0.40050956], 18),
 RatchetNode(245112404, [0.49687701 0.44573209 0.36741416], 18),
 RatchetNode(245093499, [0.50945927 0.43028714 0.39348067], 18),
 RatchetNode(244293041, [0.45818007 0.50983837 0.30488046], 18),
 RatchetNode(245108123, [0.44393614 0.53597884 0.29802305], 18),
 RatchetNode(244964283, [0.46084066 0.53686924 0.23056328], 18),
 RatchetNode(245121780, [0.4113807  0.58231522 0.48848779], 18),
 RatchetNode(245122147, [0.42069211 0.59928834 0.29546919], 18),
 RatchetNode(241497941, [0.65977217 0.42563985 0.36986167], 18),
 RatchetNode(245040895, [0.43736535 0.66545202 0.1546297 ], 18),
 RatchetNode(245016701, [0.42402772 0.70913989 0.17931053], 18)]

In [14]:
# List every node in the queue enclosed by the candidate at index 5.
list_enclosed(candidates[5])

[RatchetNode(245028423, [0.41087466 0.42891356 0.19042865], 1),
 RatchetNode(245070606, [0.44077203 0.39712501 0.07304131], 1),
 RatchetNode(245171642, [0.4479794  0.4243105  0.08144297], 2),
 RatchetNode(245117139, [0.45082415 0.43996205 0.15467944], 3),
 RatchetNode(244979211, [0.44662343 0.45782495 0.14652687], 2),
 RatchetNode(245064622, [0.43013372 0.46081103 0.12561053], 1),
 RatchetNode(245139242, [0.42392657 0.47209906 0.10060654], 1),
 RatchetNode(241622322, [0.41576138 0.47213877 0.11494354], 1),
 RatchetNode(245205576, [0.44180583 0.47941804 0.11535136], 4),
 RatchetNode(240196414, [0.45861683 0.48663731 0.20074199], 10),
 RatchetNode(245213151, [0.44804655 0.48836533 0.17171881], 8),
 RatchetNode(245185440, [0.4449801  0.5123646  0.16308749], 6),
 RatchetNode(245255383, [0.43713557 0.51244276 0.12041233], 3),
 RatchetNode(244821669, [0.45621981 0.51580069 0.21139016], 13),
 RatchetNode(245146457, [0.44205732 0.52012514 0.04373556], 1),
 RatchetNode(245162047, [0.40090643 0.

In [16]:
# List every node in the queue enclosed by the competition point.
competition_point = np.array([0.415, 0.715, 0.230])
list_enclosed_point(competition_point)

[RatchetNode(245028423, [0.41087466 0.42891356 0.19042865], 1),
 RatchetNode(245162047, [0.40090643 0.52599713 0.19371993], 1),
 RatchetNode(245207936, [0.40323004 0.59275395 0.20255607], 2),
 RatchetNode(245118561, [0.40354597 0.59399804 0.0557527 ], 1),
 RatchetNode(245186798, [0.40875887 0.59732814 0.21236595], 4),
 RatchetNode(245130608, [0.40387427 0.59865963 0.12460671], 2),
 RatchetNode(245071342, [0.40045588 0.609327   0.20674791], 1),
 RatchetNode(245242847, [0.40769981 0.61889304 0.13203352], 3),
 RatchetNode(245205877, [0.41150312 0.6246401  0.08901689], 2),
 RatchetNode(227473475, [0.4009301  0.63405295 0.18562071], 1),
 RatchetNode(245222792, [0.40443196 0.65056075 0.13474382], 3),
 RatchetNode(245082547, [0.4115206  0.65388525 0.05746504], 2),
 RatchetNode(119744050, [0.41496359 0.66855209 0.1552609 ], 7),
 RatchetNode(245251550, [0.41486388 0.69148963 0.19624644], 10),
 RatchetNode(245156449, [0.41247213 0.69177143 0.22015615], 13),
 RatchetNode(245112314, [0.40640167 0.