In [1]:
import pyomo.environ as pyo
from pyomo.core.util import prod as pyprod
import numpy as np

class RarePatternDetect:
    def __init__(self, training_set: np.array, min_area: float):
        self.training_set = training_set # a N x d matrix
        self.min_area = min_area # the smallest allowed area
        self.N, self.d = (range(x) for x in self.training_set.shape)
        self.model = self.create_model()

    def create_model(self):
        # define model

        model = pyo.ConcreteModel()

        # x is a 2d vector
        # TODO: Set domain
        model.pattern = pyo.Var(range(2), self.d, bounds=(np.min(self.training_set), np.max(self.training_set)))

        # define auxiliary variables
        model.interval_lengths = pyo.Var(self.d, within=pyo.NonNegativeReals)

        # y is a boolean vector of size N
        model.included = pyo.Var(self.N, within=pyo.Binary)

        # objective: f(x,y) = sum(y)/self.calculate_area(x)
        model.obj = pyo.Objective(expr=sum(model.included[i] for i in self.N)/pyprod(model.interval_lengths[i] for i in self.d))

        # constraints
        # 1: self.calculate_area(x) >= self.min_area
        model.area_constraint = pyo.Constraint(pyprod(model.interval_lengths[i] for i in self.d) >= self.min_area)

        # 2: enforce that training points included in y lie within the pattern and others don't
        # we want: for any j in range(N): if y[j] == 1 then pattern needs to include jth training point AND if y[j]==0 then it needs to EXclude it!
        model.include_constraint = pyo.ConstraintList()
        M = 100000
        for j in self.N:
            for i in self.d:
                model.include_constraint.add(
                    model.pattern[0,i] <= self.training_set[j,i] + M*(1 - model.included[j])
                )
                model.include_constraint.add(
                    model.pattern[1,i] >= self.training_set[j,i] - M*(1 - model.included[j])
                )


        # 3: interval lengths are differences of pattern points
        model.interval_constraint = pyo.ConstraintList()
        for i in self.d:
            model.interval_constraint.add(
                model.interval_lengths[i] == model.pattern[1,i] - model.pattern[0,i]
            )

        return model


    def add_point_to_model(self, point):
        # 2: point to be classified lies in pattern
        # x[i] <= point[i] <= x[i + d], for all i
        self.model.point_constraint = pyo.ConstraintList()
        for i in self.d:
            self.model.point_constraint.add(
                self.model.pattern[0,i] <= point[i]
            )
            self.model.point_constraint.add(
                point[i] <= self.model.pattern[1,i]
            )

    def classify(self, point_to_be_classified: np.array) -> bool:
        self.add_point_to_model(point_to_be_classified) # point to be classified is a 1 x d array
        return pyo.SolverFactory('mindtpy').solve(self.model, mip_solver='glpk', nlp_solver='ipopt')

In [9]:
from scipy.stats import multivariate_normal

training_points = multivariate_normal.rvs(size=(100,2))
point_to_be_classified = training_points[0]

In [10]:
rare_pattern_detect = RarePatternDetect(training_points, min_area=0.1)

In [11]:
results = rare_pattern_detect.classify(point_to_be_classified)

In [12]:
results

{'Problem': [{'Name': 'unknown', 'Lower bound': 0.0, 'Upper bound': 0.0, 'Number of objectives': 1, 'Number of constraints': 406, 'Number of variables': 106, 'Number of binary variables': 100, 'Number of integer variables': 0, 'Number of continuous variables': 6, 'Number of nonzeros': None, 'Sense': 'minimize', 'Number of disjunctions': 0}], 'Solver': [{'Name': 'MindtPyOA', 'Status': 'ok', 'Message': None, 'User time': None, 'System time': None, 'Wallclock time': None, 'Termination condition': 'optimal', 'Termination message': None, 'Timing': Bunch(Call after main solve = 1.1333031579852104e-05, Call after subproblem solve = 4.62500611320138e-06, OA cut generation = 0.006896167004015297, fixed subproblem = 0.2851669170195237, initialization = 0.3759493329562247, main = 0.035342207993380725, main loop = 0.47220312495483086, main_timer_start_time = 295781.234493625, total = 0.8797631249763072), 'Iterations': 1, 'Num infeasible nlp subproblem': 0, 'Best solution found time': 0.86871004197