In [36]:
import pyomo.environ as pyo
# from pyomo.core.util import prod
import numpy as np

class RarePatternDetect:
    """Mixed-integer model that fits an axis-aligned box ("pattern") to training data.

    The model minimises ``sum(included) / box_volume`` subject to the box
    volume being at least ``min_area``; ``included[j]`` is a binary flag per
    training point and the box is described by a lower and an upper corner.

    Attributes:
        training_set: the training data as an ``N x d`` float array.
        min_area: smallest box volume the model accepts.
        N: ``range`` over training-point indices.
        d: ``range`` over feature/dimension indices.
        model: the Pyomo ``ConcreteModel`` built by :meth:`create_model`.
    """

    def __init__(self, training_set: np.ndarray, min_area: float):
        """Validate the inputs and build the optimisation model.

        Args:
            training_set: ``N x d`` array-like of training points.
            min_area: lower bound on the box volume; must be strictly positive
                because the objective divides by the volume.

        Raises:
            ValueError: if ``training_set`` is not a non-empty 2-D array or
                ``min_area`` is not strictly positive.
        """
        training_set = np.asarray(training_set, dtype=float)
        if training_set.ndim != 2:
            raise ValueError(
                f"training_set must be a 2-D (N x d) array, got {training_set.ndim} dimension(s)"
            )
        if training_set.size == 0:
            raise ValueError("training_set must contain at least one point and one feature")
        # `not (x > 0)` also rejects NaN, which `x <= 0` would let through.
        if not min_area > 0:
            raise ValueError(f"min_area must be strictly positive, got {min_area!r}")

        self.training_set = training_set  # a N x d matrix
        self.min_area = min_area  # the smallest allowed area
        self.N, self.d = (range(x) for x in self.training_set.shape)
        self.model = self.create_model()

    def create_model(self):
        """Build and return the Pyomo model (without any point-specific constraint).

        Returns:
            A ``pyo.ConcreteModel`` with variables ``pattern`` (2 x d box
            corners), ``interval_lengths`` (d side lengths) and ``included``
            (N binaries), the ratio objective, and the area / inclusion /
            interval-length constraints.
        """
        model = pyo.ConcreteModel()

        def pattern_area():
            # Volume of the box: product of its side lengths over all dimensions.
            return pyo.prod(model.interval_lengths[i] for i in self.d)

        ## variables

        # Box corners: pattern[0, i] is the lower and pattern[1, i] the upper
        # coordinate in dimension i. Both are confined to the overall value
        # range of the training data.
        lower = float(np.min(self.training_set))
        upper = float(np.max(self.training_set))
        model.pattern = pyo.Var(range(2), self.d, bounds=(lower, upper))

        # auxiliary variables: side length of the box in each dimension
        model.interval_lengths = pyo.Var(self.d, within=pyo.NonNegativeReals)

        # one binary flag per training point
        model.included = pyo.Var(self.N, within=pyo.Binary)

        # objective (minimised by default): flagged points per unit of box volume
        model.obj = pyo.Objective(
            expr=sum(model.included[j] for j in self.N) / pattern_area()
        )

        ## constraints

        # pattern area needs to reach at least min_area
        model.area_constraint = pyo.Constraint(expr=pattern_area() >= self.min_area)

        # included[j] == 1  =>  training point j lies inside the box.
        # NOTE(review): the reverse implication (point inside the box =>
        # included[j] == 1) is NOT enforced. Because the objective is
        # minimised, the solver is free to set every included[j] to 0, which
        # drives the objective to 0 regardless of the box. Confirm whether an
        # explicit "inside => included" constraint is needed.
        #
        # Every pattern coordinate and every training value lies in
        # [lower, upper], so `upper - lower` is the tightest big-M that still
        # deactivates a constraint when included[j] == 0. A fixed constant
        # would be needlessly loose for small data and too small for data
        # whose range exceeds it.
        big_m = upper - lower
        model.include_constraint = pyo.ConstraintList()
        for j in self.N:
            slack = big_m * (1 - model.included[j])
            for i in self.d:
                value = float(self.training_set[j, i])
                model.include_constraint.add(model.pattern[0, i] <= value + slack)
                model.include_constraint.add(model.pattern[1, i] >= value - slack)

        # connect auxiliary variables: interval lengths are differences of pattern points
        model.interval_constraint = pyo.ConstraintList()
        for i in self.d:
            model.interval_constraint.add(
                model.interval_lengths[i] == model.pattern[1, i] - model.pattern[0, i]
            )

        return model

    def add_point_to_model(self, point):
        """Constrain the box to contain ``point``.

        Adds ``pattern[0, i] <= point[i] <= pattern[1, i]`` for every
        dimension ``i``. Constraints from a previous call are removed first,
        so the model always refers to exactly one point. Because the pattern
        variables are bounded by the training data's value range, a point
        outside that range makes the model infeasible.

        Args:
            point: array-like with ``d`` values (a flat vector or a ``1 x d`` array).

        Raises:
            ValueError: if ``point`` does not contain exactly ``d`` values.
        """
        point = np.asarray(point, dtype=float).ravel()
        if point.size != len(self.d):
            raise ValueError(
                f"point must have {len(self.d)} coordinate(s), got {point.size}"
            )

        # Re-assigning an existing component name on a Pyomo block implicitly
        # replaces it; drop the old constraint list explicitly instead.
        if hasattr(self.model, 'point_constraint'):
            self.model.del_component(self.model.point_constraint)

        self.model.point_constraint = pyo.ConstraintList()
        for i in self.d:
            coordinate = float(point[i])
            self.model.point_constraint.add(self.model.pattern[0, i] <= coordinate)
            self.model.point_constraint.add(coordinate <= self.model.pattern[1, i])

    def classify(self, point_to_be_classified: np.ndarray,
                 mip_solver: str = 'glpk', nlp_solver: str = 'ipopt'):
        """Solve the model with the box forced to contain the given point.

        Args:
            point_to_be_classified: array-like with ``d`` values.
            mip_solver: name of the MIP sub-solver handed to MindtPy.
            nlp_solver: name of the NLP sub-solver handed to MindtPy.

        Returns:
            The object returned by the MindtPy ``solve`` call (the solver
            results), not a boolean; interpreting it is left to the caller.
        """
        self.add_point_to_model(point_to_be_classified)
        solver = pyo.SolverFactory('mindtpy')
        return solver.solve(self.model, mip_solver=mip_solver, nlp_solver=nlp_solver)

In [37]:
from scipy.stats import multivariate_normal

# Fixed seed so the sampled training set, and every result derived from it,
# is reproducible under Restart & Run All.
SEED = 0

# Sample the training data: an array of shape (100, 2), i.e. 100 points with
# 2 coordinates each, drawn with scipy's default distribution parameters.
training_points = multivariate_normal.rvs(size=(100, 2), random_state=SEED)

# The point to classify is deliberately one of the training points.
point_to_be_classified = training_points[0]

In [38]:
# Build the detector on the sampled training set; the box volume must be at least 0.1.
rare_pattern_detect = RarePatternDetect(training_set=training_points, min_area=0.1)

In [39]:
# Solve the model with the chosen point constrained to lie inside the box.
results = rare_pattern_detect.classify(point_to_be_classified=point_to_be_classified)

In [40]:
# Display the solver results object returned by classify().
results

{'Problem': [{'Name': 'unknown', 'Lower bound': 2.6823208250489807e-07, 'Upper bound': -7.494096406374967e-09, 'Number of objectives': 1, 'Number of constraints': 407, 'Number of variables': 106, 'Number of binary variables': 100, 'Number of integer variables': 0, 'Number of continuous variables': 6, 'Number of nonzeros': None, 'Sense': 'minimize', 'Number of disjunctions': 0}], 'Solver': [{'Name': 'MindtPyOA', 'Status': 'ok', 'Message': None, 'User time': None, 'System time': None, 'Wallclock time': None, 'Termination condition': 'optimal', 'Termination message': None, 'Timing': Bunch(Call after main solve = 1.3582990504801273e-05, Call after subproblem solve = 3.4999684430658817e-06, OA cut generation = 0.011477167019620538, fixed subproblem = 0.28386662498814985, initialization = 0.6649985000258312, main = 0.057881458022166044, main loop = 0.43052479199832305, main_timer_start_time = 298657.088870833, total = 1.1410782090388238), 'Iterations': 1, 'Num infeasible nlp subproblem': 0, 