In [1]:
def comp(f,g):
    return lambda *x: f(g(*x))

import re, math
minutia_regexp = re.compile(
    "^(?P<id>[0-9]+):"              +   # the integer identifier of the detected minutia
     "(?P<mx>[0-9]+),"              +   # the x-pixel coordinate of the detected minutia
     "(?P<my>[0-9]+):"              +   # the y-pixel coordinate of the detected minutia
     "(?P<dir>[0-9]+):"             +   # the direction of the detected minutia (0:-31) == (0:-360) clockwise
     "(?P<rel>(0\.[0-9]+)|(1\.0)):" +   # the reliability measure assigned to the detected minutia
     "(?P<typ>(RIG)|(BIF)):"        +   # the type of the detected minutia
     "(?P<ftyp>(APP)|(DIS)):"       +   # the type of feature detected
     "(?P<fn>[0-9]+)"               +   # the integer identifier of the type of feature detected
     "(:(?P<neighbours>([0-9]+,[0-9]+;[0-9]+:?)*))?$") # neighbouring minutia

neighbour_regexp = re.compile(
     "^(?P<mx>[0-9]+),"             +  # the x-pixel coordinate of the neighbouring minutia
      "(?P<my>[0-9]+);"             +  # the y-pixel coordinate of the neighbouring minutia
      "(?P<rc>[0-9]+)$")               # the ridge count calculated between the detected minutia and its first neighbor

def toNumberIfPossible(string):
    try:
        return int(string)
    except ValueError:
        try:
            return float(string)
        except ValueError:
            return string

def parseMinutia(string):
    string = ''.join(list(x for x in string if x != ' '))
    m = re.match(minutia_regexp, string)
    if (m is None):
        raise Exception("Does not parse")
    res = {key:toNumberIfPossible(m.group(key)) for key in ["id","mx","my","dir","rel","typ","ftyp","fn"]}
    res["neighbours"] = []
    neighbours = m.group("neighbours")
    if neighbours:
        neighbours = neighbours.split(":")
        for neighbour in neighbours:
            ren = re.match(neighbour_regexp, neighbour)
            res["neighbours"].append({key:toNumberIfPossible(ren.group(key)) for key in ["mx","my","rc"]})
    return res
    
def parseFilename(filename):
    res = None
    with open(filename) as f:
        res = {(mx,my): m 
               for (mx,my,m) 
               in map(
                    comp(lambda x: (x["mx"], x["my"], x), parseMinutia),
                    list(f)[4:]
                )
        }
    for minutia in res.values():
        for n in minutia["neighbours"]:
            n["dir"] = res[(n["mx"],n["my"])]["dir"]
    return list(res.values())
    
        

In [2]:
a = parseMinutia("0 :   14,  366 :  5 :  0.063 :BIF : APP :  3 :   48, 338;  0 :   79, 334;  0 :   69, 350;  0 :   61, 361;  1 :   58, 403;  3")
a

{'dir': 5,
 'fn': 3,
 'ftyp': 'APP',
 'id': 0,
 'mx': 14,
 'my': 366,
 'neighbours': [{'mx': 48, 'my': 338, 'rc': 0},
  {'mx': 79, 'my': 334, 'rc': 0},
  {'mx': 69, 'my': 350, 'rc': 0},
  {'mx': 61, 'my': 361, 'rc': 1},
  {'mx': 58, 'my': 403, 'rc': 3}],
 'rel': 0.063,
 'typ': 'BIF'}

In [3]:
def rotate(vector, steps): # 32 steps in 360 degrees
    x = vector[0]*math.cos(steps*math.pi / 16) - vector[1]*math.sin(steps*math.pi / 16)
    y = vector[0]*math.sin(steps*math.pi / 16) + vector[1]*math.cos(steps*math.pi / 16)
    return (x,y)

def length(vector):
    return (vector[0]**2+vector[1]**2)**0.5

def angle(vec1, vec2):
    dotproduct = vec1[0]*vec2[0] + vec1[1]*vec2[1]
    return dotproduct / (length(vec1)*length(vec2))

In [4]:
def fingers():
    dir = r"mindtct/shelved_pairs_of_fingers/"
    import os, shelve
    files = [x for x in os.listdir(dir) if x.endswith(".dat")]
    for file in files:
        try:
            d = shelve.open(dir+file[:-len(".dat")], flag='r')
            yield d["finger"]
            d.close()
        except:
            continue

In [5]:
cached_fingers = []
for finger, _ in zip(fingers(), range(100)):
    cached_fingers.append(finger)

Exception ignored in: <generator object fingers at 0x00000283DE2E0FC0>
RuntimeError: generator ignored GeneratorExit


In [145]:
class Matcher:
    NotSimilarAtAll = 10**10
    
    def __init__(self, **params):
        self.N = params["N"]
        self.OptValue = params["OptValue"]
        self.MinDissimilarity = params["MinDissimilarity"]  # MUST be stricter than OptValue
        self.thresholds = params["thresholds"]
        self.weights = params["weights"]
        self.finalMatchThreshold= params["finalMatchThreshold"]
        self.minutiaProcessed = 5
        self.min_threshold = 0
        
        self.comparisons = 0
        
    def angle(self, vec1, vec2):
        from math import atan2, pi
        dotproduct = vec1[0]*vec2[0] + vec1[1]*vec2[1]
        determinant= vec1[0]*vec2[1] - vec1[1]*vec2[0]
        return int(round(((atan2(determinant, dotproduct) + pi) * 16)) // pi)

    def diff(self, P, N):
        return ((P["mx"]-N["mx"]), (P["my"]-N["my"]))
            
    def rotate(self, vector, steps): # 32 steps in 360 degrees
        x = vector[0]*math.cos(steps*math.pi / 16) - vector[1]*math.sin(steps*math.pi / 16)
        y = vector[0]*math.sin(steps*math.pi / 16) + vector[1]*math.cos(steps*math.pi / 16)
        return (x,y)
       
    def euclidian_distance(self, P,N):
        D  =  self.diff(P,N)
        return (D[0]**2+D[1]**2)**0.5
    
    def distance_relative_angle(self, P,N):
        up = (0, 1)
        Pdir = self.rotate(up,P["dir"])
        D = self.diff(P,N)
        #print(Pdir)
        #print(D)
        return self.angle((-D[0],-D[1]), Pdir)
    
    def orientation_relative_angle(self, P,N):
        #up = (0, 1)
        #Pdir = self.rotate(up, P["dir"])
        #Ndir = self.rotate(up, N["dir"])
        #return self.angle(Pdir, Ndir) # maybe inaccurate
        return (P["dir"] - N["dir"]) % 32
        
    def ridge_count(self, P,N):
        return N["rc"]
    
    def bounding_box(self,diffs):
        if not all(
            x < y for (x,y) in zip(diffs, self.thresholds)
        ):
            #print ("Out of the box:", diffs)
            return False
        return (x / y for (x,y) in zip(diffs, self.thresholds))
    
    def find_params(self, minutia, neigh, neigh_details):
        from math import hypot
        m = {}
        d = self.diff(minutia, neigh_details)
        m["ed"]  = round(hypot(d[0], d[1]) * 256) % 2**16
        m["dra"] = self.distance_relative_angle(minutia, neigh_details)
        m["ora"] = self.orientation_relative_angle(minutia, neigh_details)
        m["rc"]  = neigh["rc"]
        return m
    
    def prepare_minutia(self, minutia_list):
        hoods = []
        for minutia in minutia_list:
            hood = []
            for neigh in minutia['neighbours']:
                for minutia_n in minutia_list:
                    if (neigh['mx'] == minutia_n['mx'] and neigh['my'] == minutia_n['my'] and minutia_n["rel"] > self.min_threshold):
                        hood.append(self.find_params(minutia, neigh, minutia_n))
                        break
            minut = {}
            minut["min"] = hood
            minut["rel"] = minutia['rel']
            minut["id"] = minutia['id']
            hoods.append(minut)
        return sorted(hoods, key = lambda x: x['rel'], reverse = True)
                        
    def calc_dissimilarity(self, neigh_1, neigh_2):
        EdDiff = abs(neigh_2["ed"]-neigh_1["ed"])
        DraDiff= min((neigh_2["dra"]-neigh_1["dra"]) % 32, (neigh_1["dra"]-neigh_2["dra"]) % 32)
        OraDiff= min((neigh_2["ora"]-neigh_1["ora"]) % 32, (neigh_1["ora"]-neigh_2["ora"]) % 32)
        RcDiff = abs(neigh_2["rc"]-neigh_1["rc"])
        
        diffs = (EdDiff, DraDiff, OraDiff, RcDiff)
        normalized = self.bounding_box(diffs)
        if not normalized:
            return False
        
        weighted_diffs = list(x*y for (x,y) in zip(normalized, self.weights))
        #print(neigh_1)
        #print(neigh_2)
        #print(weighted_diffs, sum(weighted_diffs))
        return sum(weighted_diffs)
    
    def match_minutae(self, min_c, reference):
        for min_r in reference:
            MinutaeDiss = 0
            neigh_matches = 0
            matched_neigh = []
            for neigh_r in min_r["min"]:
                minDiss = {"diss":100, "neigh":""}
                for neigh_c in min_c["min"]:
                    if neigh_c in matched_neigh:
                        continue
                    diss = self.calc_dissimilarity(neigh_r, neigh_c)
                    if not diss:
                        continue
                    if diss < minDiss["diss"]:
                        minDiss["diss"] = diss
                        minDiss["neigh"] = neigh_c
                if minDiss["diss"] < 100:
                    matched_neigh.append(minDiss["neigh"])
                    MinutaeDiss += minDiss["diss"]
                    neigh_matches += 1
                if neigh_matches == self.N:
                    ret = {}
                    ret["minDiss"] = MinutaeDiss / self.N
                    ret["N"] = neigh_matches
                    min_r["min"] = []
                    print("MATCH!", ret)
                    return ret
            #if(neigh_matches > 0):
                #print(neigh_matches, MinutaeDiss)
        return False    
                
    
    def test_match(self, reference, candidate):
        globalDiss = 0
        MinutaeMatches = 0
        MatchCost = 0
        for min_c in candidate:
            match = self.match_minutae(min_c, reference)
            if not match:
                continue
            MinutaeMatches += 1
            #print(match)
            MatchCost += match["minDiss"]
            LastMatch = match["minDiss"]
            if self.stoppingConditions(MatchCost, LastMatch, MinutaeMatches):
                return True
        if MinutaeMatches > self.N:
            print("FINAL SHOWDOWN:", MatchCost, MinutaeMatches, MatchCost/MinutaeMatches)
            return MatchCost/MinutaeMatches < self.finalMatchThreshold
        else:
            return False
        
    def test_match_perf(self, reference, candidate):
        globalDiss = 0
        MinutaeMatches = 0
        MatchCost = 0
        for min_c in candidate[0:10]:
            match = self.match_minutae(min_c, reference[0:20])
            if not match:
                continue
            MinutaeMatches += 1
            #print(match)
            MatchCost += match["minDiss"]
            LastMatch = match["minDiss"]
            if self.stoppingConditions(MatchCost, LastMatch, MinutaeMatches):
                return True
        if MinutaeMatches > 1:
            print("FINAL SHOWDOWN:", MatchCost, MinutaeMatches, MatchCost/MinutaeMatches)
            return MatchCost/MinutaeMatches < self.finalMatchThreshold
        else:
            return False
                        
    
    def match_neighbours(self,p1,p2,n1,n2):
        """print("Parent1: {}\n Neighbour1: {}\n Parent2: {}\n Neighbour2: {}". format(
            str({x:y for (x,y) in p1.items() if x!="neighbours"}),
            str({x:y for (x,y) in n1.items() if x!="neighbours"}),
            str({x:y for (x,y) in p2.items() if x!="neighbours"}),
            str({x:y for (x,y) in n2.items() if x!="neighbours"}),
        ))"""
        self.comparisons += 1
        Ed1 = self.euclidian_distance(p1,n1)
        Ed2 = self.euclidian_distance(p2,n2)
        Dra1= self.distance_relative_angle(p1,n1)
        Dra2= self.distance_relative_angle(p2,n2)
        Ora1= self.orientation_relative_angle(p1,n1)
        Ora2= self.orientation_relative_angle(p2,n2)
        Rc1 = self.ridge_count(p1,n1)
        Rc2 = self.ridge_count(p2,n2)
        
        EdDiff = abs(Ed2-Ed1)
        DraDiff= abs(Dra2-Dra1)
        OraDiff= abs(Ora2-Ora1)
        RcDiff = abs(Rc2 - Rc1)
        
        diffs = (EdDiff, DraDiff, OraDiff, RcDiff)
        
        normalized = self.bounding_box(diffs)
        if not normalized:
            return Matcher.NotSimilarAtAll
        
        weighted_diffs = list(x*y for (x,y) in zip(normalized, self.weights))
        #print("Weighted", weighted_diffs)
        
        return sum(weighted_diffs)
        
    def match_minutia(self,min1, min2):
        #print("1: {} neighs, 2: {} neighs".format(len(min1["neighbours"]),len(min2["neighbours"])))
        matchedIs = []
        matchedJs = []
        totalDissimilarity = 0
        neighboursMatched = 0
        for iindex, I in enumerate(min1["neighbours"]):
            if iindex in matchedIs: continue
            mostSimilarIndex = None
            mostSimilarDissimilarity = None
            for jindex, J in enumerate(min2["neighbours"]):
                if jindex in matchedJs: continue
                dissimilarity = self.match_neighbours(min1,min2,I,J)
                if (dissimilarity is False):
                    continue
                if (mostSimilarDissimilarity is None) or (mostSimilarDissimilarity > dissimilarity):
                    mostSimilarIndex = jindex
                    mostSimilarDissimilarity = dissimilarity
            if mostSimilarDissimilarity is Matcher.NotSimilarAtAll:
                return Matcher.NotSimilarAtAll
            if (mostSimilarIndex is None):
                continue
            matchedIs.append(iindex)
            matchedJs.append(mostSimilarIndex)
            totalDissimilarity += (mostSimilarDissimilarity)
            neighboursMatched += 1
            #print ("Matched {} with {}".format(iindex, mostSimilarIndex))
            if (neighboursMatched >= self.N):
                return totalDissimilarity / neighboursMatched
            return Matcher.NotSimilarAtAll
                
    
    def filter_minutia(self,finger):
        import itertools
        return list(itertools.islice(sorted(finger, key=lambda x: -x["rel"]) ,self.minutiaProcessed))
    
    def stoppingConditions(self,MatchCost,LastDissimilarity,TotalMatched):
        return any([
            MatchCost / TotalMatched < self.OptValue,
            LastDissimilarity        < self.MinDissimilarity,
        ])
        
    def __call__(self,candidate, reference):
        candidate = self.filter_minutia(candidate)
        reference = self.filter_minutia(reference)
        matchedCs = []
        matchedRs = []
        totalDissimilarity = 0
        minutiaeMatched = 0
        for cindex, C in enumerate(candidate):
            if cindex in matchedCs: continue
            mostSimilarIndex = None
            mostSimilarDissimilarity = None
            for rindex, R in enumerate(reference):
                if rindex in matchedRs: continue
                dissimilarity = self.match_minutia(C,R)
                #print("Returned: {}".format(dissimilarity))
                if (mostSimilarDissimilarity is None) or (mostSimilarDissimilarity > dissimilarity):
                    mostSimilarIndex = rindex
                    mostSimilarDissimilarity = dissimilarity
            if mostSimilarDissimilarity is Matcher.NotSimilarAtAll:
                #print ("Total mismatch")
                #print("COMPARISONS", self.comparisons)
                return False
            matchedCs.append(cindex)
            matchedRs.append(mostSimilarIndex)
            totalDissimilarity += (mostSimilarDissimilarity)
            minutiaeMatched += 1
            #print("Matched {} with {} at dissimilarity {}".format(cindex,mostSimilarIndex,mostSimilarDissimilarity))
            if self.stoppingConditions(totalDissimilarity, mostSimilarDissimilarity, minutiaeMatched):
                #print("COMPARISONS", self.comparisons)
                return True
        #print ("Total dissimilarity: {}".format(totalDissimilarity))
        #print("COMPARISONS", self.comparisons)
        return (totalDissimilarity / minutiaeMatched < self.finalMatchThreshold)

In [146]:
import ParseFingers as PF

testFingerSame1 = PF.parseFilename(r"mindtct/minutie/f0002_05.png.min")
testFingerSame2 = PF.parseFilename(r"mindtct/minutie/s0002_05.png.min")
testFingerDiff1 = PF.parseFilename(r"mindtct/minutie/f0002_05.png.min")
testFingerDiff2 = PF.parseFilename(r"mindtct/minutie/s0001_01.png.min")

test = {'MinDissimilarity': 0.125,
 'N': 3.0,
 'OptValue': 0.215,
 'finalMatchThreshold': 0.4,
 'thresholds': [1152, #ED
  2, #DRA
  2, #ORA
  1], #RC
 'weights': (lambda l: [x/sum(l) for x in l]) ([5,2,2,10]),
  #'weights': [0.25, 0.25, 0.25, 0.25],
}

match = Matcher(**test)
#print(testFinger)
a = match.prepare_minutia(testFingerSame1)
b = match.prepare_minutia(testFingerSame2)
c = match.prepare_minutia(testFingerDiff1)
d = match.prepare_minutia(testFingerDiff2)
print(match.test_match_perf(a, b))
print(match.test_match_perf(c, d))

#match(testFinger, testFinger2)

MATCH! {'minDiss': 0.24549220272904482, 'N': 3}
False
False


In [133]:
from pathlib import Path
p = Path('mindtct/minutie')
finger_one = list(p.glob('f*.min'))
finger_two = list(p.glob('s*.min'))
pairs = []
for f in finger_one[0:50]:
    for s in finger_two[0:50]:
        pair = {}
        a = match.prepare_minutia(PF.parseFilename(str(f)))
        b = match.prepare_minutia(PF.parseFilename(str(s)))
        pair["correct"] = s.stem[1:] == f.stem[1:]
        pair["left"] = a
        pair["right"] = b
        pair["l_name"] = f
        pair["r_name"] = s
        pairs.append(pair)


In [143]:
print(len(pairs))
test_pairs = deepcopy(pairs)

2500


In [147]:
from copy import deepcopy
tp = 0
tn = 0
fp = 0
fn = 0
for p in test_pairs:
    res = match.test_match_perf(p["left"], p["right"])
    
    if res and p["correct"]:
        tp+=1
        print("------------------")
    elif res and not p["correct"]:
        print(p["l_name"], p["r_name"])
        fp+=1
        print("------------------")
    elif not res and p["correct"]:
        fn+=1
    elif not res and not p["correct"]:
        tn+=1
print("TP\tFP\tTN\tFN")
print(tp, fp, tn, fn, sep = '\t')

MATCH! {'minDiss': 0.24549220272904482, 'N': 3}
MATCH! {'minDiss': 0.2423702485380117, 'N': 3}
MATCH! {'minDiss': 0.12324865984405457, 'N': 3}
mindtct\minutie\f0004_05.png.min mindtct\minutie\s0039_03.png.min
------------------
MATCH! {'minDiss': 0.034722222222222224, 'N': 3}
------------------
MATCH! {'minDiss': 0.1291727582846004, 'N': 3}
------------------
MATCH! {'minDiss': 0.16436708089668614, 'N': 3}
------------------
MATCH! {'minDiss': 0.26958455165692, 'N': 3}
MATCH! {'minDiss': 0.21302387914230014, 'N': 3}
MATCH! {'minDiss': 0.19960709064327484, 'N': 3}
MATCH! {'minDiss': 0.2604471247563353, 'N': 3}
FINAL SHOWDOWN: 0.9426626461988303 4 0.23566566154970758
------------------
MATCH! {'minDiss': 0.22217653508771928, 'N': 3}
MATCH! {'minDiss': 0.053332115009746585, 'N': 3}
------------------
MATCH! {'minDiss': 0.028173732943469785, 'N': 3}
------------------
MATCH! {'minDiss': 0.22642543859649122, 'N': 3}
MATCH! {'minDiss': 0.23751218323586745, 'N': 3}
MATCH! {'minDiss': 0.149229

In [64]:
def test_batch():
    def cached():
        for finer in cached_fingers:
            yield finer
    gen = cached() # fingers()
    while True:
        valid1 = next(gen)
        valid2 = next(gen)
        switch1= next(gen)
        switch2= next(gen)
        yield [(valid1, True), (valid2, True)] + [(x,False) for x in list(zip(switch1, switch2))]

In [9]:
params = {
    "N": 3,
    "OptValue": 2,
    "MinDissimilarity": 0.5,
    "thresholds": [150,6,5,5],
    "weights": (lambda l: [x/sum(l) for x in l]) ([1,1,1,1]),
    "finalMatchThreshold": 10,
}

m = Matcher(**params)
#m(*next(fingers()))

In [66]:
tn = 0
tp = 0
fn = 0
fp = 0
def test_matcher(matcher, points=100):
    global tn,tp,fn,fp
    score = 0
    potential_score = 0
    for x,_ in zip(test_batch(), range(points)):
        for pair, value in x:
            res = matcher(*pair)
            if (res,value) == (False,False):
                tn+=1
            elif (res,value) == (False,True):
                fn+=1
            elif (res,value) == (True,False):
                fp+=1
            elif (res,value) == (True,True):
                tp+=1
            if (res==value): score+=1
            potential_score +=1
    return score
test_matcher(match)

  if __name__ == '__main__':


50

In [11]:
points = [] # (FP, FN)

In [12]:
len(points)

0

In [70]:
positive_matches = []
negative_matches = []

In [14]:
positive_matches[2]

IndexError: list index out of range

In [67]:
def test_matcher(matcher, points=100):
    tn,tp,fn,fp = 0,0,0,0
    for x,_ in zip(test_batch(), range(points)):
        for pair, value in x:
            res = matcher(*pair)
            if (res,value) == (False,False):
                tn+=1
            elif (res,value) == (False,True):
                fn+=1
            elif (res,value) == (True,False):
                fp+=1
            elif (res,value) == (True,True):
                tp+=1
    return (fp,fn)

In [68]:
def test_performance(matcher, points=100):
    positive_matches, negative_matches = [], []
    for x,_ in zip(test_batch(), range(points)):
        for pair, value in x:
            res = matcher(*pair)
            if res == True:
                positive_matches.append(matcher.comparisons)
            else:
                negative_matches.append(matcher.comparisons)
            matcher.comparisons = 0
    print (sum(positive_matches) / len(positive_matches), sum(negative_matches) / len(negative_matches))
    return positive_matches, negative_matches

In [71]:
test_performance(match)

  This is separate from the ipykernel package so we can avoid doing imports until


ZeroDivisionError: division by zero

In [17]:
def collect_randomized_data():
    global points, positive_matches, negative_matches
    import random
    while (True):
        res = dict()
        res ["N"]                   = random.randint (1,4)
        res ["OptValue"]            = random.uniform (0.3, 4)
        res ["MinDissimilarity"]    = random.uniform(0,res ["OptValue"])
        res ["thresholds"]          = [
                                        random.uniform(20,40),
                                        random.uniform(8,20),
                                        random.uniform(0.5,2),
                                        2
                                        ]
        res ["weights"]             = (lambda l: [x/sum(l) for x in l]) ([10,1,1,8])
        res ["finalMatchThreshold"] = max(res ["OptValue"], random.uniform(4,12))
        
        matcher = Matcher(**res)
        p,n = test_performance(matcher)
        positive_matches.append(p)
        negative_matches.append(n)
        #print(res)
        #points.append(res)
    
collect_randomized_data()

  This is separate from the ipykernel package so we can avoid doing imports until


24.792207792207794 24.608695652173914
38.735849056603776 29.340425531914892
24.789473684210527 24.625
24.819444444444443 24.571428571428573
46.121212121212125 33.776119402985074
45.15151515151515 33.208955223880594
38.056603773584904 29.25531914893617
38.6 29.377777777777776
53.0 35.28260869565217
24.83116883116883 24.47826086956522
44.27272727272727 33.1025641025641
24.82191780821918 24.555555555555557
38.476190476190474 28.862068965517242
45.592592592592595 32.97260273972603
45.529411764705884 33.43939393939394
49.54545454545455 35.1123595505618
51.07692307692308 35.28735632183908
24.83116883116883 24.47826086956522
51.083333333333336 35.03409090909091
38.61538461538461 29.333333333333332
52.57142857142857 33.655913978494624
45.607142857142854 33.31944444444444
38.77777777777778 29.717391304347824
38.86363636363637 28.857142857142858
50.9 34.22222222222222
50.666666666666664 36.71764705882353
24.82191780821918 24.555555555555557
24.82191780821918 24.555555555555557
24.82894736842105 

38.666666666666664 29.06896551724138
44.82142857142857 33.388888888888886
42.8421052631579 31.419753086419753
38.886792452830186 29.70212765957447
50.81818181818182 35.38202247191011
38.509433962264154 29.340425531914892
24.826666666666668 24.52
24.819444444444443 24.571428571428573
50.142857142857146 35.54651162790697
38.28 28.84
24.826666666666668 24.52
51.0625 36.92857142857143
24.794871794871796 24.59090909090909
38.18181818181818 29.357142857142858
45.27272727272727 33.417910447761194
51.15384615384615 36.94252873563219
38.24444444444445 29.363636363636363
24.82191780821918 24.555555555555557
49.7 35.166666666666664
45.73529411764706 33.833333333333336
38.6 28.92
45.294117647058826 33.81818181818182
48.34615384615385 38.351351351351354
45.32258064516129 34.17391304347826
38.72093023255814 28.649122807017545
38.57142857142857 29.75
45.34782608695652 33.03896103896104
38.75 29.333333333333332
37.916666666666664 29.692307692307693
45.80769230769231 32.82432432432432
38.85185185185185

52.57142857142857 34.1505376344086
24.816901408450704 24.586206896551722
38.509433962264154 29.51063829787234
49.76923076923077 35.59770114942529
50.0 35.666666666666664
24.82894736842105 24.5
44.25 32.1625
50.875 34.619565217391305
49.76923076923077 35.47126436781609
38.7037037037037 29.630434782608695
24.826666666666668 24.52
43.0 31.410714285714285
24.82894736842105 24.5
38.509433962264154 29.340425531914892
45.529411764705884 33.54545454545455
51.375 37.36904761904762
25.31578947368421 26.291666666666668
45.21052631578947 33.074074074074076
44.85 32.275
50.285714285714285 33.365591397849464
24.819444444444443 24.571428571428573
38.609756097560975 29.135593220338983
47.029411764705884 34.07575757575758
24.833333333333332 24.454545454545453
50.5 33.91111111111111
38.81132075471698 29.70212765957447
45.73529411764706 33.833333333333336
49.27272727272727 35.29213483146067
45.75 34.0
44.958333333333336 32.78947368421053
45.36363636363637 33.205128205128204
39.24390243902439 31.627118644

44.90909090909091 32.96153846153846
45.125 33.078947368421055
45.60606060606061 33.38805970149254
45.0 33.11392405063291
49.54545454545455 34.943820224719104
50.9 34.733333333333334
37.816326530612244 29.705882352941178
45.75 33.953125
38.43396226415094 29.340425531914892
45.64705882352941 33.54545454545455
24.826666666666668 24.52
51.15384615384615 37.0
24.816901408450704 24.586206896551722
50.61538461538461 36.758620689655174
24.82191780821918 24.555555555555557
49.0 34.644444444444446
24.826666666666668 24.52
50.9 34.22222222222222
38.82608695652174 29.22222222222222
45.0 33.20253164556962
49.0 35.03333333333333
51.375 37.333333333333336
24.82894736842105 24.5
45.0 32.278481012658226
24.82894736842105 24.5
50.53846153846154 35.95402298850575
50.875 34.891304347826086
47.0 33.67142857142857
45.484848484848484 33.52238805970149
50.38461538461539 36.5632183908046
24.789473684210527 24.625
38.53191489361702 28.92452830188679
45.47826086956522 33.38961038961039
24.83116883116883 24.47826

50.4375 36.583333333333336
24.816901408450704 24.586206896551722
26.945205479452056 26.77777777777778
24.816901408450704 24.586206896551722
38.35294117647059 29.424242424242426
44.857142857142854 32.721518987341774
38.43396226415094 29.340425531914892
24.82894736842105 24.5
50.875 34.70652173913044
45.07142857142857 33.59722222222222
45.04347826086956 32.61038961038961
50.666666666666664 36.63529411764706
45.375 33.0
47.41935483870968 34.11594202898551
27.686567164179106 30.12121212121212
44.55 32.7375
24.83116883116883 24.47826086956522
45.464285714285715 33.166666666666664
45.86206896551724 33.63380281690141
24.82191780821918 24.555555555555557
49.0 34.98888888888889
38.68888888888889 29.145454545454545
24.82894736842105 24.5
24.83116883116883 24.47826086956522
38.87234042553192 29.07547169811321
50.875 34.72826086956522
24.83116883116883 24.47826086956522
50.416666666666664 35.01136363636363
38.63636363636363 28.857142857142858
43.84615384615385 39.45977011494253
45.73529411764706 3

51.0625 36.666666666666664
44.82608695652174 32.83116883116883
24.82894736842105 24.5
26.225352112676056 27.344827586206897
45.82857142857143 34.2
45.44117647058823 33.43939393939394
24.82191780821918 24.555555555555557
44.4 32.575
45.82857142857143 34.2
24.826666666666668 24.52
38.43396226415094 29.595744680851062
24.826666666666668 24.52
38.13513513513514 29.063492063492063
50.875 34.81521739130435
25.893333333333334 26.12
24.826666666666668 24.52
49.0 34.833333333333336
46.78260869565217 33.97402597402598
24.824324324324323 24.53846153846154
50.0 35.666666666666664
50.5 33.91111111111111
52.57142857142857 33.53763440860215
45.17391304347826 32.8051948051948
24.826666666666668 24.52
24.826666666666668 24.52
38.333333333333336 28.846153846153847
38.509433962264154 29.51063829787234
24.816901408450704 24.586206896551722
38.142857142857146 29.295454545454547
50.625 36.79761904761905
50.07692307692308 36.4367816091954
45.148148148148145 33.43835616438356
49.76923076923077 35.666666666666

50.666666666666664 36.37647058823529
45.56666666666667 33.371428571428574
24.819444444444443 24.571428571428573
50.666666666666664 36.35294117647059
38.43396226415094 29.340425531914892
46.205882352941174 33.95454545454545
38.87234042553192 29.150943396226417
38.52 28.84
38.72549019607843 29.0
50.0 35.87356321839081
38.55555555555556 29.434782608695652
43.75 31.9
45.60606060606061 33.38805970149254
50.86666666666667 36.423529411764704
24.794871794871796 24.59090909090909
24.816901408450704 24.586206896551722
38.04 29.56
46.11764705882353 34.07575757575758
45.56 33.4
24.826666666666668 24.52
24.816901408450704 24.586206896551722
45.32142857142857 33.763888888888886
26.58730158730159 30.91891891891892
37.125 29.21153846153846
38.23529411764706 29.424242424242426
50.4375 36.55952380952381
50.416666666666664 35.01136363636363
38.82608695652174 28.925925925925927
44.523809523809526 32.41772151898734
38.84 29.16
24.82894736842105 24.5
24.794871794871796 24.59090909090909
44.96666666666667 33

38.68888888888889 29.145454545454545
44.82608695652174 32.83116883116883
38.52 28.84
39.416666666666664 29.076923076923077
44.04761904761905 32.29113924050633
50.9 34.388888888888886
51.57142857142857 35.91860465116279
38.38095238095238 28.862068965517242
50.142857142857146 35.74418604651163
50.285714285714285 34.30107526881721
41.5 30.902439024390244
24.826666666666668 24.52
38.68888888888889 29.145454545454545
45.53333333333333 33.84285714285714
38.48717948717949 28.868852459016395
38.43396226415094 29.340425531914892
38.45 29.0
24.82191780821918 24.555555555555557
24.816901408450704 24.586206896551722
38.27272727272727 29.357142857142858
51.15384615384615 36.839080459770116
45.61764705882353 33.72727272727273
45.81818181818182 33.76119402985075
45.486486486486484 34.07936507936508
38.13636363636363 28.714285714285715
25.095890410958905 26.037037037037038
38.57142857142857 29.75
24.84285714285714 24.533333333333335
37.9811320754717 29.25531914893617
51.0625 36.79761904761905
50.08333

KeyboardInterrupt: 

### Okay, time for some MACHINE LEARNING

In [23]:
"""adam = {
    "N": 3,
    "OptValue": 3.372311099894547,
    "MinDissimilarity": 1.8,
    "thresholds": [43.819599475473886,40.813192675288384,3,1.5],
    "weights": (lambda l: [x/sum(l) for x in l]) ([5,1,2,3]),
    "finalMatchThreshold": 13.641991683405926,
}"""
adam = test = {'MinDissimilarity': 2.2306793596698347,
 'N': 2.0,
 'OptValue': 2.962895941182219,
 'finalMatchThreshold': 9.510819603577431,
 'thresholds': [39.978606828772456,
  13.72104482880197,
  1.03034591789699,
  2],
 'weights': (lambda l: [x/sum(l) for x in l]) ([10,1,1,8])}

class GeneticAlgo:
    def __init__(self):
        pass
    
    def mutate(self,sm):
        for x in range(10):
            sm=self._mutate(sm)
        return sm
    
    def _mutate(self,sm):
        import random
        res = dict()
        res ["N"]                   = max(1, sm["N"] + random.choice([-1,1,0,0,0,0,0]))
        res ["OptValue"]            = max(0.5, sm["OptValue"] + random.uniform(-0.8,0.8))
        res ["MinDissimilarity"]    = random.uniform(0,res ["OptValue"])
        res ["thresholds"]          = [x*random.uniform(0.7,1.3) for x in sm ["thresholds"]]
        res ["weights"]             = sm ["weights"]
        res ["finalMatchThreshold"] = max(res ["OptValue"], sm ["finalMatchThreshold"] * random.uniform(0.8,1.2))
        return res
    
    def crossbreed(self,sp1,sp2):
        res = dict()
        res ["N"]                   = (sp1["N"]+sp2["N"])/2
        res ["OptValue"]            = (sp1["OptValue"]+sp2["OptValue"])/2
        res ["MinDissimilarity"]    = (sp1["MinDissimilarity"]+sp2["MinDissimilarity"])/2
        res ["thresholds"]          = [(x+y)/2 for (x,y) in zip(sp1["thresholds"],sp2["thresholds"])]
        res ["weights"]             = sp1 ["weights"]
        res ["finalMatchThreshold"] = (sp1["finalMatchThreshold"]+sp2["finalMatchThreshold"])/2
        return res
    
    def of(self,sp1):
        return test_matcher(Matcher(**sp1))
    
    def random_popul(self, size):
        m = lambda x: self.mutate(x)
        return [m(m(m(adam))) for _ in range(size)]
    
    def extinct(self,popul,desired_popul):
        tested = [(x,self.of(x)) for x in popul]
        tested = sorted(tested, key=lambda x: x[1])
        outcome = [x[0] for x in tested[:desired_popul]]
        print([x[1] for x in tested[:desired_popul]])
        return outcome
    
    def multiply(self,popul):
        import itertools
        res = []
        for mom,dad in itertools.combinations(popul,2):
            if mom is not dad:
                res.append((self.mutate(self.crossbreed(mom,dad))))
        return popul+res
    
    def step(self,popul):
        res = self.extinct(popul,9)
        return self.multiply(res)
    
    def peek(self,popul):
        print([self.of(x) for x in popul])

In [24]:
best_candidate = None

In [25]:
alg = GeneticAlgo()
popul = alg.random_popul(45)
while (True):
    popul = alg.step(popul)
    best_candidate = popul[0]

  This is separate from the ipykernel package so we can avoid doing imports until


[(0, 47), (0, 50), (0, 50), (0, 50), (1, 47), (1, 48), (2, 46), (2, 47), (2, 47)]
[(0, 47), (0, 48), (0, 48), (0, 49), (0, 49), (0, 49), (0, 49), (0, 49), (0, 49)]
[(0, 47), (0, 47), (0, 48), (0, 48), (0, 48), (0, 48), (0, 48), (0, 48), (0, 49)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 48), (0, 48)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47), (0, 47)]
[(0, 47), (0, 47

KeyboardInterrupt: 

In [None]:
best_candidate

In [None]:
tp,tn,fp,fn

In [None]:
test = {'MinDissimilarity': 2.2306793596698347,
 'N': 2.0,
 'OptValue': 2.962895941182219,
 'finalMatchThreshold': 9.510819603577431,
 'thresholds': [39.978606828772456,
  13.72104482880197,
  1.03034591789699,
  2],
 'weights': (lambda l: [x/sum(l) for x in l]) ([10,1,1,8])}

In [None]:
GeneticAlgo().of(test)

In [None]:
tp,tn,fp,fn

In [None]:
tn = 0
tp = 0
fn = 0
fp = 0

In [None]:
import matplotlib.pyplot as plot

plot.scatter([x[0] for x in points], [x[1] for x in points])
plot.ylabel('FNR')
plot.xlabel('FPR')
plot.show()

In [None]:
import matplotlib.pyplot as plot

# positive graph
avgs_pos = [sum(x)/len(x) for x in positive_matches]
avgs_neg = [sum(x)/len(x) for x in negative_matches]
plot.scatter(avgs_pos, avgs_neg)
plot.ylabel('Positive response')
plot.xlabel('Negative response')
plot.show()