In [27]:
from collections import defaultdict
from math import log

def train(samples):
    classes, freq = defaultdict(lambda:0), defaultdict(lambda:0)
    for feats, label in samples:
        classes[label] += 1                 # count classes frequencies
        for feat in feats:
            freq[label, feat] += 1          # count features frequencies

    for label, feat in freq:                # normalize features frequencies
        freq[label, feat] /= classes[label]
    for c in classes:                       # normalize classes frequencies
        classes[c] /= len(samples)
    print(classes, freq)
    return classes, freq                    # return P(C) and P(O|C)

def classify(classifier, feats):
    classes, prob = classifier
    return min(classes.keys(),              # calculate argmin(-log(P(C|O))) -> argmax(P(C|O))
        key = lambda cl: -log(classes[cl]) + \
            sum(-log(prob.get((cl,feat))) for feat in feats))

def get_features(sample): return (sample[-1]) # get last letter

samples = (("Anna", "F"), ("Michaelq", "M"), ("Jane", "F"), ("Ilya", "M"), ("Anthony", "M"))
features = [(get_features(feat), label) for feat, label in samples]
classifier = train(features)

defaultdict(<function train.<locals>.<lambda> at 0x13545cb80>, {'F': 0.4, 'M': 0.6}) defaultdict(<function train.<locals>.<lambda> at 0x1355f0860>, {('F', 'a'): 0.5, ('M', 'q'): 0.3333333333333333, ('F', 'e'): 0.5, ('M', 'a'): 0.3333333333333333, ('M', 'y'): 0.3333333333333333})


In [28]:
classifier

(defaultdict(<function __main__.train.<locals>.<lambda>()>,
             {'F': 0.4, 'M': 0.6}),
 defaultdict(<function __main__.train.<locals>.<lambda>()>,
             {('F', 'a'): 0.5,
              ('M', 'q'): 0.3333333333333333,
              ('F', 'e'): 0.5,
              ('M', 'a'): 0.3333333333333333,
              ('M', 'y'): 0.3333333333333333}))

In [29]:
print('gender: ', classify(classifier, get_features(u'Milana')))

gender:  F
