<h1> Candidate Elimination </h1>
<pre>
Initialize G to the set of maximally general hypotheses in H
Initialize S to the set of maximally specific hypotheses in H
</pre>
<pre>
If d is a positive example {
    Remove from G any hypothesis inconsistent with d
    For each hypothesis s in S that is not consistent with d{
        Remove s from S
        Add to S all minimal generalizations h of s such that {
            h is consistent with d, and some member of G is more general than h
        }
        Remove from S any hypothesis that is more general than another hypothesis in S
    }
}
</pre> 
<pre>
If d is a negative example {
    Remove from S any hypothesis inconsistent with d
    For each hypothesis g in G that is not consistent with d{
        Remove g from G
        Add to G all minimal specializations h of g such that {
            h is consistent with d and some member of S is more specific than h
        }
        Remove from G any hypothesis that is less general than another hypothesis in G.
    }
}
</pre>

In [1]:
def g_0(n):
    """Return the maximally general hypothesis: a tuple of n '?' wildcards."""
    return tuple("?" for _ in range(n))

def s_0(n):
    """Return the maximally specific hypothesis: a tuple of n '0' markers."""
    return tuple('0' for _ in range(n))

In [16]:
def more_general(h1, h2):
    """Return True if h1 is at least as general as h2 at every position.

    h1 is a hypothesis; h2 is either another hypothesis or a plain example.
    Per position:
      * '?' in h1 accepts anything,
      * '0' in h1 accepts nothing (not even another '0'),
      * a concrete value in h1 accepts the same value or '0' in h2.
    Positions are paired with zip(), so anything beyond the shorter of the
    two sequences is ignored.
    """
    def covers(x, y):
        # Decide a single attribute position.
        if x == "?":
            return True
        if x == "0":
            return False
        return x == y or y == "0"

    return all(covers(x, y) for x, y in zip(h1, h2))
more_general(
    ['?', '?', '?', '?', '?', '?'],
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'],
)

True

In [3]:

# Quick reminder of how zip() pairs two sequences element by element.
l1, l2 = [1, 2, 3], [3, 4, 5]

[*zip(l1, l2)]

[(1, 3), (2, 4), (3, 5)]

In [4]:
# min_generalizations
def fulfills(example, hypothesis):
    """Return True if `hypothesis` (a member of G or S) covers `example`.

    Covering an example is decided by the same position-wise rule used to
    compare two hypotheses, so the work is delegated to more_general() with
    the hypothesis in the "more general" role.
    """
    is_covered = more_general(hypothesis, example)
    return is_covered

def min_generalizations(h, x):
    """Return the minimal generalization of hypothesis h that covers x.

    Each position of h that does not already cover the matching position of
    x is relaxed: a '0' becomes the example's value, any other value becomes
    '?'. Positions that already cover x are kept unchanged.

    Returns a one-element list holding the generalized hypothesis as a tuple.
    """
    generalized = []
    for idx, current in enumerate(h):
        if fulfills(x[idx:idx+1], h[idx:idx+1]):
            # This attribute already covers the example; keep it.
            generalized.append(current)
        elif current == '0':
            # Nothing accepted yet: adopt the example's value.
            generalized.append(x[idx])
        else:
            # Conflicting concrete values: fall back to the wildcard.
            generalized.append('?')
    return [tuple(generalized)]

In [5]:
# Demo: '0' slots take the example's value, a conflicting value becomes '?'.
min_generalizations(
    h=('0', '0', 'sunny'),
    x=('rainy', 'windy', 'cloudy'),
)

[('rainy', 'windy', '?')]

In [6]:
def min_specializations(h, domains, x):
    """Return the minimal specializations of hypothesis h that exclude x.

    h is a tuple of attribute constraints, domains[i] lists the values seen
    for attribute i, and x is the (negative) example.

    For each position of h:
      * '?' is replaced, one at a time, by every domain value that differs
        from the example's value at that position;
      * a concrete value is replaced by '0';
      * '0' yields nothing.

    The result is a list of tuples, ordered by position and then by the
    order of values in the corresponding domain.
    """
    specializations = []
    for pos, constraint in enumerate(h):
        prefix, suffix = h[:pos], h[pos + 1:]
        if constraint == "?":
            specializations.extend(
                prefix + (candidate,) + suffix
                for candidate in domains[pos]
                if x[pos] != candidate
            )
        elif constraint != "0":
            specializations.append(prefix + ('0',) + suffix)
    return specializations

In [7]:
# Demo: the '?' slot is narrowed to every value except the example's 'b',
# and the concrete 'x' slot is replaced by '0'.
min_specializations(
    h=('?', 'x',),
    domains=[['a', 'b', 'c'], ['x', 'y']],
    x=('b', 'x'),
)

[('a', 'x'), ('c', 'x'), ('?', '0')]

In [8]:
#dataset 
import csv

# Load the training data. newline='' lets the csv module do its own newline
# handling, as the csv documentation requires for files passed to csv.reader.
with open('enjoySport.csv', newline='') as csvFile:
    # Drop the first column of every row; the remaining fields become a tuple.
    examples = [tuple(line)[1:] for line in csv.reader(csvFile)]
examples = examples[1:]  # skip the first (header) row
examples

[('Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Y'),
 ('Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Y'),
 ('Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'N'),
 ('Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Y')]

In [9]:
#getting all the existing domains 
def get_domains(examples):
    """Collect the distinct values seen in each column of `examples`.

    `examples` is a sequence of equally long rows. The number of columns is
    taken from the first row. An empty `examples` yields an empty list
    instead of raising IndexError.

    The intermediate list of sets is printed before returning (kept from the
    original notebook, whose recorded output includes it).

    Returns a list with one alphabetically sorted list of values per column.
    """
    # One set per column so that repeated values are stored only once.
    d = [set() for _ in examples[0]] if examples else []
    for x in examples:
        for i, xi in enumerate(x):
            d[i].add(xi)
    print(d)
    return [sorted(values) for values in d]

# Display the sorted value domain of every column in the data set
# (the Y/N label column is still included at this point).
get_domains(examples)

[{'Rainy', 'Sunny'}, {'Cold', 'Warm'}, {'Normal', 'High'}, {'Strong'}, {'Warm', 'Cool'}, {'Same', 'Change'}, {'N', 'Y'}]


[['Rainy', 'Sunny'],
 ['Cold', 'Warm'],
 ['High', 'Normal'],
 ['Strong'],
 ['Cool', 'Warm'],
 ['Change', 'Same'],
 ['N', 'Y']]

In [10]:
def generalize_S(x, G, S):
    """Update the specific boundary S for a positive example x.

    Every hypothesis in S that does not cover x is replaced by its minimal
    generalizations, keeping only those that some member of G is more
    general than. Afterwards any hypothesis in S that is more general than
    another member of S is dropped.

    S is modified in place and also returned; G is only read.
    """
    # Iterate over a snapshot because S is mutated inside the loop.
    for s in tuple(S):
        # Skip hypotheses pruned in an earlier iteration or already covering x.
        if s not in S or fulfills(x, s):
            continue

        S.remove(s)
        for h in min_generalizations(s, x):
            # Keep only generalizations bounded from above by G.
            if any(more_general(g, h) for g in G):
                S.add(h)

        # Materialize first: S must not change while it is being scanned.
        too_general = [
            h for h in S
            if any(more_general(h, other) for other in S if h != other)
        ]
        S.difference_update(too_general)
    return S

In [11]:
def specialize_G(x, domains, G, S):
    """Update the general boundary G for a negative example x.

    Every hypothesis in G that covers x is replaced by its minimal
    specializations, keeping only those that are more general than some
    member of S. Afterwards any hypothesis in G that another member of G is
    more general than is dropped.

    G is modified in place and also returned; S and domains are only read.
    """
    # Iterate over a snapshot because G is mutated inside the loop.
    for g in tuple(G):
        # Skip hypotheses pruned in an earlier iteration or already excluding x.
        if g not in G or not fulfills(x, g):
            continue

        G.remove(g)
        for h in min_specializations(g, domains, x):
            # Keep only specializations that have a counterpart in S.
            if any(more_general(h, s) for s in S):
                G.add(h)

        # Materialize first: G must not change while it is being scanned.
        dominated = [
            h for h in G
            if any(more_general(g1, h) for g1 in G if h != g1)
        ]
        G.difference_update(dominated)
    return G

In [15]:
#candidate elimination
# Attribute domains only: the last column is the Y/N label and is dropped.
domains = get_domains(examples)[:-1]

# Start from the most general and the most specific boundary.
G = {g_0(len(domains))}
S = {s_0(len(domains))}
i = 0
print(f'\n S[{i}]: {S}')
print(f' G[{i}]: {G}')


for i, xcx in enumerate(examples, start=1):
    xcx = list(xcx)
    # Split the row into its attribute values and its decision (Y or N).
    x, cx = xcx[:-1], xcx[-1]
    if cx != 'Y':
        # Negative example: drop members of S that cover it, then specialize G.
        S = {s for s in S if not fulfills(x, s)}
        G = specialize_G(x, domains, G, S)
    else:
        # Positive example: drop members of G that miss it, then generalize S.
        G = {g for g in G if fulfills(x, g)}
        for g in G:
            if fulfills(x, g):
                print('fulfills ',x,g)
        S = generalize_S(x, G, S)
    print(f'\n S[{i}]: {S}')
    print(f' G[{i}]: {G}')

[{'Rainy', 'Sunny'}, {'Cold', 'Warm'}, {'Normal', 'High'}, {'Strong'}, {'Warm', 'Cool'}, {'Same', 'Change'}, {'N', 'Y'}]

 S[0]: {('0', '0', '0', '0', '0', '0')}
 G[0]: {('?', '?', '?', '?', '?', '?')}
fulfills  ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'] ('?', '?', '?', '?', '?', '?')

 S[1]: {('Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same')}
 G[1]: {('?', '?', '?', '?', '?', '?')}
fulfills  ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'] ('?', '?', '?', '?', '?', '?')

 S[2]: {('Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same')}
 G[2]: {('?', '?', '?', '?', '?', '?')}

 S[3]: {('Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same')}
 G[3]: {('?', 'Warm', '?', '?', '?', '?'), ('Sunny', '?', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'Same')}
fulfills  ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change'] ('?', 'Warm', '?', '?', '?', '?')
fulfills  ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change'] ('Sunny', '?', '?', '?', '?', '?')

 S[4]: {('Sunny', 'Warm', '?', 'Stro