<h1> Candidate Elimination </h1>
<pre>
Initialize G to the set of maximally general hypotheses in H
Initialize S to the set of maximally specific hypotheses in H
</pre>
<pre>
If d is a positive example {
    Remove from G any hypothesis inconsistent with d
    For each hypothesis s in S that is not consistent with d{
        Remove s from S
        Add to S all minimal generalizations h of s such that {
            h is consistent with d, and some member of G is more general than h
        }
        Remove from S any hypothesis that is more general than another hypothesis in S
    }
}
</pre> 
<pre>
If d is a negative example {
    Remove from S any hypothesis inconsistent with d
    For each hypothesis g in G that is not consistent with d{
        Remove g from G
        Add to G all minimal specializations h of g such that {
            h is consistent with d and some member of S is more specific than h
        }
        Remove from G any hypothesis that is less general than another hypothesis in G.
    }
}
</pre>

The <b>with</b> statement is used for resource management: it closes the file automatically when the block ends, even if an exception is raised inside it.

In [1]:
#dataset 
import csv

# newline='' hands newline handling to the csv module, as its documentation
# requires for file objects passed to csv.reader.
with open('enjoySport.csv', newline='') as csvFile:
    # [1:] drops the first column of every row (the row index: 0, 1, 2, ...).
    examples = [tuple(line)[1:] for line in csv.reader(csvFile)]
# [1:] drops the first row, which holds the headings (Sky, AirTemp, Humidity, Wind, ...).
examples = examples[1:]
examples

[('Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Y'),
 ('Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Y'),
 ('Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'N'),
 ('Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Y')]

In [2]:
#getting all the existing domains 
def get_domains(examples):
    """Collect the distinct values observed in each column of ``examples``.

    The number of columns is taken from the first row. Values are gathered
    in sets so duplicates collapse; the list of sets is printed, and the
    result is returned as a list of alphabetically sorted lists (one per
    column).
    """
    columns = []
    for _ in examples[0]:
        columns.append(set())
    for row in examples:
        for position, value in enumerate(row):
            columns[position].add(value)
    print(columns)
    return [sorted(values) for values in columns]

# Demo: compute the per-column value lists for the loaded examples
# (get_domains also prints the intermediate list of sets).
get_domains(examples)

[{'Sunny', 'Rainy'}, {'Cold', 'Warm'}, {'High', 'Normal'}, {'Strong'}, {'Cool', 'Warm'}, {'Same', 'Change'}, {'Y', 'N'}]


[['Rainy', 'Sunny'],
 ['Cold', 'Warm'],
 ['High', 'Normal'],
 ['Strong'],
 ['Cool', 'Warm'],
 ['Change', 'Same'],
 ['N', 'Y']]

In [3]:
def g_0(n):
    """Return the maximally general hypothesis: a tuple of ``n`` '?' entries."""
    return tuple(["?"] * n)

def s_0(n):
    """Return the maximally specific hypothesis: a tuple of ``n`` '0' entries."""
    return tuple(['0'] * n)

In [4]:
#checking if h1 is more general than h2
def more_general(h1, h2):
    """Return True when ``h1`` is at least as general as ``h2`` in every position.

    ``h1`` is a hypothesis; ``h2`` is another hypothesis or an example.
    The two are compared pairwise (via ``zip``), and every pair must pass.
    """
    def covers(general, specific):
        # '?' accepts anything.
        if general == "?":
            return True
        # '0' accepts nothing.
        if general == "0":
            return False
        # A concrete value accepts the same value, or the empty '0' slot.
        return general == specific or specific == "0"

    return all(covers(a, b) for a, b in zip(h1, h2))

# Sanity check: the all-'?' hypothesis is more general than a concrete example, so this prints True.
print(more_general(['?', '?', '?', '?', '?', '?'], ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']))

# min_generalizations
def fulfills(example, hypothesis):
    """Return True when ``hypothesis`` (a member of G or S) covers ``example``.

    An example is compared exactly like a hypothesis would be, so the check
    is delegated to ``more_general`` with the hypothesis as the general side.
    """
    covered = more_general(hypothesis, example)
    return covered

True


In [5]:
def min_generalizations(h, x):
    """Minimally generalize the specific hypothesis ``h`` so it covers ``x``.

    Positions that already cover the example are kept. A '0' slot takes the
    example's value; any other mismatching slot becomes '?'.

    Returns a one-element list holding the generalized hypothesis as a tuple.
    """
    generalized = []
    for idx, current in enumerate(h):
        # One-element slices keep both operands as sequences, which is what
        # fulfills() needs in order to zip over them.
        if fulfills(x[idx:idx + 1], h[idx:idx + 1]):
            generalized.append(current)
        elif current != '0':
            generalized.append('?')
        else:
            generalized.append(x[idx])
    return [tuple(generalized)]

# Demo: the two '0' slots take the example's values, and the mismatching
# 'sunny' slot is widened to '?'.
min_generalizations(h=('0', '0'  , 'sunny'), 
                    x=('rainy', 'windy', 'cloudy'))

[('rainy', 'windy', '?')]

In [6]:
def min_specializations(h, domains, x):
    """List the minimal specializations of general hypothesis ``h`` against ``x``.

    ``h`` is a tuple taken from G, ``domains[i]`` lists the values of
    attribute ``i``, and ``x`` is the (negative) example. Each '?' slot is
    replaced, one at a time, by every domain value that differs from the
    example's value. Each concrete slot is replaced by '0'. '0' slots are
    left alone. Progress is printed along the way.

    Returns the list of specialized hypotheses (tuples).
    """
    specialized = []
    print('g -> ',h)
    for pos, slot in enumerate(h):
        head, tail = h[:pos], h[pos + 1:]
        if slot == "?":
            for val in domains[pos]:
                # Skip the value carried by the example itself.
                if x[pos] == val:
                    continue
                # (val,) keeps the concatenation a tuple.
                candidate = head + (val,) + tail
                print(head, val, tail, candidate)
                specialized.append(candidate)
        elif slot != "0":
            specialized.append(head + ('0',) + tail)
    return specialized

# Demo: each '?' slot is replaced by every domain value other than the
# example's own value ('b' for the first slot, 'x' for the second).
min_specializations(h=('?', '?',), 
                    domains=[['a', 'b', 'c'], ['x', 'y']], 
                    x=('b', 'x'))

g ->  ('?', '?')
() a ('?',) ('a', '?')
() c ('?',) ('c', '?')
('?',) y () ('?', 'y')


[('a', '?'), ('c', '?'), ('?', 'y')]

In [7]:
def generalize_S(x, G, S):
    """Generalize the specific boundary ``S`` so it covers positive example ``x``.

    ``S`` is modified in place and also returned. Every member that does not
    cover ``x`` is replaced by those of its minimal generalizations that some
    member of ``G`` is more general than. Afterwards any member of ``S`` that
    is more general than another member is dropped.
    """
    # Walk a snapshot, because S is mutated inside the loop.
    for s in tuple(S):
        if s not in S or fulfills(x, s):
            continue
        S.remove(s)
        Splus = min_generalizations(s, x)
        print('s plus', Splus, G)
        # Keep only generalizations that stay below some hypothesis in G.
        bounded = [h for h in Splus if any(more_general(g, h) for g in G)]
        S.update(bounded)
        # Materialize before removing: S must not change while it is iterated.
        too_general = [
            h for h in S
            if any(more_general(h, other) for other in S if h != other)
        ]
        S.difference_update(too_general)
    return S
# Demo: the single member of S does not cover the positive example, so it is
# replaced by its minimal generalization (kept because a member of G is more general).
print(generalize_S(['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'], {('?', 'Warm', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'Change')}, {('Rainy', '0', '0', '0', '0', '0')}))

s plus [('?', 'Warm', 'High', 'Strong', 'Warm', 'Same')] {('?', '?', '?', '?', '?', 'Change'), ('?', 'Warm', '?', '?', '?', '?')}
{('?', 'Warm', 'High', 'Strong', 'Warm', 'Same')}


In [8]:
def specialize_G(x, domains, G, S):
    """Specialize the general boundary ``G`` so it excludes negative example ``x``.

    ``G`` is modified in place and also returned. Every member that covers
    ``x`` is replaced by those of its minimal specializations that are more
    general than some member of ``S``. Afterwards any member of ``G`` that is
    less general than another member is dropped.
    """
    # Walk a snapshot, because G is mutated inside the loop.
    for g in tuple(G):
        if g not in G or not fulfills(x, g):
            continue
        G.remove(g)
        Gminus = min_specializations(g, domains, x)
        # Keep only specializations that still sit above some hypothesis in S.
        kept = [h for h in Gminus if any(more_general(h, s) for s in S)]
        G.update(kept)
        # Materialize before removing: G must not change while it is iterated.
        dominated = [
            h for h in G
            if any(more_general(g1, h) for g1 in G if h != g1)
        ]
        G.difference_update(dominated)
    return G
# Demo: treat the example as negative; the label column is sliced off the
# domains with [:-1] before they are passed to specialize_G.
print(specialize_G(['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'],get_domains(examples)[:-1], {('?', 'Warm', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'Change'), ('?', 'Warm', '?', '?', '?', 'Change')}, {('Rainy', '0', '0', '0', '0', '0')}))

[{'Sunny', 'Rainy'}, {'Cold', 'Warm'}, {'High', 'Normal'}, {'Strong'}, {'Cool', 'Warm'}, {'Same', 'Change'}, {'Y', 'N'}]
g ->  ('?', 'Warm', '?', '?', '?', '?')
() Rainy ('Warm', '?', '?', '?', '?') ('Rainy', 'Warm', '?', '?', '?', '?')
('?', 'Warm') Normal ('?', '?', '?') ('?', 'Warm', 'Normal', '?', '?', '?')
('?', 'Warm', '?', '?') Cool ('?',) ('?', 'Warm', '?', '?', 'Cool', '?')
('?', 'Warm', '?', '?', '?') Change () ('?', 'Warm', '?', '?', '?', 'Change')
{('?', 'Warm', '?', '?', 'Cool', '?'), ('Rainy', 'Warm', '?', '?', '?', '?'), ('?', 'Warm', 'Normal', '?', '?', '?'), ('?', '?', '?', '?', '?', 'Change')}


In [9]:
# Candidate elimination: run every example through the S and G boundaries.
domains = get_domains(examples)[:-1]  # last column is the Y/N label, not an attribute

G = {g_0(len(domains))}
S = {s_0(len(domains))}
i = 0
print(f'\n S[{i}]: {S}')
print(f' G[{i}]: {G}')


for i, xcx in enumerate(examples, start=1):
    xcx = list(xcx)
    # Split the row into its attribute values and its decision (Y or N).
    x, cx = xcx[:-1], xcx[-1]
    if cx != 'Y':
        # Negative example: drop members of S that cover it, then specialize G.
        S = {s for s in S if not fulfills(x, s)}
        G = specialize_G(x, domains, G, S)
    else:
        # Positive example: drop members of G that miss it, then generalize S.
        G = {g for g in G if fulfills(x, g)}
        S = generalize_S(x, G, S)
    print(f'\n S[{i}]: {S}')
    print(f' G[{i}]: {G}')

[{'Sunny', 'Rainy'}, {'Cold', 'Warm'}, {'High', 'Normal'}, {'Strong'}, {'Cool', 'Warm'}, {'Same', 'Change'}, {'Y', 'N'}]

 S[0]: {('0', '0', '0', '0', '0', '0')}
 G[0]: {('?', '?', '?', '?', '?', '?')}
s plus [('Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same')] {('?', '?', '?', '?', '?', '?')}

 S[1]: {('Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same')}
 G[1]: {('?', '?', '?', '?', '?', '?')}
s plus [('Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same')] {('?', '?', '?', '?', '?', '?')}

 S[2]: {('Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same')}
 G[2]: {('?', '?', '?', '?', '?', '?')}
g ->  ('?', '?', '?', '?', '?', '?')
() Sunny ('?', '?', '?', '?', '?') ('Sunny', '?', '?', '?', '?', '?')
('?',) Warm ('?', '?', '?', '?') ('?', 'Warm', '?', '?', '?', '?')
('?', '?') Normal ('?', '?', '?') ('?', '?', 'Normal', '?', '?', '?')
('?', '?', '?', '?') Cool ('?',) ('?', '?', '?', '?', 'Cool', '?')
('?', '?', '?', '?', '?') Same () ('?', '?', '?', '?', '?', 'Same')

 S[3]: {('Sunny', 'Warm', '?