# Candidate Elimination Algorithm for selecting best possible hypothesis from both positive and negative examples

In [1]:
import pandas as pd
import numpy as np

# Step 1: Load the Dataset

In [2]:
# Read the training examples from a CSV file (relative path) into a DataFrame.
# Later cells skip the first column, treat the last column as the target
# label, and use the columns in between as attributes.
df = pd.read_csv('Data_Find_S_Class_example.csv')

In [3]:
# Attribute matrix: every column except the first and the last one,
# converted to a NumPy array with one row per training example.
attributes = np.array(df.iloc[:,1:-1])
attributes  # bare expression so the notebook displays the array

array([['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'],
       ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same'],
       ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change'],
       ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']],
      dtype=object)

In [4]:
# Target labels: the last column of the DataFrame as a NumPy array
# (the cells below compare these values against 'Yes' and 'No').
targets = np.array(df.iloc[:,-1])
targets  # bare expression so the notebook displays the array

array(['Yes', 'Yes', 'No', 'Yes'], dtype=object)

# Step 2: Initialize Specific and General Hypothesis

In [5]:
# Most specific hypothesis: one '0' placeholder per attribute column.
specific_hypothesis = ['0' for _ in range(attributes.shape[1])]
print(specific_hypothesis)

['0', '0', '0', '0', '0', '0']


# Most general hypothesis: a square grid with one all-'?' row per attribute.
general_hypothesis_1 = ['?']*attributes.shape[1]
# Build an independent copy of the template for every row. Multiplying the
# outer list (`[general_hypothesis_1] * n`) would store n references to the
# SAME inner list, so updating one cell would silently change every row.
general_hypothesis = [list(general_hypothesis_1) for _ in range(attributes.shape[1])]
print(general_hypothesis)
type(general_hypothesis)  # bare expression so the notebook displays the type

In [6]:
# Show every training example together with its row index.
for row_number, example in enumerate(attributes):
    print(row_number, example)

0 ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
1 ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
2 ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
3 ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']


In [7]:
# Print each attribute value of each training example on its own line,
# walking as many columns as the specific hypothesis has entries.
for example in attributes:
    for column in range(len(specific_hypothesis)):
        print(example[column])

Sunny
Warm
Normal
Strong
Warm
Same
Sunny
Warm
High
Strong
Warm
Same
Rainy
Cold
High
Strong
Warm
Change
Sunny
Warm
High
Strong
Cool
Change


In [8]:
# Rebuild the general hypothesis as a square grid of '?' with one
# independent row per attribute of the specific hypothesis.
general_hypothesis = [
    ['?'] * len(specific_hypothesis)
    for _ in range(len(specific_hypothesis))
]
print("general_h: ",general_hypothesis)

general_h:  [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]


# Pass over the positive examples only: seed the specific hypothesis with the
# first training example (regardless of its label) and generalise every
# attribute that a positive example contradicts.
specific_hypothesis = attributes[0].copy()
for example, label in zip(attributes, targets):
    if label != 'Yes':
        continue
    for j, value in enumerate(example):
        if value != specific_hypothesis[j]:
            specific_hypothesis[j] = '?'
            # Reset only the diagonal entry of row j. Assigning to
            # general_hypothesis[j] itself would replace the whole row (a
            # list) with a string and break any later
            # `general_hypothesis[j][j] = ...` assignment.
            general_hypothesis[j][j] = '?'
print(specific_hypothesis)
print(general_hypothesis)

# Step 3: For each training example in the data
# Step 4: If the example is positive
## If attribute_value == specific_hypothesis_value
### Retain the specific_hypothesis_value
## Else replace the attribute with '?'
# Step 5: If the example is negative
## Make the general hypothesis more specific.

In [9]:
# Candidate elimination pass.
# The specific hypothesis starts as a copy of the first training example.
specific_hypothesis = attributes[0].copy()
for example, label in zip(attributes, targets):
    if label == 'Yes':
        # Positive example: generalise every attribute that disagrees with the
        # specific hypothesis and clear the matching diagonal entry of the
        # general hypothesis, printing the grid after each such change.
        for col, value in enumerate(example):
            if value == specific_hypothesis[col]:
                continue
            specific_hypothesis[col] = '?'
            general_hypothesis[col][col] = '?'
            print(general_hypothesis)
    elif label == 'No':
        # Negative example: where it disagrees with the specific hypothesis,
        # copy the specific value onto the diagonal of the general hypothesis;
        # where it agrees, reset that diagonal entry to '?'.
        for col, value in enumerate(example):
            disagrees = value != specific_hypothesis[col]
            general_hypothesis[col][col] = specific_hypothesis[col] if disagrees else '?'

[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']]
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]


In [11]:
# Drop the rows of the general hypothesis that constrain nothing (all '?').
# The pattern is derived from the number of attributes instead of being
# hard-coded to six columns, so the cell works for any dataset width.
unconstrained_row = ['?'] * len(specific_hypothesis)
indices = [i for i, val in enumerate(general_hypothesis) if val == unconstrained_row]
print("\nIndices",indices)
# Filter in place in a single pass (same list object, no repeated remove()).
general_hypothesis[:] = [row for row in general_hypothesis if row != unconstrained_row]
print(general_hypothesis)
print(specific_hypothesis)


Indices [0, 1, 2, 3]
[['?', 'Warm', '?', '?', '?', '?'], ['Sunny', '?', '?', '?', '?', '?']]
['Sunny' 'Warm' '?' 'Strong' '?' '?']
