In [3]:
import random

In [4]:
# Initialize the population with binary-encoded individuals
def initialize_population(pop_size, rule_length):
    return [random.choices([0, 1], k=rule_length) for _ in range(pop_size)]

# Fitness function: evaluates how well an individual matches the examples
def fitness(individual, examples):
    correct = 0
    for example in examples:
        if evaluate_rule(individual, example) == (example['PlayTennis'] == 'Yes'):
            correct += 1
    return correct / len(examples)

# Evaluate whether a rule matches an example
def evaluate_rule(rule, example):
    attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind']
    values = ['Sunny', 'Overcast', 'Rainy', 'Hot', 'Mild', 'Cool', 'High', 'Normal', 'Strong', 'Weak']
    
    for i in range(0, len(rule), 3):
        attr_index = i // 3
        if rule[i:i+3] != [0, 0, 0]:  # If the attribute is not "don't care"
            attr_value = example[attributes[attr_index]]
            if attr_value not in [values[j] for j in range(3) if rule[i+j] == 1]:
                return False
    return True

# Perform crossover between two parents to produce two children
def crossover(parent1, parent2):
    crossover_point = random.randint(1, len(parent1) - 1)
    child1 = parent1[:crossover_point] + parent2[crossover_point:]
    child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return child1, child2

# Mutate an individual with a given mutation rate
def mutate(individual, mutation_rate):
    return [1 - bit if random.random() < mutation_rate else bit for bit in individual]

# Main genetic algorithm function
def genetic_algorithm(examples, pop_size, generations, replacement_rate, mutation_rate):
    rule_length = 12  # 4 attributes x 3 bits each
    population = initialize_population(pop_size, rule_length)
    
    for _ in range(generations):
        fitnesses = [fitness(individual, examples) for individual in population]
        sorted_population = [x for _, x in sorted(zip(fitnesses, population), reverse=True)]
        
        new_population = sorted_population[:int((1 - replacement_rate) * pop_size)]
        
        while len(new_population) < pop_size:
            parent1, parent2 = random.choices(sorted_population[:int(pop_size / 2)], k=2)
            child1, child2 = crossover(parent1, parent2)
            child1 = mutate(child1, mutation_rate)
            child2 = mutate(child2, mutation_rate)
            new_population.extend([child1, child2])
        
        population = new_population[:pop_size]
    
    best_individual = max(population, key=lambda x: fitness(x, examples))
    return best_individual, fitness(best_individual, examples)

# Interpret the best rule
def interpret_rule(rule):
    attributes = ['Outlook', 'Temperature', 'Humidity', 'Wind']
    value_map = {
        'Outlook': ['Sunny', 'Overcast', 'Rainy'],
        'Temperature': ['Hot', 'Mild', 'Cool'],
        'Humidity': ['High', 'Normal'],
        'Wind': ['Strong', 'Weak']
    }
    
    interpretation = []
    index = 0

    for attr in attributes:
        num_values = len(value_map[attr])  # Get number of values for this attribute
        if any(rule[index:index + num_values]):  # If at least one bit is set
            selected_values = [value_map[attr][j] for j in range(num_values) if rule[index + j] == 1]
            interpretation.append(f"{attr} is {' or '.join(selected_values)}")
        index += num_values  # Move index to the next attribute's section

    return " AND ".join(interpretation)


In [5]:
# Example dataset (adjust as needed)
examples = [
    {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'No'},
    {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Strong', 'PlayTennis': 'No'},
    {'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'Yes'},
    {'Outlook': 'Rainy', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'Yes'},
]

# Example dataset
# examples = [
#     {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'No'},
#     {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Strong', 'PlayTennis': 'No'},
#     {'Outlook': 'Overcast', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'Yes'},
#     {'Outlook': 'Rainy', 'Temperature': 'Mild', 'Humidity': 'High', 'Wind': 'Weak', 'PlayTennis': 'Yes'},
#     {'Outlook': 'Rainy', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Weak', 'PlayTennis': 'Yes'},
#     {'Outlook': 'Rainy', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong', 'PlayTennis': 'No'},
#     {'Outlook': 'Overcast', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Wind': 'Strong', 'PlayTennis': 'Yes'},
# ]

# Run the genetic algorithm
pop_size = 50
generations = 100
replacement_rate = 0.5
mutation_rate = 0.01

best_rule, best_fitness = genetic_algorithm(examples, pop_size, generations, replacement_rate, mutation_rate)
print(f"Best rule: {best_rule}")
print(f"Fitness: {best_fitness}")
print(f"Interpretation: IF {interpret_rule(best_rule)} THEN PlayTennis = Yes")



Best rule: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Fitness: 0.5
Interpretation: IF Outlook is Sunny or Overcast or Rainy AND Temperature is Hot or Mild or Cool AND Humidity is High or Normal AND Wind is Strong or Weak THEN PlayTennis = Yes
