<a href="https://colab.research.google.com/github/lojaine001/lojaine001/blob/main/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
from collections import Counter

# Training table: eight labelled examples, each described by four categorical
# attributes, with the target label stored under the key "classe".
data = [
    dict(zip(("couleur", "forme", "taille", "motif", "classe"), row))
    for row in (
        ("cyan", "octogonal", "petit", "uniforme", "comestible"),
        ("orange", "hexagonal", "petit", "uniforme", "comestible"),
        ("orange", "octogonal", "petit", "rayé", "non comestible"),
        ("magenta", "hexagonal", "gigantesque", "rayé", "comestible"),
        ("cyan", "hexagonal", "gigantesque", "rayé", "comestible"),
        ("orange", "octogonal", "grand", "uniforme", "comestible"),
        ("magenta", "pentagonal", "gigantesque", "rayé", "non comestible"),
        ("magenta", "octogonal", "grand", "rayé", "non comestible"),
    )
]

# Step 1: entropy of a population
def entropy(data, target_attribute):
    """Return the Shannon entropy, in bits, of `target_attribute` over `data`.

    Args:
        data: sized collection of records (mappings) that each contain
            `target_attribute`.
        target_attribute: key whose value distribution is measured.

    Returns:
        -sum(p * log2(p)) over the distinct values of `target_attribute`,
        where p is the relative frequency of a value; 0 for empty `data`.
    """
    total = len(data)
    frequencies = Counter(record[target_attribute] for record in data)
    # Relative frequency of every distinct value, in first-seen order
    probabilities = [occurrences / total for occurrences in frequencies.values()]
    return -sum(p * math.log2(p) for p in probabilities)

# Baseline entropy of the target label ("classe") over the whole table
initial_entropy = entropy(data, target_attribute="classe")
initial_entropy


0.954434002924965

In [6]:
import numpy as np

def entropy_numpy(data, target_attribute):
    """Return the Shannon entropy, in bits, of `target_attribute` using NumPy.

    Args:
        data: collection of records (mappings) that each contain
            `target_attribute`.
        target_attribute: key whose value distribution is measured.

    Returns:
        NumPy scalar holding -sum(p * log2(p)) over the distinct values.
    """
    labels = [record[target_attribute] for record in data]
    # Only the occurrence counts matter; the distinct values are discarded.
    _, occurrences = np.unique(labels, return_counts=True)
    probabilities = occurrences / occurrences.sum()
    return -(probabilities * np.log2(probabilities)).sum()

# Baseline entropy of the target label, computed with the NumPy implementation
# defined in this cell (the previous code called the pure-Python `entropy`,
# so `entropy_numpy` was never exercised).
# The result gets its own name so the pure-Python `initial_entropy` computed
# earlier is left untouched; float() unwraps the NumPy scalar for display.
initial_entropy_numpy = float(entropy_numpy(data, "classe"))
initial_entropy_numpy

0.954434002924965

In [2]:
# Pure-Python (math) version
# Information gain obtained by splitting on one attribute
def information_gain(data, target_attribute, attribute):
    """Return the information gain of splitting `data` on `attribute`.

    gain = H(data) - sum(len(subset) / len(data) * H(subset))

    where H is `entropy` of `target_attribute` and the subsets are the groups
    of records sharing the same value for `attribute`.

    The parent entropy is computed from the `data` passed in, not read from a
    notebook-level variable, so the result is correct for any subset of the
    table (as needed when a tree is built recursively) and does not depend on
    which cells were executed beforehand.

    Args:
        data: sized collection of records (mappings) containing both keys.
        target_attribute: key of the class label.
        attribute: key of the attribute to split on.

    Returns:
        The information gain as a number; 0 for empty `data`.
    """
    total = len(data)

    # Group the records by their value for `attribute`
    subsets = {}
    for item in data:
        subsets.setdefault(item[attribute], []).append(item)

    # Entropy remaining after the split: size-weighted sum over the groups
    subset_entropy = sum(
        (len(subset) / total) * entropy(subset, target_attribute)
        for subset in subsets.values()
    )
    return entropy(data, target_attribute) - subset_entropy

# Candidate attributes to split on
attributes = ["couleur", "forme", "taille", "motif"]

# Information gain of every candidate attribute with respect to "classe"
gains = dict(
    (name, information_gain(data, "classe", name))
    for name in attributes
)
gains


{'couleur': 0.2657121273840979,
 'forme': 0.45443400292496505,
 'taille': 0.015712127384097885,
 'motif': 0.34758988139079716}

In [7]:
# NumPy version
def information_gain_numpy(data, target_attribute, attribute):
    """Return the information gain of splitting `data` on `attribute`.

    Computed as the entropy of `target_attribute` over `data` minus the
    size-weighted sum of the entropies of the groups that share a value for
    `attribute`; every entropy comes from `entropy_numpy`.

    Args:
        data: sized collection of records (mappings) containing both keys.
        target_attribute: key of the class label.
        attribute: key of the attribute to split on.

    Returns:
        The information gain (parent entropy minus remaining entropy).
    """
    total = len(data)

    # Bucket the records by the value they hold for `attribute`
    partitions = {}
    for record in data:
        partitions.setdefault(record[attribute], []).append(record)

    # Entropy left after the split, each bucket weighted by its share of `data`
    remainder = sum(
        (len(part) / total) * entropy_numpy(part, target_attribute)
        for part in partitions.values()
    )
    return entropy_numpy(data, target_attribute) - remainder
# Candidate attributes to split on
attributes = ["couleur", "forme", "taille", "motif"]

# Information gain of every attribute, computed with the NumPy implementation
# defined in this cell (the previous code called the pure-Python
# `information_gain`, so `information_gain_numpy` was never exercised).
# The result gets its own name so the pure-Python `gains` is left untouched;
# float() unwraps the NumPy scalars so the dict displays plain numbers.
gains_numpy = {
    attribute: float(information_gain_numpy(data, "classe", attribute))
    for attribute in attributes
}
gains_numpy

{'couleur': 0.2657121273840979,
 'forme': 0.45443400292496505,
 'taille': 0.015712127384097885,
 'motif': 0.34758988139079716}

In [3]:
# Recursive construction of the decision tree
class DecisionNode:
    """Node of a decision tree: an attribute test with branches, or a leaf.

    Attributes:
        attribute: name of the attribute tested at this node (None by default).
        branches: dict mapping an attribute value to the child node.
        label: class predicted when the node is a leaf (None by default).
    """

    def __init__(self, attribute=None, branches=None, label=None):
        self.attribute = attribute
        # A missing or empty mapping is replaced by a fresh dict, so nodes
        # never share one branch table by accident.
        self.branches = branches if branches else {}
        self.label = label

def id3(data, target_attribute, attributes):
    """Build a decision tree with the ID3 algorithm.

    Args:
        data: non-empty list of records (mappings) containing
            `target_attribute` and every name in `attributes`.
        target_attribute: key of the class label.
        attributes: names of the attributes still available for splitting.

    Returns:
        DecisionNode: a leaf when all examples share one class or no attribute
        is left (majority class); otherwise a node testing the attribute with
        the highest information gain, with one branch per observed value.

    Raises:
        ValueError: if `data` is empty, since no class can be predicted.
    """
    if len(data) == 0:
        raise ValueError("id3() requires at least one training example")

    classes = [item[target_attribute] for item in data]

    # Case 1: every example belongs to the same class -> leaf
    if len(set(classes)) == 1:
        return DecisionNode(label=classes[0])

    # Case 2: no attribute left -> leaf labelled with the majority class
    if not attributes:
        return DecisionNode(label=Counter(classes).most_common(1)[0][0])

    # Case 3: split on the attribute with the highest information gain.
    # max() keeps the first maximum, so ties go to the earliest attribute.
    best_attribute = max(
        attributes,
        key=lambda attribute: information_gain(data, target_attribute, attribute),
    )

    # Partition the examples by their value for the chosen attribute. Only
    # values that actually occur create a partition, so none is ever empty.
    subsets = {}
    for item in data:
        subsets.setdefault(item[best_attribute], []).append(item)

    # Recurse on every partition with the chosen attribute removed
    remaining_attributes = [attr for attr in attributes if attr != best_attribute]
    node = DecisionNode(attribute=best_attribute)
    for value, subset in subsets.items():
        node.branches[value] = id3(subset, target_attribute, remaining_attributes)

    return node

# Build the tree from the full table, every attribute being a split candidate
decision_tree = id3(data, target_attribute="classe", attributes=attributes)

# Plain-text rendering of the tree
def print_tree(node, depth=0):
    """Print the tree rooted at `node`, indented two spaces per depth level.

    A leaf (a node whose `label` is not None) is printed as
    "Classe: <label>". Any other node prints its attribute followed by "?",
    then each branch value on its own line and the matching subtree two
    levels deeper.

    Args:
        node: object exposing `label`, `attribute` and `branches`.
        depth: indentation level of `node` (0 for the root).
    """
    pad = "  " * depth

    # Leaf: print the predicted class and stop descending
    if node.label is not None:
        print(f"{pad}Classe: {node.label}")
        return

    print(f"{pad}{node.attribute}?")
    for branch_value in node.branches:
        print(f"{pad}  {branch_value}:")
        print_tree(node.branches[branch_value], depth + 2)

# Render the learned tree, starting from the root at depth 0
print_tree(decision_tree, depth=0)


forme?
  octogonal:
    motif?
      uniforme:
        Classe: comestible
      rayé:
        Classe: non comestible
  hexagonal:
    Classe: comestible
  pentagonal:
    Classe: non comestible


In [4]:
# Unlabelled samples to classify, each carrying its own identifier
new_samples = [
    dict(zip(("identificateur", "couleur", "forme", "taille", "motif"), row))
    for row in (
        ("GH-45", "magenta", "octogonal", "petit", "rayé"),
        ("GH-34", "cyan", "pentagonal", "gigantesque", "uniforme"),
        ("ML-57", "magenta", "hexagonal", "grand", "rayé"),
        ("NS-31", "orange", "hexagonal", "grand", "uniforme"),
    )
]

# Classify one sample by walking down the tree
def predict(tree, sample):
    """Return the class the tree assigns to `sample`.

    Starting at `tree`, follow the branch matching the sample's value for each
    tested attribute until a node with a label is reached.

    Args:
        tree: root node exposing `label`, `attribute` and `branches`.
        sample: mapping from attribute name to value.

    Returns:
        The label of the leaf reached, or "Inconnu" when a node has no branch
        for the sample's value.

    Raises:
        KeyError: if `sample` lacks an attribute tested along the path.
    """
    node = tree
    # Descend until a leaf (a node carrying a label) is reached
    while node.label is None:
        observed = sample[node.attribute]
        if observed not in node.branches:
            return "Inconnu"  # no branch exists for this value
        node = node.branches[observed]
    return node.label

# Predicted class of every new sample, keyed by the sample's identifier
predictions = dict(
    (sample["identificateur"], predict(decision_tree, sample))
    for sample in new_samples
)
predictions


{'GH-45': 'non comestible',
 'GH-34': 'non comestible',
 'ML-57': 'comestible',
 'NS-31': 'comestible'}