<a href="https://colab.research.google.com/github/maviayten/CHAID_UYGULAMASI/blob/main/chaid_adl%C4%B1_not_defterinin_kopyas%C4%B1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.stats import chi2_contingency
from sklearn.metrics import accuracy_score

# Example dataset (Iris)
from sklearn.datasets import load_iris
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# CHAID needs categorical predictors, so each continuous feature column is
# discretised into 3 equal-width bins and replaced by its integer bin index.
# The target column is left untouched.
# NOTE(review): the bin edges are computed on the full dataset, i.e. before
# the train/test split below.
for col in df.columns.drop('target'):
    df[col] = pd.cut(df[col], bins=3, labels=False)

# Build the train / test split
y = df['target']
X = df.drop(columns=['target'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)


In [None]:
# CHAID algorithm: evaluate a candidate split with a chi-square test
def chaid_split(data, target_col, feature_col):
    """Chi-square test of independence between one feature and the target.

    Cross-tabulates ``data[feature_col]`` against ``data[target_col]`` and
    hands the resulting contingency table to ``chi2_contingency``.

    Parameters
    ----------
    data : DataFrame holding both columns.
    target_col : name of the target column.
    feature_col : name of the feature column to test.

    Returns
    -------
    tuple
        ``(chi2, p_value)``: the test statistic and its p-value.
    """
    observed = pd.crosstab(index=data[feature_col], columns=data[target_col])
    # Only the first two results are needed; the remaining ones are discarded.
    statistic, p_value, *_unused = chi2_contingency(observed)
    return statistic, p_value

# CHAID decision-tree logic
def chaid_tree(data, target_col, max_depth=3, min_p_value=0.05, depth=0):
    """Recursively grow a simplified CHAID-style decision tree.

    At every node each non-target column is tested against the target with
    ``chaid_split``; the column with the smallest p-value is used as the
    split, and one child is grown for every distinct value that column takes
    in ``data``.

    Parameters
    ----------
    data : DataFrame containing the feature columns and the target column.
    target_col : name of the target column.
    max_depth : recursion stops with a leaf once ``depth`` reaches this value.
    min_p_value : a node is split only if the best p-value does not exceed
        this threshold; otherwise it becomes a leaf.
    depth : depth of the current node; incremented by the recursive calls.

    Returns
    -------
    dict
        Either ``{"type": "leaf", "class": <first mode of the target>}`` or
        ``{"type": "node", "feature": <column>, "branches": {value: subtree}}``.
        ``branches`` only contains the values present in ``data`` at that
        node, so a value not seen here has no branch.
    """
    target = data[target_col]

    def make_leaf():
        # Predict the most frequent target value among the rows at this node.
        return {"type": "leaf", "class": target.mode()[0]}

    if depth >= max_depth or len(target.unique()) == 1:
        return make_leaf()

    best_feature = None
    best_p_value = 1.0
    for feature in data.columns.drop(target_col):
        _, p_value = chaid_split(data, target_col, feature)
        # Strict comparison: the earliest column wins ties, and a feature
        # whose p-value is exactly 1.0 is never selected.
        if p_value < best_p_value:
            best_p_value = p_value
            best_feature = feature

    # `best_feature is None` means no column improved on p = 1.0 (or there
    # were no feature columns). Without this check a `min_p_value` >= 1.0
    # fell through to `data[None]` and raised KeyError.
    if best_feature is None or best_p_value > min_p_value:
        return make_leaf()

    branches = {}
    for value in data[best_feature].unique():
        subset = data[data[best_feature] == value]
        branches[value] = chaid_tree(
            subset, target_col, max_depth, min_p_value, depth + 1
        )

    return {"type": "node", "feature": best_feature, "branches": branches}

# Build the CHAID tree from the training split (features and target side by side)
training_frame = pd.concat([X_train, y_train], axis=1)
tree = chaid_tree(training_frame, 'target')
print(tree)


{'type': 'node', 'feature': 'petal width (cm)', 'branches': {0: {'type': 'leaf', 'class': 0}, 1: {'type': 'node', 'feature': 'petal length (cm)', 'branches': {1: {'type': 'leaf', 'class': 1}, 2: {'type': 'leaf', 'class': 2}}}, 2: {'type': 'leaf', 'class': 2}}}


In [None]:
# Example hyper-parameter search: try several max_depth values and keep the
# tree with the highest accuracy.
# NOTE(review): the depth is selected on the test split itself, so the
# reported accuracy is measured on the same data used to choose the depth.


def predict(tree, row, default=None):
    """Classify one row by walking the tree from the root down to a leaf.

    Parameters
    ----------
    tree : nested dict as returned by ``chaid_tree``.
    row : mapping from feature name to the row's (binned) value.
    default : value returned when the row's value has no branch at some node,
        i.e. that value was not present in the data the node was grown from.

    Returns
    -------
    The ``class`` of the leaf that was reached, or ``default``.
    """
    node = tree
    while node['type'] != 'leaf':
        child = node['branches'].get(row[node['feature']])
        if child is None:
            # Unseen category at this node: a plain dict lookup would raise
            # KeyError here, so fall back to the caller-supplied default.
            return default
        node = child
    return node['class']


# Loop-invariant inputs: build them once instead of on every iteration.
train_data = pd.concat([X_train, y_train], axis=1)
fallback_class = y_train.mode()[0]  # most frequent class in the training split

best_tree = None
best_depth = None
best_accuracy = 0

for depth in range(1, 6):
    tree = chaid_tree(train_data, target_col='target', max_depth=depth)

    # Evaluate on the test split (simplified row-by-row prediction)
    y_pred = X_test.apply(lambda row: predict(tree, row, default=fallback_class), axis=1)
    acc = accuracy_score(y_test, y_pred)

    # Strict comparison: on ties the shallowest depth is kept.
    if acc > best_accuracy:
        best_accuracy = acc
        best_tree = tree
        best_depth = depth

print(f"En iyi derinlik: {best_depth}, En iyi doğruluk: {best_accuracy:.2f}")


En iyi derinlik: 1, En iyi doğruluk: 1.00


In [None]:
# Feature importance via CHAID: the chi-square statistic of each feature
# against the target on the training split is used as its importance score.
importance_frame = pd.concat([X_train, y_train], axis=1)  # built once, reused for every feature
feature_importances = {
    feature: chaid_split(importance_frame, 'target', feature)[0]  # [0] is the chi2 statistic
    for feature in X_train.columns
}

# Rank the features from most to least important
sorted_features = sorted(feature_importances.items(), key=lambda item: item[1], reverse=True)
print("Özellik Önem Dereceleri:")
for feature, importance in sorted_features:
    print(f"{feature}: {importance:.2f}")


Özellik Önem Dereceleri:
petal width (cm): 207.48
petal length (cm): 202.05
sepal length (cm): 86.14
sepal width (cm): 36.72
