In [3]:
import pandas as pd

data = pd.read_csv('classification.csv')
data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [4]:
age_groups = data.groupby('Age')
n = data.shape[0]
gini_age = 0
for group in age_groups:
    p = len(group[1]) / n
    gini_age += p * (1 - p)
print("Gini index for age column:", gini_age)


Gini index for age column: 0.9681124999999999


In [10]:
salary_groups = data.groupby('EstimatedSalary')
gini_salary = 0
for group in salary_groups:
    p = len(group[1]) / n
    gini_salary += p * (1 - p)
print("Gini index for salary column:", gini_salary)

Gini index for salary column: 0.9874999999999985


In [11]:
def build_tree(data):
    if len(set(data['Purchased'])) == 1:
        return data['Purchased'].iloc[0]
    if data.shape[1] == 1:
        return data['Purchased'].mode()[0]
    best_feature = None
    best_gini = 1
    for feature in data.columns[:-1]:
        feature_groups = data.groupby(feature)
        gini = 0
        for group in feature_groups:
            p = len(group[1]) / n
            gini += p * (1 - p)
        if gini < best_gini:
            best_gini = gini
            best_feature = feature
    tree = {best_feature: {}}
    feature_values = data[best_feature].unique()
    for value in feature_values:
        sub_data = data[data[best_feature] == value].drop(best_feature, axis=1)
        subtree = build_tree(sub_data)
        tree[best_feature][value] = subtree
    return tree

tree = build_tree(data)

In [20]:
def predict(sample, tree):
    for feature, subtree in tree.items():
        print(sample)
        value = sample[feature]
        subtree = subtree[value]
        if isinstance(subtree, dict):
            return predict(sample, subtree)
        else:
            return subtree

new_sample = {'Age': 35, 'EstimatedSalary': 20000}

classification = predict(new_sample, tree)
print("Predicted classification:", classification)

{'Age': 35, 'EstimatedSalary': 20000}
{'Age': 35, 'EstimatedSalary': 20000}
Predicted classification: 0
