In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn import tree
import math
from collections import Counter

In [2]:
# Create the dataset
data = pd.DataFrame([
    ['C1', 'Month-to-month', 'High', 'High', 'Yes', 'Yes'],
    ['C2', 'Two year', 'Low', 'Low', 'No', 'No'],
    ['C3', 'Month-to-month', 'High', 'Medium', 'Yes', 'Yes'],
    ['C4', 'One year', 'Medium', 'Medium', 'No', 'No'],
    ['C5', 'Month-to-month', 'Low', 'High', 'Yes', 'Yes'],
    ['C6', 'Two year', 'Low', 'Low', 'No', 'No'],
    ['C7', 'One year', 'High', 'Medium', 'Yes', 'Yes'],
    ['C8', 'Month-to-month', 'Medium', 'High', 'Yes', 'Yes'],
    ['C9', 'Two year', 'Low', 'Low', 'No', 'No'],
    ['C10', 'One year', 'Low', 'Medium', 'No', 'No']
], columns=['ID', 'Contract Type', 'Support Calls', 'Monthly Charges', 'Internet Service', 'Churn'])

# Drop ID (not needed)
data = data.drop('ID', axis=1)


In [3]:
# Entropy of Target Variable (Churn)
def entropy(column):
    counts = Counter(column)
    total = len(column)
    return -sum((count/total) * math.log2(count/total) for count in counts.values())

target_entropy = entropy(data['Churn'])
print("Entropy of Churn:", round(target_entropy, 4))

Entropy of Churn: 1.0


In [4]:
#Information Gain for each Feature
def info_gain(df, attribute, target):
    total_entropy = entropy(df[target])
    values = df[attribute].unique()
    weighted_entropy = 0

    for val in values:
        subset = df[df[attribute] == val]
        weighted_entropy += (len(subset) / len(df)) * entropy(subset[target])

    return total_entropy - weighted_entropy

features = ['Contract Type', 'Support Calls', 'Monthly Charges', 'Internet Service']

print("\n📊 Information Gain for each feature:")
for feature in features:
    gain = info_gain(data, feature, 'Churn')
    print(f"- {feature}: {round(gain, 4)}")


📊 Information Gain for each feature:
- Contract Type: 0.7245
- Support Calls: 0.439
- Monthly Charges: 0.6
- Internet Service: 1.0


In [5]:
#Encode features and Train Decision Tree
X = data[features]
y = data['Churn']

# Encode categorical variables
encoders = {}
for column in X.columns:
    encoders[column] = LabelEncoder()
    X[column] = encoders[column].fit_transform(X[column])

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Train the decision tree using entropy (ID3)
clf = DecisionTreeClassifier(criterion='entropy')
clf.fit(X, y)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = encoders[column].fit_transform(X[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = encoders[column].fit_transform(X[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = encoders[column].fit_transform(X[column])
A value is trying to be set on a copy of a 

In [6]:
#Visualize Tree
print("\n🌳 Decision Tree Rules:\n")
tree_rules = export_text(clf, feature_names=list(X.columns))
print(tree_rules)


🌳 Decision Tree Rules:

|--- Internet Service <= 0.50
|   |--- class: 0
|--- Internet Service >  0.50
|   |--- class: 1

