In [3]:
import numpy as np
import pandas as pd
import warnings
# NOTE(review): this silences every warning for the whole session, including
# NumPy RuntimeWarnings (e.g. log2 of zero) that would flag numerical problems;
# consider narrowing it to specific warning categories.
warnings.filterwarnings("ignore")

# Build the toy housing dataset inline: one tuple per house
data = pd.DataFrame(
    [
        (1500, 3, 150000),
        (2000, 4, 200000),
        (1800, 3, 175000),
        (2400, 5, 250000),
        (1200, 2, 100000),
    ],
    columns=['size', 'bedrooms', 'price'],
)

# Define a function to calculate the entropy of a given set of labels
def entropy(labels):
    """Return the Shannon entropy (in bits) of a collection of labels.

    Parameters
    ----------
    labels : array-like
        One label per sample. Any values that ``np.unique`` can sort are
        accepted (integers, floats, strings), not only small non-negative
        integers.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is each
        label's relative frequency. ``0.0`` when there are fewer than two
        samples or only one distinct label.
    """
    n_labels = len(labels)
    if n_labels <= 1:
        return 0.0
    # Count only the labels that actually occur. np.bincount would create a
    # zero-count slot for every integer between 0 and max(labels), and
    # 0 * log2(0) evaluates to NaN, poisoning the sum.
    _, counts = np.unique(np.asarray(labels), return_counts=True)
    if counts.size <= 1:
        return 0.0
    probs = counts / n_labels
    return float(-np.sum(probs * np.log2(probs)))

# Define a function to calculate the information gain of a given split
def info_gain(data, split_attr, target_attr):
    """Return the information gain from splitting ``data`` on ``split_attr``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both ``split_attr`` and ``target_attr`` columns.
    split_attr : str
        Column whose distinct values define the partitions.
    target_attr : str
        Column holding the labels whose entropy is measured.

    Returns
    -------
    float
        Entropy of the target column minus the size-weighted entropy of the
        target within each partition.
    """
    total_entropy = entropy(data[target_attr])
    vals, counts = np.unique(data[split_attr], return_counts=True)
    n_rows = counts.sum()
    weighted_entropy = 0.0
    for val, count in zip(vals, counts):
        # A boolean mask keeps the target's dtype and every matching row.
        # DataFrame.where(...).dropna() would upcast integers to float and
        # drop rows that merely have a NaN in some unrelated column.
        subset_labels = data.loc[data[split_attr] == val, target_attr]
        weighted_entropy += (count / n_rows) * entropy(subset_labels)
    return total_entropy - weighted_entropy

# Define the ID3 algorithm to build the decision tree
def id3(data, target_attr, attrs):
    """Recursively build an ID3 decision tree represented as nested dicts.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows; must contain ``target_attr`` and every name in ``attrs``.
    target_attr : str
        Column holding the value to predict.
    attrs : list of str
        Candidate columns to split on. The list itself is not modified.

    Returns
    -------
    dict or scalar
        A leaf is a target value: the single remaining value when the node is
        pure, or the most frequent value when no attributes are left.
        An internal node is ``{attr: {attr_value: subtree, ...}}``.

    Raises
    ------
    ValueError
        If ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError("cannot build a decision tree from an empty dataset")

    vals, counts = np.unique(data[target_attr], return_counts=True)
    if len(attrs) == 0:
        # Nothing left to split on: fall back to the majority target value.
        return vals[np.argmax(counts)]
    if len(vals) == 1:
        # Pure node: every row shares the same target value.
        return vals[0]

    gains = np.array([info_gain(data, attr, target_attr) for attr in attrs])
    best_attr = attrs[np.argmax(gains)]
    remaining_attrs = [attr for attr in attrs if attr != best_attr]

    tree = {best_attr: {}}
    for val in np.unique(data[best_attr]):
        # Boolean-mask selection keeps column dtypes intact and keeps rows
        # that have NaN in other columns; where(...).dropna() would turn
        # integer targets into floats and could leave an empty subset.
        sub_data = data[data[best_attr] == val]
        tree[best_attr][val] = id3(sub_data, target_attr, remaining_attrs)
    return tree

# Define a function to make predictions using the decision tree
def predict(row, tree):
    """Walk ``tree`` using the attribute values in ``row`` and return the leaf.

    ``tree`` is a nested dict of the form ``{attr: {value: subtree_or_leaf}}``
    and ``row`` is any mapping that supports ``row[attr]``. Returns ``None``
    when the row's value has no matching branch at some level.
    """
    node = tree
    while True:
        for attr, branches in node.items():
            if row[attr] in branches:
                child = branches[row[attr]]
                break
        else:
            # No attribute at this level has a branch for the row's value.
            return None
        if not isinstance(child, dict):
            return child
        node = child

# Build the decision tree
# Column to predict, the candidate split columns, then the fitted tree
target_attr = 'price'
attrs = ['size', 'bedrooms']
tree = id3(data=data, target_attr=target_attr, attrs=attrs)




In [4]:
# Predict the price of every row in `data` with the decision tree
predictions = [predict(row, tree) for _, row in data.iterrows()]

# Compute the residuals once, by position. Indexing data['price'][i] looks rows
# up by index *label*, which only matches position i for a default RangeIndex.
residuals = data['price'].to_numpy() - np.asarray(predictions)

# Calculate MSE, RMSE, and MAE.
# NOTE: `data` is also what the tree was built from, so these are in-sample
# errors and say nothing about performance on unseen houses.
mse = np.mean(residuals ** 2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(residuals))

# Print the results
print("Decision tree: ", tree)
print("MSE: ", mse)
print("RMSE: ", rmse)
print("MAE: ", mae)


Decision tree:  {'size': {1200: 100000.0, 1500: 150000.0, 1800: 175000.0, 2000: 200000.0, 2400: 250000.0}}
MSE:  0.0
RMSE:  0.0
MAE:  0.0
