In [1]:
# Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Importing required libraries
import pandas as pd
import math
import numpy as np

# Definition of Node class for decision tree representation
class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""

# Reading the dataset from a CSV file
data = pd.read_csv("/content/drive/MyDrive/weather_forecast.csv")

In [3]:
# Extracting features/column names from the dataset
features = [feat for feat in data]
print(data, features)

     Outlook Temperature Humidity   Windy Play
0      Sunny         Hot     High    Weak   No
1      Sunny         Hot     High  Strong   No
2   Overcast         Hot     High    Weak  Yes
3       Rain        Mild     High    Weak  Yes
4       Rain        Cool   Normal    Weak  Yes
5       Rain        Cool   Normal  Strong   No
6   Overcast        Cool   Normal  Strong  Yes
7      Sunny        Mild     High    Weak   No
8      Sunny        Cool   Normal    Weak  Yes
9       Rain        Mild   Normal    Weak  Yes
10     Sunny        Mild   Normal  Strong  Yes
11  Overcast        Mild     High  Strong  Yes
12  Overcast         Hot   Normal    Weak  Yes
13      Rain        Mild     High  Strong   No ['Outlook', 'Temperature', 'Humidity', 'Windy', 'Play']


In [4]:
# Removing the "answer" column from the list of features
features.remove("Play")
print(features)

['Outlook', 'Temperature', 'Humidity', 'Windy']


In [5]:
# Function to calculate entropy
def entropy(examples):
    pos = 0.0
    neg = 0.0
    for _, row in examples.iterrows():
        if row["Play"] == "Yes":
            pos += 1
        else:
            neg += 1
    if pos == 0.0 or neg == 0.0:
        return 0.0
    else:
        p = pos / (pos + neg)
        n = neg / (pos + neg)
        return -(p * math.log(p, 2) + n * math.log(n, 2))

In [6]:
# Function to calculate information gain
def info_gain(examples, attr):
    uniq = np.unique(examples[attr])
    gain = entropy(examples)
    for u in uniq:
        subdata = examples[examples[attr] == u]
        sub_e = entropy(subdata)
        gain -= (float(len(subdata)) / float(len(examples))) * sub_e
    return gain

In [7]:
# Calculating entropy for the entire dataset
entropy(data)

# Calculating information gain for each feature
info_gain(data, features)

0.9402859586706309

In [11]:
# Function to implement the ID3 algorithm
def ID3(examples, attrs):
    root = Node()
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature
    root.value = max_feat
    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata) == 0.0:
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["Play"])
            root.children.append(newNode)
        else:
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(max_feat)
            child = ID3(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)
    return root

In [12]:
# Function to print the decision tree
def printTree(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTree(child, depth + 1)

In [13]:
# Building the decision tree using ID3 algorithm
root = ID3(data, features)

In [14]:
# Printing the decision tree
printTree(root)

Outlook
	Overcast ->  ['Yes']

	Rain
		Windy
			Strong ->  ['No']

			Weak ->  ['Yes']

	Sunny
		Humidity
			High ->  ['No']

			Normal ->  ['Yes']

