In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from collections import Counter

# Load the dataset
file_path = "/content/weatherHistory.csv"
df = pd.read_csv(file_path)

# Column the tree learns to predict
target_column = 'Precip Type'

# Columns used as model inputs
features = ['Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
            'Wind Bearing (degrees)', 'Visibility (km)', 'Pressure (millibars)']

# Drop rows where the target OR any input feature is missing. The tree code
# in this file routes samples with `<=` / `>` comparisons, which are both
# False for NaN, so a row with a missing feature would belong to neither
# branch during training.
df = df.dropna(subset=[target_column] + features)

# Encode the target variable into numerical labels
label_encoder = LabelEncoder()
df[target_column] = label_encoder.fit_transform(df[target_column])

# Plain NumPy arrays: one row per sample, one column per entry of `features`
X = df[features].to_numpy()
y = df[target_column].to_numpy()

# Split the dataset into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Function to calculate entropy
def entropy(y):
    """Return the Shannon entropy, in bits, of the class labels in ``y``.

    Args:
        y: 1-D sequence of non-negative integer class labels (the input
            format ``np.bincount`` requires).

    Returns:
        ``-sum(p * log2(p))`` over the classes that occur in ``y``. The
        result is ``0.0`` when ``y`` is empty or holds a single class.
    """
    n_samples = len(y)
    if n_samples == 0:
        # Nothing to count, hence no uncertainty.
        return np.float64(0.0)

    counts = np.bincount(y)
    # Classes that never occur contribute nothing and would hit log2(0).
    probabilities = counts[counts > 0] / n_samples
    # A pure node evaluates to -0.0; adding 0.0 normalises that to +0.0.
    return -np.sum(probabilities * np.log2(probabilities)) + 0.0

# Function to calculate information gain
def information_gain(X_column, y, threshold):
    """Return the entropy reduction obtained by splitting ``y`` at ``threshold``.

    Samples with ``X_column <= threshold`` form the left subset and samples
    with ``X_column > threshold`` form the right subset. A sample whose
    feature value is NaN satisfies neither comparison and so lands in
    neither subset.

    Args:
        X_column: 1-D array holding one feature value per sample.
        y: 1-D array of integer class labels aligned with ``X_column``.
        threshold: Candidate split value.

    Returns:
        The entropy of ``y`` minus the size-weighted entropy of the two
        subsets, or ``0.0`` when either subset is empty.
    """
    left_mask = X_column <= threshold
    right_mask = X_column > threshold

    # Slice once instead of re-indexing y for every entropy/weight term.
    y_left = y[left_mask]
    y_right = y[right_mask]

    # A threshold that leaves one side empty does not partition the data, so
    # it cannot gain anything. Returning early also covers an empty ``y``
    # (otherwise a ZeroDivisionError below) and a NaN threshold, for which
    # both masks are all-False and the formula below would report the whole
    # parent entropy as gain.
    if len(y_left) == 0 or len(y_right) == 0:
        return 0.0

    n = len(y)
    left_weight = len(y_left) / n
    right_weight = len(y_right) / n
    child_entropy = (left_weight * entropy(y_left)) + (right_weight * entropy(y_right))

    return entropy(y) - child_entropy

# Function to find the best feature and threshold for splitting
def best_split(X, y):
    """Search every feature/threshold pair for the highest information gain.

    Each distinct value of each column of ``X`` is tried as a threshold.

    Args:
        X: 2-D array with one row per sample and one column per feature.
        y: 1-D array of class labels aligned with the rows of ``X``.

    Returns:
        A ``(feature_index, threshold)`` tuple for the first candidate that
        reaches the highest strictly positive gain, or ``(None, None)`` when
        no candidate has a gain above zero.
    """
    winner = (None, None)
    top_gain = 0

    n_features = X.shape[1]
    for col_idx in range(n_features):
        values = X[:, col_idx]

        for candidate in np.unique(values):
            score = information_gain(values, y, candidate)
            # Strict comparison: on ties the earliest candidate is kept.
            if score > top_gain:
                top_gain = score
                winner = (col_idx, candidate)

    return winner

# Decision Tree Node
class Node:
    """One node of the decision tree.

    A node carrying a ``value`` acts as a leaf; otherwise it describes a
    split on ``feature`` at ``threshold`` with ``left`` and ``right``
    children.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split description: column index and cut-off compared with `<=`.
        self.feature, self.threshold = feature, threshold
        # Child nodes for the `<= threshold` and `> threshold` sides.
        self.left, self.right = left, right
        # Predicted label; stays None on internal nodes.
        self.value = value

# Function to build the ID3 tree
def build_tree(X, y, depth=0, max_depth=10):
    """Recursively grow a binary decision tree from ``X`` and ``y``.

    Growth stops, producing a leaf labelled with the most frequent class,
    when all labels agree, when ``depth`` reaches ``max_depth``, or when
    ``best_split`` finds no split with positive gain.

    Args:
        X: 2-D array with one row per sample and one column per feature.
        y: 1-D array of class labels aligned with the rows of ``X``.
        depth: Depth of the node being built (0 for the root).
        max_depth: Depth at which nodes are forced to become leaves.

    Returns:
        The root ``Node`` of the (sub)tree.

    Raises:
        ValueError: If ``y`` is empty, since no label can be chosen.
    """
    if len(y) == 0:
        # Without this guard the failure is an IndexError from
        # most_common(1)[0], which hides the real cause.
        raise ValueError("build_tree requires at least one training sample")

    # One pass over the labels serves both the purity check and the
    # majority vote.
    label_counts = Counter(y)
    majority_label = label_counts.most_common(1)[0][0]

    if len(label_counts) == 1 or depth >= max_depth:
        return Node(value=majority_label)

    feature, threshold = best_split(X, y)
    if feature is None:
        # No threshold improves on the current node.
        return Node(value=majority_label)

    left_mask = X[:, feature] <= threshold
    right_mask = X[:, feature] > threshold
    left_subtree = build_tree(X[left_mask], y[left_mask], depth + 1, max_depth)
    right_subtree = build_tree(X[right_mask], y[right_mask], depth + 1, max_depth)

    return Node(feature, threshold, left_subtree, right_subtree)

# Fit the ID3 decision tree on the training split; the depth limit is left
# at build_tree's default
tree = build_tree(X=X_train, y=y_train)

# Function to make predictions
def predict(tree, X):
    """Return the label the tree assigns to a single sample.

    Args:
        tree: Root node of the tree (or any subtree) to walk.
        X: Feature values of one sample, indexable by feature index.

    Returns:
        The ``value`` stored on the leaf the sample ends up in.
    """
    node = tree
    # Descend until a node carrying a value (a leaf) is reached.
    while node.value is None:
        goes_left = X[node.feature] <= node.threshold
        node = node.left if goes_left else node.right
    return node.value

# Function to print the tree
def print_tree(node, depth=0):
    """Print the tree rooted at ``node``, one line per node.

    Each level is indented by two spaces; the left child is printed before
    the right child.

    Args:
        node: Root of the (sub)tree to print.
        depth: Nesting level of ``node``, which sets the indentation.
    """
    indent = "  " * depth
    if node.value is None:
        print(indent + f"Feature {node.feature} <= {node.threshold}")
        for child in (node.left, node.right):
            print_tree(child, depth + 1)
    else:
        print(indent + f"Leaf: {node.value}")

# Score the fitted tree on the held-out test rows: predict each sample,
# then take the fraction of predictions that match the true labels
y_pred = np.array([predict(tree, sample) for sample in X_test])
correct = y_pred == y_test
accuracy = np.mean(correct)

print(f"Decision Tree Accuracy: {accuracy * 100:.2f}%")
print("Decision Tree Structure:")
print_tree(tree)

Decision Tree Accuracy: 100.00%
Decision Tree Structure:
Feature 0 <= 0.0
  Leaf: 1
  Leaf: 0
