In [6]:
from google.colab import files
# Pick the dataset CSV from the local machine; Colab saves it under the same
# name in the working directory (see the "Saving ..." line in the cell output).
uploaded = files.upload()   # this opens a file picker


Saving tictactoe.csv to tictactoe.csv


In [2]:
# ⬅️ CHANGE THIS FILENAME to your required ID format
%%writefile EC_C_PES2UG23CS176_Lab3.py
import torch
import math

def get_entropy_of_dataset(tensor: torch.Tensor) -> float:
    """
    Entropy(S) = - Σ p_i log2 p_i, rounded to 4 decimal places.

    Args:
        tensor: 2-D dataset; the last column is taken as the class label.

    Returns:
        Entropy of the label column in bits. An empty dataset and a
        single-class dataset both yield +0.0 (never -0.0).
    """
    target = tensor[:, -1]
    if target.numel() == 0:
        # No rows means no uncertainty; also avoids dividing by a zero total.
        return 0.0

    _, counts = torch.unique(target, return_counts=True)
    # Every count reported by torch.unique is >= 1, so each probability is
    # strictly positive and log2 never sees a zero.
    probs = counts.float() / counts.sum()
    entropy = -torch.sum(probs * torch.log2(probs))

    # Negating a zero sum produces -0.0 for a pure dataset; abs() strips the
    # sign so callers (and printed output) see a plain 0.0.
    return abs(round(float(entropy.item()), 4))

def _subset_entropy_bits(labels: torch.Tensor) -> float:
    """Unrounded base-2 entropy of a non-empty 1-D tensor of class labels."""
    _, counts = torch.unique(labels, return_counts=True)
    probs = counts.float() / counts.sum()
    return float(-torch.sum(probs * torch.log2(probs)).item())


def get_avg_info_of_attribute(tensor: torch.Tensor, attribute: int) -> float:
    """
    Avg_Info(attribute) = Σ_v (|S_v|/|S|) * Entropy(S_v)

    Args:
        tensor: 2-D dataset; the last column is taken as the class label.
        attribute: index of the column to partition the rows on.

    Returns:
        Weighted average entropy of the partitions, rounded to 4 decimal
        places. An empty dataset yields 0.0.
    """
    total = tensor.size(0)
    if total == 0:
        return 0.0

    values = tensor[:, attribute]
    labels = tensor[:, -1]
    weighted_entropy = 0.0
    for val in torch.unique(values):
        subset_labels = labels[values == val]
        weight = subset_labels.numel() / total
        # Accumulate the unrounded entropy: rounding each term first and
        # then rounding the sum again can shift the 4th decimal place.
        weighted_entropy += weight * _subset_entropy_bits(subset_labels)
    return round(float(weighted_entropy), 4)

def get_information_gain(tensor: torch.Tensor, attribute: int) -> float:
    """
    IG(attribute) = Entropy(S) - Avg_Info(attribute)

    Args:
        tensor: 2-D dataset; the last column is taken as the class label.
        attribute: index of the column whose gain is evaluated.

    Returns:
        Information gain rounded to 4 decimal places, never below +0.0.
    """
    ds_entropy = get_entropy_of_dataset(tensor)
    avg_info = get_avg_info_of_attribute(tensor, attribute)
    ig = round(float(ds_entropy - avg_info), 4)
    # Both operands are already rounded, so the difference can land on -0.0
    # or a hair below zero; information gain is non-negative by definition.
    return max(0.0, ig)

def get_selected_attribute(tensor: torch.Tensor):
    """
    Score every feature column and pick the one to split on.

    Returns a pair: a dict mapping each attribute index to its information
    gain, and the index with the highest gain (the lowest index wins a tie).
    The second element is None when there are no feature columns.
    """
    # The last column is the target, so it is not scored.
    feature_count = tensor.size(1) - 1
    ig_by_column = {
        column: get_information_gain(tensor, column)
        for column in range(feature_count)
    }
    if not ig_by_column:
        return ig_by_column, None
    winner = max(ig_by_column, key=lambda column: ig_by_column[column])
    return ig_by_column, winner


Writing EC_C_PES2UG23CS176_Lab3.py


In [4]:
from google.colab import files
# Upload the grader script (and, if not already present, the dataset CSV).
uploaded = files.upload()  # choose test.py and your CSV (e.g., tictactoe.csv)
# Show the names of the files that were just uploaded as the cell result.
list(uploaded.keys())


Saving test.py to test.py


['test.py']

In [7]:
!python test.py \
  --ID EC_C_PES2UG23CS176_Lab3 \
  --data tictactoe.csv \
  --framework pytorch \
  --print-tree \
  --print-construction


Running tests with PYTORCH framework
 target column: 'Class' (last column)
Original dataset info:
Shape: (958, 10)
Columns: ['top-left-square', 'top-middle-square', 'top-right-square', 'middle-left-square', 'middle-middle-square', 'middle-right-square', 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square', 'Class']

First few rows:

top-left-square: ['x' 'o' 'b'] -> [2 1 0]

top-middle-square: ['x' 'o' 'b'] -> [2 1 0]

top-right-square: ['x' 'o' 'b'] -> [2 1 0]

Class: ['positive' 'negative'] -> [1 0]

Processed dataset shape: torch.Size([958, 10])
Number of features: 9
Features: ['top-left-square', 'top-middle-square', 'top-right-square', 'middle-left-square', 'middle-middle-square', 'middle-right-square', 'bottom-left-square', 'bottom-middle-square', 'bottom-right-square']
Target: Class
Framework: PYTORCH
Data type: <class 'torch.Tensor'>

DECISION TREE CONSTRUCTION DEMO
Total samples: 958
Training samples: 766
Testing samples: 192

Constructing decision tree using tra

In [8]:
from google.colab import files
# Upload the second dataset (mushrooms.csv) used by the next test run.
uploaded = files.upload()   # this opens a file picker


Saving mushrooms.csv to mushrooms.csv


In [9]:
from google.colab import files
# Upload the third dataset (Nursery.csv) used by the final test run.
uploaded = files.upload()   # this opens a file picker


Saving Nursery.csv to Nursery.csv


In [10]:
!python test.py \
  --ID EC_C_PES2UG23CS176_Lab3 \
  --data mushrooms.csv \
  --framework pytorch \
  --print-tree \
  --print-construction


Running tests with PYTORCH framework
 target column: 'class' (last column)
Original dataset info:
Shape: (8124, 23)
Columns: ['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring', 'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number', 'ring-type', 'spore-print-color', 'population', 'habitat', 'class']

First few rows:

cap-shape: ['x' 'b' 's' 'f' 'k'] -> [5 0 4 2 3]

cap-surface: ['s' 'y' 'f' 'g'] -> [2 3 0 1]

cap-color: ['n' 'y' 'w' 'g' 'e'] -> [4 9 8 3 2]

class: ['p' 'e'] -> [1 0]

Processed dataset shape: torch.Size([8124, 23])
Number of features: 22
Features: ['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor', 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color', 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring', 'stalk-color-above-ring', 's

In [12]:
!python test.py \
  --ID EC_C_PES2UG23CS176_Lab3 \
  --data Nursery.csv \
  --framework pytorch \
  --print-tree \
  --print-construction


Running tests with PYTORCH framework
 target column: 'class' (last column)
Original dataset info:
Shape: (12960, 9)
Columns: ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health', 'class']

First few rows:

parents: ['usual' 'pretentious' 'great_pret'] -> [2 1 0]

has_nurs: ['proper' 'less_proper' 'improper' 'critical' 'very_crit'] -> [3 2 1 0 4]

form: ['complete' 'completed' 'incomplete' 'foster'] -> [0 1 3 2]

class: ['recommend' 'priority' 'not_recom' 'very_recom' 'spec_prior'] -> [2 1 0 4 3]

Processed dataset shape: torch.Size([12960, 9])
Number of features: 8
Features: ['parents', 'has_nurs', 'form', 'children', 'housing', 'finance', 'social', 'health']
Target: class
Framework: PYTORCH
Data type: <class 'torch.Tensor'>

DECISION TREE CONSTRUCTION DEMO
Total samples: 12960
Training samples: 10368
Testing samples: 2592

Constructing decision tree using training data...
Level 0: Node Info - Entropy = 1.7173
Level 0: Node Info - Selected Attribute: hea