In [7]:
import sys
import importlib
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from collections import Counter

# ----------------- Module Import -----------------
# Name of the implementation module to load dynamically (file name without .py).
subname = "EC_F_PES2UG23CS364_NANDANI_lab3"  # your implementation file name (without .py)
framework = "pytorch"                 # pytorch or sklearn

try:
    mymodule = importlib.import_module(subname)
except Exception as e:
    # Broad catch is deliberate at this top-level boundary: any failure while
    # importing the implementation module (missing file, syntax error, an
    # exception raised at import time) is reported and stops the script.
    print(f"❌ Error importing module '{subname}': {e}")
    # Exit with a non-zero status so the failure is visible to whatever
    # launched the script; a bare sys.exit() would report success (status 0).
    sys.exit(1)

# Required functions from your module
# (an AttributeError is raised here if the module does not define one of them)
get_selected_attribute = mymodule.get_selected_attribute
get_information_gain = mymodule.get_information_gain
get_avg_info_of_attribute = mymodule.get_avg_info_of_attribute
get_entropy_of_dataset = mymodule.get_entropy_of_dataset

# ----------------- Helper Functions -----------------
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of valid predictions that equal the true labels.

    A prediction is "valid" when it is not ``None``; positions whose
    prediction is ``None`` are dropped from both sequences before scoring.

    Args:
        y_true: True labels as a torch tensor, NumPy array, or sequence.
        y_pred: Predicted labels as a torch tensor, NumPy array, or sequence;
            individual entries may be ``None``.

    Returns:
        The accuracy over the valid positions, or ``0.0`` when there are no
        valid predictions (including empty input).
    """
    # detach()/cpu() make the conversion work for tensors that track
    # gradients or live on another device; for plain CPU tensors it is a no-op.
    if isinstance(y_true, torch.Tensor):
        y_true = y_true.detach().cpu().numpy()
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.detach().cpu().numpy()

    # Force a boolean dtype: np.array([]) would otherwise be float64 and
    # could not be used as an index mask when there are no predictions.
    valid_mask = np.array([p is not None for p in y_pred], dtype=bool)

    # np.asarray lets plain Python lists be indexed by the mask as well.
    y_true = np.asarray(y_true)[valid_mask]
    y_pred = np.asarray(y_pred)[valid_mask]

    if len(y_true) == 0:
        return 0.0
    return np.sum(y_true == y_pred) / len(y_true)

def calculate_precision_recall_f1(y_true, y_pred, average='weighted'):
    """Compute averaged precision, recall and F1 over all observed classes.

    Positions whose prediction is ``None`` are dropped before scoring. The
    class set is the union of the values seen in the remaining true labels
    and predictions. Per-class scores default to 0 when their denominator
    is zero.

    Args:
        y_true: True labels as a torch tensor, NumPy array, or sequence.
        y_pred: Predicted labels as a torch tensor, NumPy array, or sequence;
            individual entries may be ``None``.
        average: ``'weighted'`` weights each class by its support (number of
            true samples of that class); any other value yields the
            unweighted (macro) mean over classes.

    Returns:
        A ``(precision, recall, f1)`` tuple. ``(0.0, 0.0, 0.0)`` is returned
        when there are no valid predictions to score.
    """
    # detach()/cpu() make the conversion work for tensors that track
    # gradients or live on another device; for plain CPU tensors it is a no-op.
    if isinstance(y_true, torch.Tensor):
        y_true = y_true.detach().cpu().numpy()
    if isinstance(y_pred, torch.Tensor):
        y_pred = y_pred.detach().cpu().numpy()

    # Force a boolean dtype: np.array([]) would otherwise be float64 and
    # could not be used as an index mask when there are no predictions.
    valid_mask = np.array([p is not None for p in y_pred], dtype=bool)
    y_true = np.asarray(y_true)[valid_mask]
    y_pred = np.asarray(y_pred)[valid_mask]

    # Nothing to score: avoid a 0/0 in the weighted branch and a NaN mean
    # in the macro branch.
    if len(y_true) == 0:
        return 0.0, 0.0, 0.0

    classes = np.unique(np.concatenate([y_true, y_pred]))
    precisions, recalls, f1s, supports = [], [], [], []
    for cls in classes:
        is_true_cls = y_true == cls
        is_pred_cls = y_pred == cls
        tp = np.sum(is_true_cls & is_pred_cls)
        fp = np.sum(~is_true_cls & is_pred_cls)
        fn = np.sum(is_true_cls & ~is_pred_cls)
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        if (precision + recall) > 0:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0.0
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
        supports.append(np.sum(is_true_cls))

    if average == 'weighted':
        # total equals the number of valid samples, which is > 0 here.
        total = sum(supports)
        return (sum(p * s for p, s in zip(precisions, supports)) / total,
                sum(r * s for r, s in zip(recalls, supports)) / total,
                sum(f * s for f, s in zip(f1s, supports)) / total)
    return np.mean(precisions), np.mean(recalls), np.mean(f1s)

def preprocess_data(df):
    """Label-encode every column of ``df``.

    Args:
        df: Input DataFrame; it is not modified.

    Returns:
        A ``(encoded_df, encoders)`` tuple: a copy of ``df`` whose columns
        hold the output of ``LabelEncoder.fit_transform``, and a dict mapping
        each column name to the fitted ``LabelEncoder`` used for it.
    """
    encoded = df.copy()
    encoders = {}
    for column in encoded.columns:
        encoder = LabelEncoder()
        # Fit on the untouched input column and store the codes in the copy.
        encoded[column] = encoder.fit_transform(df[column])
        encoders[column] = encoder
    return encoded, encoders

# ----------------- Prediction Functions (dummy) -----------------
def predict_single_sample(tree, sample, cols):
    """Placeholder prediction for one sample (no tree is consulted).

    Args:
        tree: Unused.
        sample: Indexable sequence of values for one sample.
        cols: Unused.

    Returns:
        The last element of ``sample`` converted to ``int``.
    """
    # Dummy rule: echo the final entry of the sample as the predicted label.
    last_value = sample[-1]
    return int(last_value)

def predict_batch(tree, data, cols):
    """Predict a label for every sample in ``data``.

    Args:
        tree: Passed through to ``predict_single_sample``.
        data: Iterable of samples.
        cols: Passed through to ``predict_single_sample``.

    Returns:
        A list with one prediction per sample, in iteration order.
    """
    predictions = []
    for row in data:
        predictions.append(predict_single_sample(tree, row, cols))
    return predictions

def calculate_tree_complexity_metrics(tree):
    """Return placeholder complexity metrics; ``tree`` is not inspected.

    Returns:
        A new dict with the keys ``max_depth``, ``num_nodes``, ``num_leaves``
        and ``num_internal_nodes``, all set to 0.
    """
    metric_names = ('max_depth', 'num_nodes', 'num_leaves', 'num_internal_nodes')
    # No tree is built in this script, so every metric is reported as zero.
    return {name: 0 for name in metric_names}

# ----------------- Test Function -----------------
def test_case(data_path):
    """Evaluate the placeholder predictor on one CSV dataset and print metrics.

    The CSV is read, every column is label-encoded via ``preprocess_data``,
    the rows are shuffled with a fixed seed (42) and split 80/20; the last
    column of the held-out 20% is used as the true label. No tree is built:
    predictions come from ``predict_batch`` called with ``tree=None``.

    Args:
        data_path: Path of the CSV file to load.

    Returns:
        A dict with the file name (text after the last "/"), accuracy and
        weighted precision/recall/F1 rounded to 4 places, and the
        placeholder tree depth and node count.
    """
    df = pd.read_csv(data_path)
    print(f"\n📂 Dataset: {data_path}")
    print(f"Target column: {df.columns[-1]}")

    encoded_df, _ = preprocess_data(df)
    all_rows = torch.tensor(encoded_df.values, dtype=torch.float32)
    cols = list(encoded_df.columns)

    # Seeded shuffle followed by an 80/20 split.
    n_rows = len(all_rows)
    n_train = int(0.8 * n_rows)
    torch.manual_seed(42)
    permutation = torch.randperm(n_rows)
    shuffled_rows = all_rows[permutation]
    train_data = shuffled_rows[:n_train]  # not used below: no tree is built
    test_data = shuffled_rows[n_train:]

    # No tree construction
    tree = None

    # Evaluate: features are all but the last column, labels the last column.
    X_test = test_data[:, :-1]
    y_test = test_data[:, -1]
    preds = predict_batch(tree, X_test, cols)
    acc = calculate_accuracy(y_test, preds)
    p_w, r_w, f1_w = calculate_precision_recall_f1(y_test, preds, average='weighted')
    p_m, r_m, f1_m = calculate_precision_recall_f1(y_test, preds, average='macro')
    comp = calculate_tree_complexity_metrics(tree)

    print(f"Accuracy: {acc:.4f} ({acc*100:.2f}%)")
    print(f"Precision (weighted): {p_w:.4f} | Recall: {r_w:.4f} | F1: {f1_w:.4f}")
    print(f"Precision (macro): {p_m:.4f} | Recall: {r_m:.4f} | F1: {f1_m:.4f}")
    print(f"Tree Depth: {comp['max_depth']} | Nodes: {comp['num_nodes']} | Leaves: {comp['num_leaves']}\n")

    summary_row = {
        "Dataset": data_path.split("/")[-1],
        "Accuracy": round(acc, 4),
        "Precision_w": round(p_w, 4),
        "Recall_w": round(r_w, 4),
        "F1_w": round(f1_w, 4),
        "Depth": comp['max_depth'],
        "Nodes": comp['num_nodes'],
    }
    return summary_row

# ----------------- Main -----------------
if __name__=="__main__":
    # Run the evaluation on each dataset, then print one summary row per file.
    datasets = [
        "/content/mushrooms.csv",
        "/content/tictactoe.csv",
        "/content/Nursery.csv",
    ]
    results = [test_case(path) for path in datasets]
    print("\n📊 SUMMARY TABLE")
    summary_table = pd.DataFrame(results)
    print(summary_table.to_string(index=False))


📂 Dataset: /content/mushrooms.csv
Target column: class
Accuracy: 0.3114 (31.14%)
Precision (weighted): 0.4558 | Recall: 0.3114 | F1: 0.3666
Precision (macro): 0.1303 | Recall: 0.0890 | F1: 0.1048
Tree Depth: 0 | Nodes: 0 | Leaves: 0


📂 Dataset: /content/tictactoe.csv
Target column: Class
Accuracy: 0.2865 (28.65%)
Precision (weighted): 0.5250 | Recall: 0.2865 | F1: 0.3705
Precision (macro): 0.3236 | Recall: 0.1792 | F1: 0.2305
Tree Depth: 0 | Nodes: 0 | Leaves: 0


📂 Dataset: /content/Nursery.csv
Target column: class
Accuracy: 0.4811 (48.11%)
Precision (weighted): 0.4854 | Recall: 0.4811 | F1: 0.4830
Precision (macro): 0.2895 | Recall: 0.4865 | F1: 0.2883
Tree Depth: 0 | Nodes: 0 | Leaves: 0


📊 SUMMARY TABLE
      Dataset  Accuracy  Precision_w  Recall_w   F1_w  Depth  Nodes
mushrooms.csv    0.3114       0.4558    0.3114 0.3666      0      0
tictactoe.csv    0.2865       0.5250    0.2865 0.3705      0      0
  Nursery.csv    0.4811       0.4854    0.4811 0.4830      0      0
