<a href="https://colab.research.google.com/github/srigokulavishnu/MLlab/blob/main/ID3_(Small_Dataset).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import math
from collections import Counter
import pprint

# Load the training set from the CSV file sitting next to the notebook.
df = pd.read_csv("job_data.csv")
# orient="records" produces a list with one dict per row, keyed by column
# name; the rest of the script works on these plain dicts.
data = df.to_dict(orient="records")

def discretize_cgpa(cgpa):
    """Map a raw CGPA value onto a categorical band.

    Args:
        cgpa: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        ``"High"`` for values >= 9, ``"Medium"`` for values >= 8 and
        ``"Low"`` for anything smaller.  ``"Unknown"`` is returned when the
        value cannot be converted to a float or converts to NaN.
    """
    try:
        cgpa = float(cgpa)
    except (ValueError, TypeError):
        return "Unknown"
    # NaN compares False against every threshold, so without this guard a
    # missing value (e.g. an empty CSV cell loaded as NaN) would silently be
    # classified as "Low".
    if math.isnan(cgpa):
        return "Unknown"
    if cgpa >= 9:
        return "High"
    if cgpa >= 8:
        return "Medium"
    return "Low"

# Bring every training record into canonical form: CGPA becomes a band label,
# and each text column is stringified, stripped and case-normalised so that
# later equality comparisons are not thrown off by stray spaces or casing.
for record in data:
    record["CGPA"] = discretize_cgpa(record["CGPA"])
    for column, recase in (
        ("Interactive", str.title),
        ("Practical", str.upper),
        ("Communication", str.upper),
        ("JobOffer", str.title),
    ):
        record[column] = recase(str(record[column]).strip())

def entropy(data_subset):
    """Return the Shannon entropy (base 2) of the "JobOffer" labels.

    Args:
        data_subset: Iterable of record dicts, each with a "JobOffer" key.

    Returns:
        The entropy in bits as a float; 0.0 for an empty subset or when all
        records share the same label.
    """
    class_counts = Counter(row["JobOffer"] for row in data_subset)
    n_records = sum(class_counts.values())
    result = 0.0
    for n_in_class in class_counts.values():
        proportion = n_in_class / n_records
        result -= proportion * math.log2(proportion)
    return result

def info_gain(data_subset, attribute):
    """Return the information gain obtained by splitting on ``attribute``.

    The gain is the entropy of ``data_subset`` minus the size-weighted
    entropy of the partitions induced by the distinct values of
    ``attribute``.

    Args:
        data_subset: List of record dicts containing ``attribute`` and
            "JobOffer" keys.
        attribute: Key of the attribute to evaluate.

    Returns:
        The information gain in bits as a float (0.0 for an empty subset).
    """
    total = len(data_subset)

    # Partition in a single pass.  Dicts preserve insertion order, so the
    # partitions are visited in first-seen order; iterating a set of strings
    # instead would make the floating-point summation order (and therefore
    # the last bits of the result) vary between interpreter runs.
    partitions = {}
    for record in data_subset:
        partitions.setdefault(record[attribute], []).append(record)

    weighted_entropy = 0.0
    for subset in partitions.values():
        weighted_entropy += (len(subset) / total) * entropy(subset)
    return entropy(data_subset) - weighted_entropy

def majority_class(data_subset):
    """Return the most frequent "JobOffer" label in ``data_subset``.

    Ties go to the label that appears first in the subset.  An empty subset
    raises ``IndexError`` because there is no label to return.
    """
    tally = Counter(row["JobOffer"] for row in data_subset)
    ranked = tally.most_common(1)
    top_label, _occurrences = ranked[0]
    return top_label

def id3(data_subset, attributes):
    """Recursively build an ID3 decision tree.

    Args:
        data_subset: List of record dicts, each holding the candidate
            attributes and a "JobOffer" label.
        attributes: Attribute keys still available for splitting.

    Returns:
        Either a class label (leaf) or a nested dict of the form
        ``{attribute: {value: subtree_or_label, ...}}``.
    """
    labels = [record["JobOffer"] for record in data_subset]
    # Pure node: every record agrees, so this is a leaf.
    if len(set(labels)) == 1:
        return labels[0]
    # Nothing left to split on: fall back to the most common label.
    if not attributes:
        return majority_class(data_subset)

    gains = [(attr, info_gain(data_subset, attr)) for attr in attributes]
    best_attr, best_gain = max(gains, key=lambda x: x[1])
    # Gains are differences of floating-point sums, so a split that carries
    # no information can come out as a tiny positive or negative number
    # rather than exactly 0; treat anything that small as "no gain".
    negligible_gain = 1e-12
    if best_gain <= negligible_gain:
        return majority_class(data_subset)

    # Partition once, in first-seen order.  Every partition is non-empty by
    # construction, so no empty-branch fallback is needed.
    partitions = {}
    for record in data_subset:
        partitions.setdefault(record[best_attr], []).append(record)

    # Same for every branch, so compute it once outside the loop.
    remaining_attrs = [a for a in attributes if a != best_attr]
    return {
        best_attr: {
            val: id3(subset, remaining_attrs)
            for val, subset in partitions.items()
        }
    }

# Candidate attributes for splitting; the "JobOffer" target column is left
# out because it is what the tree predicts.
attributes = ["CGPA", "Interactive", "Practical", "Communication"]
# Build the tree from the full list of records.
decision_tree = id3(data, attributes)

def predict(tree, sample):
    """Classify ``sample`` by walking ``tree`` from the root to a leaf.

    Args:
        tree: A class label, or a nested dict ``{attribute: {value: subtree}}``.
        sample: Dict mapping attribute names to the sample's values.

    Returns:
        The label stored at the leaf that is reached.  If the sample's value
        for a split attribute has no branch in the tree, the majority class
        of the module-level ``data`` is returned instead.
    """
    node = tree
    while isinstance(node, dict):
        # Each internal node has the split attribute as its first key.
        split_attr = next(iter(node))
        branches = node[split_attr]
        observed = sample.get(split_attr)
        if observed not in branches:
            return majority_class(data)
        node = branches[observed]
    return node

# Ask for the CGPA until it parses as a number; a bare float(input(...))
# would abort the whole cell with ValueError on a typo or an empty answer.
while True:
    try:
        cgpa_input = float(input("\nEnter your CGPA: "))
    except ValueError:
        print("Please enter a numeric CGPA (for example 8.5).")
    else:
        break

# Apply the same strip/case normalisation that was applied to the training
# records so the answers can match the values stored in the tree.
interactive_input = input("Are you interactive? (Yes/No): ").strip().title()
practical_input = input("Your practical knowledge? (VG/AVG/Good): ").strip().upper()
communication_input = input("Your communication level? (G/M/P): ").strip().upper()

new_sample = {
    "CGPA": discretize_cgpa(cgpa_input),
    "Interactive": interactive_input,
    "Practical": practical_input,
    "Communication": communication_input,
}

prediction = predict(decision_tree, new_sample)
# Accept either "Yes" or a bare "Y" label as a positive outcome.
if prediction and prediction.strip().lower() in ["yes", "y"]:
    print("\nYou will get the job.")
else:
    print("\nYou will not get the job.")



Enter your CGPA: 9
Are you interactive? (Yes/No): no
Your practical knowledge? (VG/AVG/Good): good
Your communication level? (G/M/P): p

You will not get the job.
