In [1]:
import numpy as np
import pandas as pd
from math import log2

# ===============================
# Section G: Dataset Generation
# ===============================

# Student ID as random seed
SEED = 22424601
# Seeds NumPy's global RNG; the draws below consume it in order, so
# reordering or inserting np.random calls changes the generated dataset.
np.random.seed(SEED)

# Define categorical feature values (2–3 values each)
water_type = ['Freshwater', 'Brackish']
fish_size = ['Small', 'Medium', 'Large']
capture_method = ['Net', 'Trap']

# Binary class label
class_label = ['Yes', 'No']

# Generate 24 instances
# Every column (including the class label) is drawn independently of the
# others, so the label has no built-in relationship to any feature.
data = {
    'Water_Type': np.random.choice(water_type, 24),
    'Fish_Size': np.random.choice(fish_size, 24),
    'Capture_Method': np.random.choice(capture_method, 24),
    'Market_Accepted': np.random.choice(class_label, 24)
}

# One row per instance, one column per feature plus the class label
df = pd.DataFrame(data)

# Display dataset
df


Unnamed: 0,Water_Type,Fish_Size,Capture_Method,Market_Accepted
0,Freshwater,Large,Trap,No
1,Freshwater,Large,Net,Yes
2,Brackish,Large,Trap,Yes
3,Freshwater,Small,Net,No
4,Brackish,Medium,Trap,No
5,Brackish,Small,Trap,Yes
6,Brackish,Small,Trap,Yes
7,Brackish,Medium,Trap,No
8,Freshwater,Medium,Net,No
9,Freshwater,Large,Trap,No


In [2]:
# ===============================
# Entropy Computation
# ===============================

def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Parameters
    ----------
    labels : array-like
        Label values in any form accepted by ``np.unique`` (list, NumPy
        array, pandas Series, ...).

    Returns
    -------
    float
        ``sum(-p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. A single-class or empty input yields ``0.0``.
    """
    _, counts = np.unique(labels, return_counts=True)
    probabilities = counts / counts.sum()
    # Negate each term and accumulate from +0.0 instead of negating the final
    # sum: for a pure label set the only term is -1.0 * 0.0 == -0.0, and
    # 0.0 + (-0.0) is +0.0, so the result never shows up as "-0.0".
    return sum((-p * log2(p) for p in probabilities), 0.0)

# Entropy of the class-label column on the unmodified dataset
class_entropy = entropy(df['Market_Accepted'])

# Bare last expression so the notebook renders the value as the cell output
class_entropy


np.float64(0.9949848281859701)

In [3]:
# ===============================
# Modify One Data Instance
# ===============================

# Work on a separate copy so `df` keeps the original labels
df_modified = df.copy()

# Show row 0 as it is before the change
print("Original instance:")
print(df_modified.loc[0])

# Flip the class label of exactly one row (index 0):
# 'No' becomes 'Yes'; any other value becomes 'No'
if df_modified.loc[0, 'Market_Accepted'] == 'No':
    df_modified.loc[0, 'Market_Accepted'] = 'Yes'
else:
    df_modified.loc[0, 'Market_Accepted'] = 'No'

# Show the same row after the flip
print("\nModified instance:")
print(df_modified.loc[0])


Original instance:
Water_Type         Freshwater
Fish_Size               Large
Capture_Method           Trap
Market_Accepted            No
Name: 0, dtype: object

Modified instance:
Water_Type         Freshwater
Fish_Size               Large
Capture_Method           Trap
Market_Accepted           Yes
Name: 0, dtype: object


In [4]:
# ===============================
# Recompute Entropy After Modification
# ===============================

# Same entropy measure, now on the copy whose row-0 label was flipped
modified_entropy = entropy(df_modified['Market_Accepted'])

# Print both values one after the other for direct comparison
print("Original Class Entropy:", class_entropy)
print("Modified Class Entropy:", modified_entropy)


Original Class Entropy: 0.9949848281859701
Modified Class Entropy: 1.0


In [6]:
# ===============================
# Information Gain Computation
# ===============================

def information_gain(df, feature, target='Market_Accepted'):
    """Compute the information gain of `feature` with respect to `target`.

    The gain is the entropy of the target column minus the weighted average
    entropy of the target within each distinct value of `feature`, where each
    weight is that value's share of the rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing both the `feature` and `target` columns.
    feature : str
        Name of the column to split on.
    target : str, optional
        Name of the class-label column (default ``'Market_Accepted'``).

    Returns
    -------
    Entropy of `target` minus the split-weighted entropy, in the units
    returned by `entropy`.
    """
    base_entropy = entropy(df[target])
    n_rows = len(df)
    levels, level_counts = np.unique(df[feature], return_counts=True)

    weighted_entropy = 0
    for level, n_level in zip(levels, level_counts):
        # Target labels of the rows that take this feature value
        in_level = df[feature] == level
        weighted_entropy += (n_level / n_rows) * entropy(df.loc[in_level, target])

    return base_entropy - weighted_entropy


In [7]:
# Candidate split attributes: every column except the class label
features = ['Water_Type', 'Fish_Size', 'Capture_Method']

# Gain of each feature on the original dataset
print("Information Gain BEFORE modification:")
for f in features:
    print(f, ":", information_gain(df, f))

# Gain of each feature on the copy with one flipped class label
print("\nInformation Gain AFTER modification:")
for f in features:
    print(f, ":", information_gain(df_modified, f))


Information Gain BEFORE modification:
Water_Type : 0.13019784892915887
Fish_Size : 0.05132037498793407
Capture_Method : 3.527804636371812e-05

Information Gain AFTER modification:
Water_Type : 0.08170416594551044
Fish_Size : 0.09369375314528727
Capture_Method : 0.005050449860393602
