<a href="https://colab.research.google.com/github/nishan2001/Data-Warehousing-and-data-mining/blob/main/DWDM_Lab3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## ID3 Algorithm


In [1]:
# Write a program to implement ID3.
# (use the attached laptop_buy_data.csv)

import pandas as pd
import math
from collections import Counter
from google.colab import files
import io
import pprint


# Prompt for a file through the Colab upload helper; the result is indexed by
# file name below.
uploaded = files.upload()


# Take the first key of `uploaded` as the file name, then wrap the value stored
# under it in BytesIO so pandas can parse it as a CSV.
# NOTE(review): next() raises StopIteration if nothing was uploaded.
filename = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[filename]))


def entropy(target_col):
    """Return the Shannon entropy (in bits) of the labels in ``target_col``.

    Parameters
    ----------
    target_col : pandas.Series
        Column of class labels.

    Returns
    -------
    float
        Sum of ``-p * log2(p)`` over the relative frequency ``p`` of every
        label that actually occurs; ``0`` when the column is pure or empty.
    """
    probabilities = target_col.value_counts(normalize=True)
    # value_counts() lists unobserved categories of a categorical column with
    # frequency 0, and math.log2(0) raises ValueError. Those terms contribute
    # nothing to the entropy (p * log2(p) -> 0 as p -> 0), so they are skipped.
    # Negating each term instead of the total also keeps a pure column from
    # coming back as -0.0.
    return sum(-p * math.log2(p) for p in probabilities if p > 0)


def info_gain(data, split_attr, target_attr):
    """Return the information gain obtained by splitting on ``split_attr``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate.
    split_attr : str
        Column whose distinct values define the partitions.
    target_attr : str
        Column holding the class labels.

    Returns
    -------
    float
        Entropy of the labels before the split minus the size-weighted
        entropy of the labels inside each partition.
    """
    baseline = entropy(data[target_attr])
    n_rows = len(data)

    def weighted_partition_entropy(value):
        # Rows whose split column equals `value`, weighted by their share
        # of the whole frame.
        partition = data[data[split_attr] == value]
        return (len(partition) / n_rows) * entropy(partition[target_attr])

    remainder = sum(
        weighted_partition_entropy(value) for value in data[split_attr].unique()
    )
    return baseline - remainder


def id3(data, features, target_attr):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows that reach this node.
    features : list
        Column names still available for splitting.
    target_attr : str
        Name of the class-label column.

    Returns
    -------
    A class label when the node is a leaf, otherwise a dictionary of the
    form ``{feature: {value: subtree, ...}}``.
    """
    labels = data[target_attr]

    # Leaf: every remaining row carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]
    # Leaf: nothing left to split on, so fall back to the most frequent label.
    if len(features) == 0:
        return labels.mode()[0]

    # Score each candidate; max() keeps the first one on ties.
    scores = {}
    for candidate in features:
        scores[candidate] = info_gain(data, candidate, target_attr)
    split_on = max(scores, key=scores.get)

    # The chosen feature is not reused further down this branch.
    remaining = [f for f in features if f != split_on]

    branches = {}
    for value in data[split_on].unique():
        rows = data[data[split_on] == value]
        branches[value] = id3(rows, remaining, target_attr)

    return {split_on: branches}


# Every column except the label column "Class" is a candidate split attribute.
features = list(data.columns)
features.remove("Class")  # raises ValueError if there is no "Class" column
decision_tree = id3(data, features, "Class")


# Pretty-print the nested-dict tree so the branch structure is readable.
pprint.pprint(decision_tree)


Saving laptop_buy_data.csv to laptop_buy_data.csv
{'Age': {'Middle_Aged': {'Income': {'High': 'No',
                                    'Low': {'Credit_Rating': {'Excellent': 'No',
                                                              'Fair': {'Student': {'Yes': 'Buy'}}}},
                                    'Medium': 'No'}},
         'Senior': {'Credit_Rating': {'Excellent': {'Income': {'High': {'Student': {'No': 'Buy'}},
                                                               'Low': 'Buy',
                                                               'Medium': 'Buy'}},
                                      'Fair': 'No'}},
         'Youth': {'Student': {'No': {'Credit_Rating': {'Excellent': 'No',
                                                        'Fair': 'Buy'}},
                               'Yes': 'Buy'}}}}


## Naive Bayesian Algorithm


In [None]:
# Write a program to implement Naive Bayesian algorithm.
# (use the attached laptop_buy_data.csv)
import pandas as pd
from google.colab import files
import io
from collections import defaultdict


# Prompt for the CSV again through the Colab upload helper, then parse the
# first uploaded entry (looked up by its key) from an in-memory byte buffer.
# NOTE(review): next() raises StopIteration if nothing was uploaded.
uploaded = files.upload()
filename = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[filename]))


# Separate the label column from the predictor columns.
y = data['Class']
X = data.drop(columns=['Class'])
features = X.columns
classes = y.unique()


# Priors P(Class): relative frequency of each label.
class_probs = y.value_counts(normalize=True).to_dict()
# Likelihood table, indexed as feature_probs[feature][value][class].
feature_probs = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))


# Estimate P(value | class) as the share of that class's rows holding `value`.
for feature in features:
    for class_val in classes:
        class_rows = data.loc[data['Class'] == class_val]
        class_size = len(class_rows)
        for val, count in class_rows[feature].value_counts().items():
            feature_probs[feature][val][class_val] = count / class_size


def predict(instance):
    """Return the most probable class for one row under the Naive Bayes model.

    Reads the module-level ``classes``, ``features``, ``class_probs`` and
    ``feature_probs`` tables.

    Parameters
    ----------
    instance : mapping or pandas.Series
        One row, indexable by feature name.

    Returns
    -------
    The class whose prior multiplied by the per-feature likelihoods is
    largest. A (value, class) pair that has no stored likelihood contributes
    the fallback factor ``1e-6``.
    """
    probs = {}
    for class_val in classes:
        prob = class_probs[class_val]
        for feature in features:
            # Look the value up with .get(): indexing the defaultdict with a
            # value that has no entry would insert an empty one, so merely
            # predicting would keep growing the fitted table.
            likelihoods = feature_probs[feature].get(instance[feature], {})
            prob *= likelihoods.get(class_val, 1e-6)
        probs[class_val] = prob
    return max(probs, key=probs.get)


# Classify every row of X (the same rows the tables were estimated from).
predictions = X.apply(predict, axis=1)

# Put the true labels next to the model's answers and show the table.
results = pd.DataFrame({'Actual': y, 'Predicted': predictions})
print(results)

# Share of rows whose predicted label equals the actual one.
hits = results['Predicted'] == results['Actual']
accuracy = hits.mean()
print(f"\nAccuracy: {accuracy:.2%}")


Saving laptop_buy_data.csv to laptop_buy_data (4).csv
   Actual Predicted
0     Buy       Buy
1      No       Buy
2      No        No
3      No        No
4      No        No
5     Buy        No
6      No        No
7     Buy       Buy
8      No        No
9     Buy       Buy
10    Buy       Buy
11     No        No
12     No        No
13     No       Buy
14    Buy       Buy
15    Buy       Buy
16    Buy        No
17    Buy        No
18     No        No
19     No        No

Accuracy: 75.00%


## Classification by Backpropagation

In [None]:
# Write a program to implement classification by backpropagation on the following data.

# X1    X2     t
# -1    -1    -1
# -1     1     1
#  1    -1     1
#  1     1    -1


import numpy as np


def sigmoid(x):
    """Logistic function ``1 / (1 + e^(-x))``, applied element-wise.

    Accepts a scalar or a NumPy array and returns a value of the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, element-wise.

    This is the sigmoid's slope written in terms of the sigmoid's own output,
    so ``x`` is expected to be an activation value that has already been
    passed through the sigmoid, not a pre-activation.
    """
    complement = 1 - x
    return x * complement

# Network shape and training hyperparameters.
input_size, hidden_size, output_size = 2, 4, 1
learning_rate = 0.1
epochs = 10000

# The four training patterns, one (X1, X2) pair per row.
X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])

# Targets as a column vector. The task's t = -1 / +1 labels are stored as
# 0 / 1 so that they lie inside the range of the sigmoid output unit.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Fix the global NumPy seed so the random initial weights are reproducible.
np.random.seed(42)

# Weights are drawn from a standard normal; biases start at zero.
# W1 must be sampled before W2 to keep the same values for a given seed.
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b1 = np.zeros((1, hidden_size))
b2 = np.zeros((1, output_size))

# Training loop: full-batch gradient descent on the squared error.
for epoch in range(epochs):
    # Forward pass: input -> hidden -> output, with a sigmoid at both layers.
    hidden_act = sigmoid(np.dot(X, W1) + b1)
    output = sigmoid(np.dot(hidden_act, W2) + b2)

    # Mean squared error over all patterns; only used for the progress log.
    error = y - output
    loss = np.mean(error ** 2)

    # Backward pass: output-layer delta first, then push it back through W2.
    # Both deltas must be computed before any weight is changed.
    delta_out = error * sigmoid_derivative(output)
    delta_hidden = np.dot(delta_out, W2.T) * sigmoid_derivative(hidden_act)

    # Parameter updates. `error` is (target - output), so adding the scaled
    # deltas moves the weights downhill on the loss.
    W2 += learning_rate * np.dot(hidden_act.T, delta_out)
    b2 += learning_rate * delta_out.sum(axis=0, keepdims=True)
    W1 += learning_rate * np.dot(X.T, delta_hidden)
    b1 += learning_rate * delta_hidden.sum(axis=0, keepdims=True)

    if epoch % 2000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Run a fresh forward pass with the final weights. The `output` left over from
# the training loop was computed before the last weight update, so it is one
# step stale (and it does not exist at all when epochs == 0).
predictions = sigmoid(sigmoid(X.dot(W1) + b1).dot(W2) + b2)
print("\nPredictions:")
# Each row of `predictions` and `y` is a length-1 array, hence the [0].
for inputs, predicted, actual in zip(X, predictions, y):
    print(f"Input: {inputs}, Predicted: {predicted[0]:.4f}, Actual: {actual[0]}")


Epoch 0, Loss: 0.2690
Epoch 2000, Loss: 0.0430
Epoch 4000, Loss: 0.0055
Epoch 6000, Loss: 0.0027
Epoch 8000, Loss: 0.0017

Predictions:
Input: [-1 -1], Predicted: 0.0439, Actual: 0
Input: [-1  1], Predicted: 0.9745, Actual: 1
Input: [ 1 -1], Predicted: 0.9588, Actual: 1
Input: [1 1], Predicted: 0.0289, Actual: 0
