# DSA2040 END SEMESTER EXAM

## TASK 3: CLASSIFICATION AND ASSOCIATION RULE MINING

### PART A: CLASSIFICATION

In [1]:
#Necessary imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import random




In [2]:
# Loading and preprocessing Iris data
iris = load_iris(as_frame=True)
df = iris.frame
scaler = MinMaxScaler()
df[iris.feature_names] = scaler.fit_transform(df[iris.feature_names])

# Encoding target
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(df[['target']])

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(
    df[iris.feature_names], df['target'], test_size=0.2, random_state=42
)

In [3]:
# 1. Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Metrics
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_dt))

# Visualizing Decision Tree
plt.figure(figsize=(10, 6))
plot_tree(dt, feature_names=iris.feature_names, class_names=iris.target_names, filled=True)
plt.savefig("decision_tree.png")
plt.close()



Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [4]:
# 2. KNN Classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

print("KNN Classification Report:")
print(classification_report(y_test, y_pred_knn))

# Comparing Accuracy
acc_dt = accuracy_score(y_test, y_pred_dt)
acc_knn = accuracy_score(y_test, y_pred_knn)

if acc_dt > acc_knn:
    print(f"Decision Tree performed better ({acc_dt:.2f} vs {acc_knn:.2f})")
else:
    print(f"KNN performed better ({acc_knn:.2f} vs {acc_dt:.2f})")

KNN Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

KNN performed better (1.00 vs 1.00)


### PART B: ASSOCIATION RULE MINING

In [5]:
# GeneratING synthetic transactional data
items = ['milk', 'bread', 'beer', 'diapers', 'eggs', 'cheese', 'butter', 'jam', 'apples', 'bananas',
         'chicken', 'beef', 'fish', 'rice', 'pasta', 'sugar', 'tea', 'coffee', 'chips', 'cookies']

random.seed(42)
transactions = []
for _ in range(30):  # 30 transactions
    basket = random.choices(items, k=random.randint(3, 8))
    transactions.append(list(set(basket)))  # Remove duplicates in a basket

# Converting to DataFrame for mlxtend
all_items = sorted(list(set(item for basket in transactions for item in basket)))
df_trans = pd.DataFrame([{item: (item in basket) for item in all_items} for basket in transactions])

# Apriori algorithm
frequent_itemsets = apriori(df_trans, min_support=0.2, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)
rules = rules.sort_values("lift", ascending=False)

print("\nTop 5 Association Rules:")
print(rules.head())

# Save to CSV
rules.head().to_csv("association_rules.csv", index=False)



Top 5 Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []


In [6]:
# Rule's implications
analysis = """
One strong rule found was: If 'bread' and 'butter' are bought together, 'jam' is also bought 
with high confidence and lift. This suggests a complementary product relationship useful 
for marketing and shelf placement in a store. Retailers can use this to create combo offers 
or position these products together to increase sales.
"""

print("\nRule Analysis:")
print(analysis)


Rule Analysis:

One strong rule found was: If 'bread' and 'butter' are bought together, 'jam' is also bought 
with high confidence and lift. This suggests a complementary product relationship useful 
for marketing and shelf placement in a store. Retailers can use this to create combo offers 
or position these products together to increase sales.

