In [None]:
# K-Means clustering on synthetic blob data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler

def generate_data(n_samples=300, random_state=42):
    """Build a synthetic six-feature customer table from three blobs.

    Every feature column is min-max rescaled to the interval [10, 150]. The id
    of the blob each row was drawn from is stored in ``Customer_Segment``.

    Args:
        n_samples: Number of rows to generate.
        random_state: Seed forwarded to ``make_blobs``.

    Returns:
        pandas.DataFrame with six feature columns followed by
        ``Customer_Segment``.
    """
    feature_names = ["Annual_Income", "Spending_Score", "Age", "Savings", "Debt", "Credit_Score"]
    raw_points, blob_ids = make_blobs(
        n_samples=n_samples,
        n_features=6,
        centers=3,
        random_state=random_state,
    )
    scaled_points = MinMaxScaler(feature_range=(10, 150)).fit_transform(raw_points)
    frame = pd.DataFrame(scaled_points, columns=feature_names)
    frame["Customer_Segment"] = blob_ids
    return frame

def apply_kmeans(X, n_clusters=3):
    """Fit K-Means on ``X`` and return the resulting clustering.

    The estimator is seeded (``random_state=42``) and uses 10 initialisations.

    Args:
        X: Feature matrix passed straight to ``KMeans.fit``.
        n_clusters: Number of clusters to form.

    Returns:
        Tuple ``(labels, centers)`` taken from the fitted model's ``labels_``
        and ``cluster_centers_`` attributes.
    """
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42).fit(X)
    return model.labels_, model.cluster_centers_

def plot_clusters(X, labels, centers):
    """Scatter the first two columns of ``X`` coloured by cluster, plus centroids.

    Args:
        X: 2-D array; column 0 is drawn on the x axis, column 1 on the y axis.
        labels: Per-row cluster ids used to colour the points.
        centers: 2-D array of centroids; its first two columns are drawn as
            red ``X`` markers.
    """
    point_style = dict(c=labels, cmap='viridis', marker='o', edgecolors='k')
    centroid_style = dict(c='red', marker='X', s=200, label='Centroids')

    plt.scatter(X[:, 0], X[:, 1], **point_style)
    plt.scatter(centers[:, 0], centers[:, 1], **centroid_style)

    plt.xlabel('Annual Income (in $1000s)')
    plt.ylabel('Spending Score')
    plt.title('Customer Segmentation using K-Means')
    plt.legend()
    plt.show()

# Generate the synthetic customers and preview the first rows.
dataset = generate_data()
print(dataset.head())

# Cluster on every feature column; the trailing Customer_Segment column is the
# generating blob id and must not be fed to the model.
labels, centers = apply_kmeans(dataset.drop(columns="Customer_Segment").to_numpy())

# Visualise only the first two dimensions of both the points and the centroids.
plot_clusters(dataset[["Annual_Income", "Spending_Score"]].to_numpy(), labels, centers[:, :2])




In [None]:
# Agglomerative clustering on synthetic blob data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler
from scipy.cluster.hierarchy import dendrogram, linkage

def generate_data(n_samples=100, random_state=42):
    """Build a small synthetic six-feature customer table from three blobs.

    Every feature column is min-max rescaled to the interval [10, 15]. The id
    of the blob each row was drawn from is stored in ``Customer_Segment``.

    Args:
        n_samples: Number of rows to generate.
        random_state: Seed forwarded to ``make_blobs``.

    Returns:
        pandas.DataFrame with six feature columns followed by
        ``Customer_Segment``.
    """
    feature_names = ["Annual_Income", "Spending_Score", "Age", "Savings", "Debt", "Credit_Score"]
    raw_points, blob_ids = make_blobs(
        n_samples=n_samples,
        n_features=6,
        centers=3,
        random_state=random_state,
    )
    scaled_points = MinMaxScaler(feature_range=(10, 15)).fit_transform(raw_points)
    frame = pd.DataFrame(scaled_points, columns=feature_names)
    frame["Customer_Segment"] = blob_ids
    return frame

def apply_agglomerative_clustering(X, n_clusters=3):
    """Cluster ``X`` with scikit-learn's ``AgglomerativeClustering``.

    Args:
        X: Feature matrix passed to ``fit_predict``.
        n_clusters: Number of clusters to cut the hierarchy into.

    Returns:
        The per-row cluster labels returned by ``fit_predict``.
    """
    return AgglomerativeClustering(n_clusters=n_clusters).fit_predict(X)

def plot_clusters(X, labels):
    """Scatter the first two columns of ``X`` coloured by cluster label.

    Args:
        X: 2-D array; column 0 is drawn on the x axis, column 1 on the y axis.
        labels: Per-row cluster ids used to colour the points.
    """
    point_style = dict(c=labels, cmap='viridis', marker='o', edgecolors='k')
    plt.scatter(X[:, 0], X[:, 1], **point_style)

    plt.xlabel('Annual Income (in $1000s)')
    plt.ylabel('Spending Score')
    plt.title('Customer Segmentation using Agglomerative Clustering')
    plt.show()

def plot_dendrogram(X):
    """Draw the Ward-linkage dendrogram of ``X`` on a new 10x8 inch figure.

    Args:
        X: Observation matrix handed to ``scipy.cluster.hierarchy.linkage``.
    """
    # Build the merge tree first so a bad input fails before a figure is opened.
    merge_tree = linkage(X, method='ward')

    plt.figure(figsize=(10, 8))
    dendrogram(merge_tree)

    plt.xlabel('Data Points')
    plt.ylabel('Euclidean Distance')
    plt.title('Hierarchical Clustering Dendrogram')
    plt.show()

# Generate the synthetic customers and preview the first rows.
dataset = generate_data()
print(dataset.head())

# Cluster on the feature columns only (Customer_Segment is the generating blob id),
# then show the result on the first two features.
labels_agglo = apply_agglomerative_clustering(dataset.drop(columns="Customer_Segment").to_numpy())
plot_clusters(dataset[["Annual_Income", "Spending_Score"]].to_numpy(), labels_agglo)

# The dendrogram is built from the same feature matrix that was clustered.
plot_dendrogram(dataset.drop(columns="Customer_Segment").to_numpy())


In [None]:
# Find the optimal number of clusters with the elbow method, then apply K-Means

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, precision_score
from scipy.stats import mode

# Load the mall-customer data and preview it.
df = pd.read_csv("/content/Mall_Customers.csv")
print(df.head())

# Cluster on the two behavioural columns only.
X = df[['Annual Income (k$)', 'Spending Score (1-100)']].values

# Elbow method: record the within-cluster sum of squares (KMeans.inertia_)
# for every candidate k.
k_values = range(1, 11)
wcss = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(8, 5))
plt.plot(k_values, wcss, marker='o', linestyle='--')
plt.xlabel("Number of Clusters (k)")
plt.ylabel("WCSS (Within-Cluster Sum of Squares)")
plt.title("Elbow Method for Optimal k")
plt.show()

# The elbow is where the curve bends most sharply, i.e. where the second
# difference of WCSS is largest. Entry i of the second difference is centred
# on wcss[i + 1], which belongs to k = i + 2, hence the "+ 2".
# This value is used directly below: the earlier hard-coded `optimal_k = 3`
# override was removed so the printed k is always the k that gets clustered.
optimal_k = int(np.argmax(np.diff(wcss, 2)) + 2)
print(f"The optimal number of clusters is: {optimal_k}")

kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
df["Cluster"] = kmeans.fit_predict(X)

plt.figure(figsize=(8, 5))
for i in range(optimal_k):
    # Convert the mask to a plain ndarray before indexing the NumPy array X.
    in_cluster = (df["Cluster"] == i).to_numpy()
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], label=f'Cluster {i}')

plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.xlabel("Annual Income (k$)")
plt.ylabel("Spending Score (1-100)")
plt.title("Customer Segmentation using K-Means")
plt.legend()
plt.show()


In [None]:
# agglomerative clustering and show clusters

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering

# Load the penguin measurements and preview them.
df = pd.read_csv("/content/penguins.csv")
print(df.head())

# Keep the two measurements of interest, discarding rows where either is missing.
df_filtered = df.dropna(subset=['flipper_length_mm', 'body_mass_g'])[['flipper_length_mm', 'body_mass_g']].copy()
X = df_filtered.to_numpy()

# Standardise both columns so neither dominates the distance computation.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Ward linkage on the standardised data drives both the dendrogram and the cut.
linked = linkage(X_scaled, method='ward')

plt.figure(figsize=(10, 6))
dendrogram(linked)
plt.xlabel('Data Points')
plt.ylabel('Euclidean Distance')
plt.title('Dendrogram for Agglomerative Clustering')
plt.show()

# Cut the tree at a fixed linkage distance and count the resulting flat clusters.
threshold = 8
clusters = fcluster(linked, t=threshold, criterion='distance')
optimal_clusters = np.unique(clusters).size
print(f"The optimal number of clusters is: {optimal_clusters}")

# Re-cluster with scikit-learn using that cluster count to obtain per-row labels.
agglo = AgglomerativeClustering(n_clusters=optimal_clusters)
df_filtered["clusters"] = agglo.fit_predict(X_scaled)

# Plot in the original (unscaled) units so the axes stay interpretable.
plt.figure(figsize=(8, 5))
for cluster in range(optimal_clusters):
    members = X[(df_filtered["clusters"] == cluster).to_numpy()]
    plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {cluster}')

plt.title("Penguin Clusters using Agglomerative Clustering")
plt.xlabel("Flipper Length (mm)")
plt.ylabel("Body Mass (g)")
plt.legend()
plt.show()

In [None]:
# Apriori
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Toy market-basket data: one inner list per transaction.
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Bread', 'Diaper', 'Butter'],
    ['Bread', 'Butter', 'Diaper'],
    ['Milk', 'Bread', 'Diaper', 'Beer'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Cola'],
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper', 'Cola'],
]
print("Transactions:\n", transactions)

# One-hot encode the baskets: one boolean column per distinct item.
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print("\nBinary Matrix of Transactions:\n", df.head())

# Mine itemsets present in at least 30% of the transactions.
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
print("\nFrequent Itemsets:\n", frequent_itemsets)

# Keep only rules whose confidence is at least 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:\n", rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

def predict_next_item(input_items):
    """Suggest one additional item for a basket using the module-level ``rules``.

    A rule is applicable only when *all* of its antecedent items are in the
    basket: its confidence describes baskets containing the whole antecedent,
    so quoting it for a basket that lacks part of the antecedent would be
    misleading. Rules whose consequents are already entirely in the basket are
    skipped because they have nothing new to suggest.

    Args:
        input_items: Iterable of item names, spelled as they appear in the
            ``antecedents`` / ``consequents`` columns of ``rules``.

    Returns:
        The suggested item name, or ``None`` when no rule applies. A message
        describing the outcome is printed in both cases.
    """
    input_items = set(input_items)

    # Boolean mask aligned with ``rules``: antecedent fully satisfied by the
    # basket AND at least one consequent item the customer does not have yet.
    applicable = pd.Series(
        [
            antecedent <= input_items and not consequent <= input_items
            for antecedent, consequent in zip(rules['antecedents'], rules['consequents'])
        ],
        index=rules.index,
        dtype=bool,
    )
    matched_rules = rules[applicable]

    if matched_rules.empty:
        print("\nNo strong association found. Try with a different combination of items.")
        return None

    # Most confident applicable rule; ties resolve to the first such rule.
    best_rule = matched_rules.loc[matched_rules['confidence'].idxmax()]
    # Sort so the pick is deterministic when several new consequents exist.
    predicted_item = sorted(best_rule['consequents'] - input_items)[0]

    print(f"\nBased on your input items {input_items}, you are likely to buy '{predicted_item}' with confidence of {best_rule['confidence']:.2f}")
    return predicted_item

# Read a comma-separated basket from the user.
raw_basket = input("Enter the items you bought (comma separated): ")

# Normalise each name to the "Milk"-style casing used in the transactions and
# drop empty tokens, which a trailing or doubled comma would otherwise add to
# the basket and prevent any rule from matching.
input_items = [item.strip().capitalize() for item in raw_basket.split(',') if item.strip()]

if input_items:
    prediction = predict_next_item(input_items)
else:
    # Nothing usable was typed; skip the rule lookup rather than query with an empty basket.
    print("\nNo items entered. Please provide at least one item.")
    prediction = None

# Bare last expression so the notebook still echoes the returned item.
prediction


In [None]:
# FP Growth
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Toy market-basket data: one inner list per transaction.
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Bread', 'Diaper', 'Butter'],
    ['Bread', 'Butter', 'Diaper'],
    ['Milk', 'Bread', 'Diaper', 'Beer'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Cola'],
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper', 'Cola'],
]
print("Transactions:\n", transactions)

# One-hot encode the baskets: one boolean column per distinct item.
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print("\nBinary Matrix of Transactions:\n", df.head())

# Mine itemsets present in at least 30% of the transactions with FP-Growth.
frequent_itemsets = fpgrowth(df, min_support=0.3, use_colnames=True)
print("\nFrequent Itemsets:\n", frequent_itemsets)

# Keep only rules whose confidence is at least 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:\n", rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

def predict_next_item(input_items):
    """Suggest one additional item for a basket using the module-level ``rules``.

    A rule is applicable only when *all* of its antecedent items are in the
    basket: its confidence describes baskets containing the whole antecedent,
    so quoting it for a basket that lacks part of the antecedent would be
    misleading. Rules whose consequents are already entirely in the basket are
    skipped because they have nothing new to suggest.

    Args:
        input_items: Iterable of item names, spelled as they appear in the
            ``antecedents`` / ``consequents`` columns of ``rules``.

    Returns:
        The suggested item name, or ``None`` when no rule applies. A message
        describing the outcome is printed in both cases.
    """
    input_items = set(input_items)

    # Boolean mask aligned with ``rules``: antecedent fully satisfied by the
    # basket AND at least one consequent item the customer does not have yet.
    applicable = pd.Series(
        [
            antecedent <= input_items and not consequent <= input_items
            for antecedent, consequent in zip(rules['antecedents'], rules['consequents'])
        ],
        index=rules.index,
        dtype=bool,
    )
    matched_rules = rules[applicable]

    if matched_rules.empty:
        print("\nNo strong association found. Try with a different combination of items.")
        return None

    # Most confident applicable rule; ties resolve to the first such rule.
    best_rule = matched_rules.loc[matched_rules['confidence'].idxmax()]
    # Sort so the pick is deterministic when several new consequents exist.
    predicted_item = sorted(best_rule['consequents'] - input_items)[0]

    print(f"\nBased on your input items {input_items}, you are likely to buy '{predicted_item}' with confidence of {best_rule['confidence']:.2f}")
    return predicted_item

# Read a comma-separated basket from the user.
raw_basket = input("Enter the items you bought (comma separated): ")

# Normalise each name to the "Milk"-style casing used in the transactions and
# drop empty tokens, which a trailing or doubled comma would otherwise add to
# the basket and prevent any rule from matching.
input_items = [item.strip().capitalize() for item in raw_basket.split(',') if item.strip()]

if input_items:
    prediction = predict_next_item(input_items)
else:
    # Nothing usable was typed; skip the rule lookup rather than query with an empty basket.
    print("\nNo items entered. Please provide at least one item.")
    prediction = None

# Bare last expression so the notebook still echoes the returned item.
prediction


In [None]:
# Eclat
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

# Toy market-basket data: one inner list per transaction.
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Bread', 'Diaper', 'Butter'],
    ['Bread', 'Butter', 'Diaper'],
    ['Milk', 'Bread', 'Diaper', 'Beer'],
    ['Bread', 'Butter'],
    ['Milk', 'Diaper', 'Beer', 'Cola'],
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diaper', 'Cola'],
]
print("Transactions:\n", transactions)

# One-hot encode the baskets: one boolean column per distinct item.
te = TransactionEncoder()
te.fit(transactions)
te_array = te.transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print("\nBinary Matrix of Transactions:\n", df.head())

def eclat(dataset, min_support=0.3):
    """Mine all frequent itemsets with ECLAT (vertical tid-set intersection).

    Each item is represented by the set of row positions ("tids") in which it
    occurs. The support of a larger itemset is the size of the intersection of
    its members' tid-sets, so itemsets of every length are found by a
    depth-first extension of frequent prefixes, not only singles and pairs.

    Args:
        dataset: One-hot encoded transactions; one row per transaction and one
            truthy/falsy column per item.
        min_support: Minimum fraction of transactions an itemset must appear
            in to be reported.

    Returns:
        pandas.DataFrame with columns ``itemsets`` (frozenset of column names)
        and ``support`` (float), ordered by itemset size. Empty when
        ``dataset`` has no rows or nothing reaches ``min_support``.
    """
    column_names = ['itemsets', 'support']
    num_transactions = len(dataset)
    if num_transactions == 0:
        # No transactions: support is undefined, so report nothing instead of dividing by zero.
        return pd.DataFrame(columns=column_names)

    # Vertical layout: frequent single items paired with the rows that contain them.
    frequent_items = []
    for item in dataset.columns:
        tids = frozenset(np.flatnonzero(dataset[item].to_numpy().astype(bool)).tolist())
        if len(tids) / num_transactions >= min_support:
            frequent_items.append((item, tids))

    found = []

    def _grow(prefix, candidates):
        # ``candidates`` holds (item, tids) pairs whose tids are already
        # restricted to the transactions that contain every item of ``prefix``.
        for position, (item, tids) in enumerate(candidates):
            itemset = prefix | {item}
            found.append((itemset, len(tids) / num_transactions))

            # Only later candidates are tried, so each itemset is built once.
            narrowed = []
            for other_item, other_tids in candidates[position + 1:]:
                shared = tids & other_tids
                if len(shared) / num_transactions >= min_support:
                    narrowed.append((other_item, shared))
            if narrowed:
                _grow(itemset, narrowed)

    _grow(frozenset(), frequent_items)

    # Stable sort by size: singles first, then pairs, then longer itemsets,
    # each group kept in discovery order.
    found.sort(key=lambda entry: len(entry[0]))
    return pd.DataFrame(found, columns=column_names)

# Mine itemsets present in at least 30% of the transactions with the local ECLAT helper.
frequent_itemsets = eclat(df, min_support=0.3)
print("\nFrequent Itemsets from ECLAT:\n", frequent_itemsets)

# Derive rules from those itemsets, keeping only confidence >= 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:\n", rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']])

def predict_next_item(input_items):
    """Suggest one additional item for a basket using the module-level ``rules``.

    A rule is applicable only when *all* of its antecedent items are in the
    basket: its confidence describes baskets containing the whole antecedent,
    so quoting it for a basket that lacks part of the antecedent would be
    misleading. Rules whose consequents are already entirely in the basket are
    skipped because they have nothing new to suggest.

    Args:
        input_items: Iterable of item names, spelled as they appear in the
            ``antecedents`` / ``consequents`` columns of ``rules``.

    Returns:
        The suggested item name, or ``None`` when no rule applies. A message
        describing the outcome is printed in both cases.
    """
    input_items = set(input_items)

    # Boolean mask aligned with ``rules``: antecedent fully satisfied by the
    # basket AND at least one consequent item the customer does not have yet.
    applicable = pd.Series(
        [
            antecedent <= input_items and not consequent <= input_items
            for antecedent, consequent in zip(rules['antecedents'], rules['consequents'])
        ],
        index=rules.index,
        dtype=bool,
    )
    matched_rules = rules[applicable]

    if matched_rules.empty:
        print("\nNo strong association found. Try with a different combination of items.")
        return None

    # Most confident applicable rule; ties resolve to the first such rule.
    best_rule = matched_rules.loc[matched_rules['confidence'].idxmax()]
    # Sort so the pick is deterministic when several new consequents exist.
    predicted_item = sorted(best_rule['consequents'] - input_items)[0]

    print(f"\nBased on your input items {input_items}, you are likely to buy '{predicted_item}' with confidence of {best_rule['confidence']:.2f}")
    return predicted_item

# Read a comma-separated basket from the user.
raw_basket = input("Enter the items you bought (comma separated): ")

# Normalise each name to the "Milk"-style casing used in the transactions and
# drop empty tokens, which a trailing or doubled comma would otherwise add to
# the basket and prevent any rule from matching.
input_items = [item.strip().capitalize() for item in raw_basket.split(',') if item.strip()]

if input_items:
    prediction = predict_next_item(input_items)
else:
    # Nothing usable was typed; skip the rule lookup rather than query with an empty basket.
    print("\nNo items entered. Please provide at least one item.")
    prediction = None

# Bare last expression so the notebook still echoes the returned item.
prediction
