<a href="https://colab.research.google.com/github/ridusogie/ecommerce-PBA/blob/main/Clustering_example_of_unsupervised_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# Step 1: Create the dataset
# Each row holds one food followed by its binary flags, in the order listed in
# `dataset_columns` (1 = the food has that property, 0 = it does not).
dataset_columns = ["Food", "Sweet", "Crunchy", "Salty", "Vegetable"]
food_rows = [
    ("Apple", 1, 1, 0, 0),
    ("Deep dish pizza", 0, 0, 1, 0),
    ("Carrot", 0, 1, 0, 1),
    ("Potato Chip", 0, 1, 1, 0),
    ("Lettuce", 0, 1, 0, 1),
    ("Chocolate", 1, 0, 0, 0),
    ("Pretzel", 0, 1, 1, 0),
    ("Tomato", 0, 0, 0, 1),
    ("Orange", 1, 1, 0, 0),
    ("Strawberry", 1, 1, 0, 0),
    ("Popcorn", 0, 1, 1, 0),
    ("Cucumber", 0, 1, 0, 1),
    ("Celery", 0, 1, 0, 1),
    ("Bread", 0, 0, 1, 0),
    ("Ice cream", 1, 0, 0, 0),
    ("Almonds", 0, 1, 0, 0),
    ("Orange juice", 1, 0, 0, 0),
    ("Grapes", 1, 1, 0, 0),
    ("Peanuts", 0, 1, 1, 0),
    ("Crackers", 0, 1, 1, 0),
    ("Blueberries", 1, 1, 0, 0),
    ("Avocado", 0, 0, 0, 1),
    ("Corn", 0, 1, 1, 1),
    ("Rice Pudding", 1, 0, 0, 0),
    ("Jelly", 1, 0, 0, 0),
    ("Baked salmon", 0, 0, 1, 0),
    ("Fried shrimp", 0, 0, 1, 0),
    ("Watermelon", 1, 0, 0, 0),
    ("Banana", 1, 0, 0, 0),
    ("Bacon", 0, 0, 1, 0),
]

# Transpose the row-wise table into the column-name -> list-of-values mapping
# that the rest of the script reads as `data`.
data = {
    column: list(values)
    for column, values in zip(dataset_columns, zip(*food_rows))
}
df = pd.DataFrame(data)

# Step 2: Prepare the features
# Only the four binary flag columns feed the clustering; the "Food" label
# column is left out.
feature_names = ["Sweet", "Crunchy", "Salty", "Vegetable"]
features = df[feature_names]

# Standardize the features
# Fit the scaler on the feature table, then rescale that same table.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

# Print the standardized features
standardized_table = pd.DataFrame(scaled_features, columns=features.columns)
print("Standardized Features:")
print(standardized_table)

# Step 3: Apply K-Means clustering
N_CLUSTERS = 5

# Generate initial centroids before fitting
# One starting centroid is drawn per cluster, and the draw is made from the
# *distinct* standardized rows: many foods share the same feature vector, so
# sampling raw rows could pick the same point twice and start two clusters on
# top of each other.
np.random.seed(42)  # Ensuring consistent results
distinct_points = np.unique(scaled_features, axis=0)
chosen_rows = np.random.choice(len(distinct_points), N_CLUSTERS, replace=False)
initial_centroids = distinct_points[chosen_rows]

# Hand the sampled centroids to the model so that the "starting centroids"
# printed below are the ones the fit actually starts from. With an explicit
# init array a single run (n_init=1) is all that is needed.
kmeans = KMeans(
    n_clusters=N_CLUSTERS,
    init=initial_centroids,
    n_init=1,
    random_state=42,  # Ensuring reproducibility
)

# Print the starting centroids before fitting the model
print("\nStarting Centroids (Before Fitting):")
print(pd.DataFrame(initial_centroids, columns=features.columns))

# Fit the model and assign clusters
df["Cluster"] = kmeans.fit_predict(scaled_features)

# Print cluster centroids
centroids = pd.DataFrame(kmeans.cluster_centers_, columns=features.columns)
print("\nFinal Cluster Centroids:")
print(centroids)
print("\n")

# Step 4: Display clusters and their items
print("Final clusters")

# Map each cluster label to the foods assigned to it. Labels are taken in the
# order they first appear in the "Cluster" column, which fixes the print order.
cluster_labels = df["Cluster"].unique()
clusters = {
    label: df.loc[df["Cluster"] == label, "Food"].tolist()
    for label in cluster_labels
}

for cluster_id, foods in clusters.items():
    food_list = ", ".join(foods)
    print(f"Cluster {cluster_id}: {food_list}")


Standardized Features:
       Sweet   Crunchy     Salty  Vegetable
0   1.224745  0.935414 -0.760886  -0.551677
1  -0.816497 -1.069045  1.314257  -0.551677
2  -0.816497  0.935414 -0.760886   1.812654
3  -0.816497  0.935414  1.314257  -0.551677
4  -0.816497  0.935414 -0.760886   1.812654
5   1.224745 -1.069045 -0.760886  -0.551677
6  -0.816497  0.935414  1.314257  -0.551677
7  -0.816497 -1.069045 -0.760886   1.812654
8   1.224745  0.935414 -0.760886  -0.551677
9   1.224745  0.935414 -0.760886  -0.551677
10 -0.816497  0.935414  1.314257  -0.551677
11 -0.816497  0.935414 -0.760886   1.812654
12 -0.816497  0.935414 -0.760886   1.812654
13 -0.816497 -1.069045  1.314257  -0.551677
14  1.224745 -1.069045 -0.760886  -0.551677
15 -0.816497  0.935414 -0.760886  -0.551677
16  1.224745 -1.069045 -0.760886  -0.551677
17  1.224745  0.935414 -0.760886  -0.551677
18 -0.816497  0.935414  1.314257  -0.551677
19 -0.816497  0.935414  1.314257  -0.551677
20  1.224745  0.935414 -0.760886  -0.551677
21 -0.816