In [None]:
# Install kmodes (provides the KPrototypes estimator imported further down).
# %pip targets the running kernel's environment; the version is pinned to the
# one this notebook was recorded with so re-runs stay reproducible.
%pip install kmodes==0.12.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kmodes
  Downloading kmodes-0.12.2-py2.py3-none-any.whl (20 kB)
Installing collected packages: kmodes
Successfully installed kmodes-0.12.2


In [None]:
# Build a synthetic customer dataset with two categorical features (OS, ISP)
# and two numeric features (Age, Time Spent).
import numpy as np
import pandas as pd

SEED = 42          # fixed seed so Restart & Run All reproduces the same rows
N_CUSTOMERS = 100  # number of synthetic customers to generate

operating_systems = ["Android", "iOS"]
isp_names = ["Cox", "HughesNet", "Xfinity", "AT&T"]

rng = np.random.default_rng(SEED)

# Draw each column with one vectorized call instead of appending row by row.
customers = pd.DataFrame({
    'OS': rng.choice(operating_systems, size=N_CUSTOMERS),              # uniform over the OS list
    'ISP': rng.choice(isp_names, size=N_CUSTOMERS),                     # uniform over the ISP list
    'Age': rng.poisson(lam=25, size=N_CUSTOMERS),                       # Poisson with mean 25
    'Time Spent': rng.uniform(low=0.5, high=1000, size=N_CUSTOMERS),    # uniform on [0.5, 1000)
})

In [None]:
# Display the generated customer table (pandas elides the middle rows of long frames).
customers

Unnamed: 0,OS,ISP,Age,Time Spent
0,iOS,Cox,21,388.992612
1,iOS,AT&T,24,383.108267
2,iOS,HughesNet,25,636.622502
3,iOS,HughesNet,25,232.535563
4,iOS,HughesNet,33,333.372878
...,...,...,...,...
95,iOS,AT&T,33,264.356217
96,iOS,HughesNet,18,786.172338
97,Android,AT&T,17,426.254712
98,Android,AT&T,20,402.631826


In [None]:
# Min-max scale the two numeric features; the categorical columns are carried
# over unchanged in the copy.
from sklearn import preprocessing

numeric_cols = ['Age', 'Time Spent']
scaler = preprocessing.MinMaxScaler()
customers_norm = customers.copy()
customers_norm[numeric_cols] = scaler.fit_transform(customers[numeric_cols])

In [None]:
# Preview the first rows after scaling 'Age' and 'Time Spent'.
customers_norm.head()

Unnamed: 0,OS,ISP,Age,Time Spent
0,iOS,Cox,0.28,0.391904
1,iOS,AT&T,0.4,0.385866
2,iOS,HughesNet,0.44,0.646011
3,iOS,HughesNet,0.44,0.231355
4,iOS,HughesNet,0.76,0.334829


In [None]:
# Cluster the mixed-type frame with k-prototypes, which handles the categorical
# columns directly (no one-hot encoding needed for this step).
from kmodes.kprototypes import KPrototypes

# Resolve the categorical column positions by name instead of hard-coding [0, 1].
# If the frame has been reordered or already one-hot encoded, this raises a
# KeyError rather than silently treating numeric columns as categorical.
categorical_idx = [customers_norm.columns.get_loc(col) for col in ('OS', 'ISP')]

# random_state pins the initialisation so the cluster labels are reproducible.
kproto = KPrototypes(n_clusters=3, init='Cao', random_state=42)
clusters = kproto.fit_predict(customers_norm, categorical=categorical_idx)


In [None]:
# Show the cluster index assigned to each customer row.
clusters

array([2, 0, 1, 2, 2, 1, 1, 2, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 0, 2,
       1, 1, 0, 2, 0, 0, 0, 0, 2, 0, 1, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 1,
       2, 1, 1, 0, 1, 1, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 1, 1, 1, 0, 2, 1,
       0, 1, 1, 0, 0, 0, 1, 0, 2, 2, 1, 0, 0, 2, 0, 1, 1, 0, 0, 2, 1, 2,
       1, 0, 1, 1, 2, 2, 1, 2, 1, 0, 0, 1], dtype=uint16)

In [None]:
# Attach the cluster assignments to the original (unscaled) customer rows.
# `labels` gets the default column name 0, which is renamed to 'labels' after the join.
labels = pd.DataFrame(clusters)
labeledCustomers = (
    pd.concat([customers, labels], axis="columns")
    .rename(columns={0: 'labels'})
)

In [None]:
# Display the unscaled customer rows together with their cluster label.
labeledCustomers

Unnamed: 0,OS,ISP,Age,Time Spent,labels
0,iOS,Cox,21,388.992612,2
1,iOS,AT&T,24,383.108267,0
2,iOS,HughesNet,25,636.622502,1
3,iOS,HughesNet,25,232.535563,2
4,iOS,HughesNet,33,333.372878,2
...,...,...,...,...,...
95,iOS,AT&T,33,264.356217,2
96,iOS,HughesNet,18,786.172338,1
97,Android,AT&T,17,426.254712,0
98,Android,AT&T,20,402.631826,0


In [None]:
# List the distinct ISP values present in the labeled data.
labeledCustomers['ISP'].unique()

array(['Cox', 'AT&T', 'HughesNet', 'Xfinity'], dtype=object)

In [None]:
# One-hot encode the categorical columns (OS, ISP) so that KMeans, which needs purely numeric input, can be applied

In [None]:
# Replace 'OS' and 'ISP' with one indicator column per category.
# NOTE: this overwrites customers_norm, so the cell is not idempotent — re-run the
# scaling cell first before running it again, because 'OS' and 'ISP' no longer
# exist as columns afterwards.
# dtype=int keeps the indicators as 0/1 integers on every pandas version
# (pandas >= 2.0 would otherwise produce booleans).
customers_norm = pd.get_dummies(customers_norm, columns=["OS", "ISP"], dtype=int)

In [None]:
# Preview the frame after one-hot encoding 'OS' and 'ISP'.
customers_norm.head()

Unnamed: 0,Age,Time Spent,OS_Android,OS_iOS,ISP_AT&T,ISP_Cox,ISP_HughesNet,ISP_Xfinity
0,0.28,0.391904,0,1,0,1,0,0
1,0.4,0.385866,0,1,1,0,0,0
2,0.44,0.646011,0,1,0,0,1,0
3,0.44,0.231355,0,1,0,0,1,0
4,0.76,0.334829,0,1,0,0,1,0


In [None]:
# Cluster the scaled, one-hot encoded features with plain k-means.
from sklearn.cluster import KMeans

# n_clusters is passed by keyword for readability. n_init and random_state are
# explicit because the n_init default differs between scikit-learn releases and
# an unseeded run can produce different labels each time.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(customers_norm)

# Attach the k-means labels to the original (unscaled) customer rows.
# NOTE: this reuses the names clusters / labels / labeledCustomers, replacing the
# values produced by the earlier k-prototypes run.
labels = pd.DataFrame(clusters)
labeledCustomers = pd.concat((customers, labels), axis=1)
labeledCustomers = labeledCustomers.rename({0: 'labels'}, axis=1)

In [None]:
# Preview the first customer rows with their cluster label.
labeledCustomers.head()

Unnamed: 0,OS,ISP,Age,Time Spent,labels
0,iOS,Cox,21,388.992612,1
1,iOS,AT&T,24,383.108267,1
2,iOS,HughesNet,25,636.622502,2
3,iOS,HughesNet,25,232.535563,2
4,iOS,HughesNet,33,333.372878,2


In [None]:
# List the distinct cluster labels that were assigned.
labeledCustomers['labels'].unique()

array([1, 2, 0], dtype=int32)

In [None]:
# Install mglearn (used only for the k-means illustration plot below).
# %pip targets the running kernel's environment; the version is pinned to the
# one this notebook was recorded with.
%pip install mglearn==0.1.9

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mglearn
  Downloading mglearn-0.1.9.tar.gz (540 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.1/540.1 KB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: mglearn
  Building wheel for mglearn (setup.py) ... [?25l[?25hdone
  Created wheel for mglearn: filename=mglearn-0.1.9-py2.py3-none-any.whl size=582637 sha256=fafc67052da31e2e739f44614c11642af567df04a3ecc41fa4a8b52a9d8d7f4c
  Stored in directory: /root/.cache/pip/wheels/87/75/37/404e66d0c4bad150f101c9a0914b11a8eccc2681559936e7f7
Successfully built mglearn
Installing collected packages: mglearn
Successfully installed mglearn-0.1.9


In [None]:
# Illustrative figure of the k-means steps; its result is not used by any other cell.
# The recorded run of this cell ended in a TypeError whose details were not
# captured in the output — presumably a version mismatch between mglearn 0.1.9
# and the installed libraries (TODO confirm and pin compatible versions).
# Catch that specific error so Restart & Run All still completes, and print it
# rather than hiding it.
try:
    import mglearn
    mglearn.plots.plot_kmeans_algorithm()
except TypeError as err:
    print(f"mglearn k-means illustration skipped: {type(err).__name__}: {err}")

TypeError: ignored