In [1]:
# Import the modules
import pandas as pd
from pathlib import Path
import hvplot.pandas
from sklearn.cluster import KMeans

In [2]:
# Load the service ratings CSV into a DataFrame
service_ratings_df = pd.read_csv(Path("../Resources/service_ratings.csv"))

# Preview the first five rows
service_ratings_df.head(5)

Unnamed: 0,mobile_app_rating,personal_banker_rating
0,3.5,2.4
1,3.65,3.14
2,2.9,2.75
3,2.93,3.36
4,2.89,2.62


In [3]:
# Candidate cluster counts (1 through 10) and an empty list that will
# collect the inertia measured for each of them
k = [*range(1, 11)]
inertia = list()

In [4]:
# Fit a K-means model on the service_ratings DataFrame for every candidate
# value of k and record each fitted model's `inertia_` attribute.
#
# The list is rebuilt from scratch in this cell so that re-running the cell
# cannot append a second batch of values and leave `inertia` longer than `k`
# (which would make the elbow DataFrame construction fail on mismatched
# lengths).
inertia = []
for i in k:
    # random_state is fixed so repeated runs produce the same clustering
    k_model = KMeans(n_clusters=i, random_state=1)
    k_model.fit(service_ratings_df)
    inertia.append(k_model.inertia_)

In [5]:
# Pair each candidate k with its inertia so the elbow curve can be plotted
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(data=elbow_data)

# Preview the first five rows
df_elbow.head(5)

Unnamed: 0,k,inertia
0,1,122.472196
1,2,77.08019
2,3,58.44347
3,4,42.595218
4,5,34.6247


In [6]:
# Draw inertia against k as a line chart, with one x-axis tick per candidate k
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

## Using k-means with four clusters

In [7]:
# Build a 4-cluster K-means model with a fixed random_state, then fit it
# on the service ratings data
model = KMeans(random_state=1, n_clusters=4)
model.fit(service_ratings_df)

# Predict a cluster label for every row of the same data
k_4 = model.predict(service_ratings_df)

# Build a new DataFrame that carries the labels in a `customer_segment`
# column, leaving the original DataFrame unmodified
service_ratings_predictions_df = service_ratings_df.assign(customer_segment=k_4)

In [8]:
# Scatter the mobile app rating against the personal banker rating,
# grouping the points by each row's `customer_segment` label.
# A title is set so the figure can be understood on its own, matching the
# elbow plot, which is also titled.
service_ratings_predictions_df.hvplot.scatter(
    x="mobile_app_rating",
    y="personal_banker_rating",
    by="customer_segment",
    title="Customer Segments (k=4)",
)