In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Read the housing CSV, keeping only the coordinates and the house-value column
home_data = pd.read_csv(
    'housing.csv',
    usecols=['longitude', 'latitude', 'median_house_value'],
)
# Preview the first rows as the cell output
home_data.head()

In [None]:
# Plot every record by position, colored by its median house value
sns.scatterplot(
    data=home_data,
    x='longitude',
    y='latitude',
    hue='median_house_value',
)

In [None]:
# Hold out 33% of the rows as a test set (fixed seed so the split is repeatable).
# Features are the two coordinates; the target is the median house value.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    home_data[['latitude', 'longitude']],
    home_data[['median_house_value']],
    test_size=0.33,
    random_state=0,
)

In [None]:
# Scale the data
from sklearn import preprocessing

# preprocessing.normalize() rescales each *row* to unit length by default, which
# would collapse every (latitude, longitude) pair onto the unit circle and leave
# K-Means only the ratio of the two coordinates to work with. Standardize each
# *column* instead, learning the mean and standard deviation from the training
# split only and reusing them for the test split so both share one coordinate frame.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

In [None]:
from sklearn.cluster import KMeans

# Build a 4-cluster K-Means model (fixed seed) and fit it on the preprocessed
# training features; fit() returns the estimator itself, so chain the calls.
kmeans = KMeans(n_clusters=4, random_state=0, n_init='auto').fit(X_train_norm)
# Show the fitted estimator as the cell output
kmeans

In [None]:
# Plot the training points by position, colored by the cluster K-Means assigned them
sns.scatterplot(
    data=X_train,
    x='longitude',
    y='latitude',
    hue=kmeans.labels_,
)

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette Scores keyed by the number of clusters k.
# `sil` is never filled in by this cell; it is kept only so that any other cell
# referencing it keeps working.
sil = {}
sil_test = {}

# Candidate cluster counts: k = 2 .. 7
k_values = range(2, 8)

for k in k_values:
    # Use a loop-local name so the 4-cluster `kmeans` model fitted earlier (and
    # plotted through `kmeans.labels_`) is not silently replaced by the last candidate.
    candidate = KMeans(n_clusters = k, random_state = 0, n_init = 'auto')
    candidate.fit(X_train_norm)

    # Assign each held-out point to its nearest learned centroid, then score
    # how well separated the resulting test-set clusters are.
    labels_test = candidate.predict(X_test_norm)
    sil_test[k] = silhouette_score(X_test_norm, labels_test, metric = 'euclidean')

    print("Silhouette Score on test set for k =", k, "is", sil_test[k])
    print("")

# Pick the k whose test-set Silhouette Score is highest
best_k = max(sil_test, key = sil_test.get)

In [None]:
# Report the selected cluster count together with its test-set Silhouette Score
best_score = sil_test[best_k]
print("Best k:", best_k)
print("Silhouette Score for best k:", best_score)