Load the dataset using pandas.

In [None]:
import pandas as pd
# Read 'data.csv' (path resolved relative to the current working directory)
# into a DataFrame; every later cell works from this `data` object.
data = pd.read_csv('data.csv')

Explore the data to understand its structure and summary statistics.

In [None]:
# Summary statistics of the dataset's columns.
print(data.describe())
# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called on its own; wrapping it in print() would add a stray "None".
data.info()

Compute the pairwise correlation matrix of the features.

In [None]:
# Pairwise correlation between columns; 'pearson' is the default method,
# spelled out here so the choice of coefficient is visible.
correlation_matrix = data.corr(method='pearson')

Visualize the correlation matrix using a heatmap.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Render the correlation matrix as a heatmap, writing each coefficient
# inside its cell (annot=True), then display the figure.
sns.heatmap(data=correlation_matrix, annot=True)
plt.show()

Visualize raw data distributions and relationships.

In [None]:
# Grid of pairwise plots across the dataset's columns, then display the figure.
sns.pairplot(data=data)
plt.show()

Standardize the features to have a mean of 0 and a variance of 1.

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn each column's mean and standard deviation from `data`, then use the
# fitted scaler to rescale those same columns.
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)

Apply K Means clustering on the standardized data.

In [None]:
from sklearn.cluster import KMeans
# Fit K-Means with 3 clusters on the standardized features.
# random_state fixes the centroid initialisation so that re-running the
# notebook reproduces the same clustering instead of a different one each time.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(data_scaled)

Generate the elbow method plot to determine the optimal number of clusters.

In [None]:
# Elbow method on the standardized features: fit K-Means for k = 1..9 and
# record each fit's inertia (sum of squared distances of samples to their
# closest cluster centre).
k_values = range(1, 10)
inertia = []
for k in k_values:
    # Bind each trial fit to its own name so the `kmeans` model fitted
    # earlier is not silently replaced by the k = 9 model.
    # random_state keeps the curve identical across runs.
    model = KMeans(n_clusters=k, random_state=42).fit(data_scaled)
    inertia.append(model.inertia_)

# Plot inertia against k; the "elbow" is where the curve stops dropping sharply.
plt.plot(k_values, inertia, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow method')
plt.show()

Fit the final K-Means model using the chosen number of clusters (k = 3 here; adjust it if the elbow plot suggests a different value).

In [None]:
# Final K-Means model on the standardized features, with k fixed at 3.
# random_state makes the resulting cluster labels reproducible between runs.
final_kmeans = KMeans(n_clusters=3, random_state=42).fit(data_scaled)

Store clustering results in a dataframe.

In [None]:
# Show the cluster index assigned to each row.
print(final_kmeans.labels_)
# Work on an explicit copy: pd.DataFrame(data) does not copy a DataFrame
# input by default, so the new frame would alias `data`. Copying guarantees
# that adding the label column leaves the original `data` untouched (which
# also keeps earlier cells safe to re-run).
results = data.copy()
results['Cluster'] = final_kmeans.labels_

Apply PCA to reduce the standardized data to two principal components.

In [None]:
from sklearn.decomposition import PCA
# Keep two principal components.
pca = PCA(n_components=2)
# Fit PCA on the standardized features and project them onto those components
# in a single call; `pca` stays available for inspection afterwards.
data_pca = pca.fit_transform(data_scaled)

Retrieve the fraction of variance explained by each principal component from the fitted PCA.

In [None]:
# Read the per-component explained-variance *ratio* from the fitted PCA.
# Note that, despite the variable name, these are ratios rather than
# absolute variances.
explained_variance = pca.explained_variance_ratio_

Apply K Means clustering on PCA-transformed data.

In [None]:
# K-Means with 3 clusters on the PCA-transformed data.
# random_state fixes the centroid initialisation so the result is reproducible.
kmeans_pca = KMeans(n_clusters=3, random_state=42).fit(data_pca)

Generate elbow method plot for PCA results.

In [None]:
# Elbow method on the PCA-transformed data: fit K-Means for k = 1..9 and
# record each fit's inertia (sum of squared distances of samples to their
# closest cluster centre).
k_values = range(1, 10)
inertia_pca = []
for k in k_values:
    # Bind each trial fit to its own name so the `kmeans` variable, which
    # holds a model fitted on the standardized features, is not overwritten
    # by a model fitted on PCA data.
    # random_state keeps the curve identical across runs.
    pca_model = KMeans(n_clusters=k, random_state=42).fit(data_pca)
    inertia_pca.append(pca_model.inertia_)

# Plot inertia against k; the "elbow" is where the curve stops dropping sharply.
plt.plot(k_values, inertia_pca, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow method (PCA-transformed data)')
plt.show()

Fit the final K Means model using PCA data.

In [None]:
# Final K-Means model on the PCA-transformed data, with k fixed at 3.
# random_state makes the resulting cluster labels reproducible between runs.
final_kmeans_pca = KMeans(n_clusters=3, random_state=42).fit(data_pca)

Store final clustering results in a new dataframe.

In [None]:
# Wrap the PCA output in a DataFrame (columns keep their default integer
# labels), then append the cluster labels as a trailing 'Cluster' column.
final_results = pd.DataFrame(data=data_pca)
final_results.insert(final_results.shape[1], 'Cluster', final_kmeans_pca.labels_)