In [None]:
#@title
# Upload the following file to /content before running the cells below:
# concrete_compressive_strength.csv

# 8.1. K-Means Clustering

In [None]:
#@title 8.1.1. Import some necessary packages
import numpy as np
import pandas as pd
import matplotlib

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from sklearn.cluster import KMeans as kmeans

#from mpl_toolkits.mplot3d import Axes3D

In [None]:
#@title 8.1.2. Data processing
# Load the data set; the path assumes the CSV was uploaded to /content.
data      = pd.read_csv('/content/concrete_compressive_strength.csv')
data_keys = list(data.keys())

# Short axis labels: keep only the text before the first '(' of each column name.
# str.partition returns the whole name when it contains no '(', whereas slicing
# with str.find would use -1 in that case and silently drop the last character.
keys = [name.partition('(')[0] for name in data_keys]

# All columns but the last are kept as inputs; the last column is kept separately
# as the output. It is sliced with -1: so the resulting array stays 2-D.
datain = data.iloc[:, :-1].values
dataou = data.iloc[:, -1:].values

# no train/test split is made: all rows are scaled and clustered together

# Rescale every column to [0, 1] so that all columns share the same range.
# Inputs and output get separate scalers, each fitted on its own array.
scalerin = MinMaxScaler(feature_range=(0,1))
datain_calibrated = scalerin.fit_transform(datain)

scalerou = MinMaxScaler(feature_range=(0,1))
dataou_calibrated = scalerou.fit_transform(dataou)

In [None]:
#@title 8.1.3. Clustering and some plots 
# For every pair of input features (i0 < i1), run K-Means on the three scaled
# columns (feature i0, feature i1, output) and draw the clusters in a 3-D scatter.
color_vals  = ['b','r','k','g','c','m']
marker_vals = ['o','*','s','d','.','+']

K    = 3   # number of clusters requested from K-Means
SEED = 0   # fixed seed so cluster assignments are reproducible between runs

n_features = datain.shape[1]
for i0 in range(n_features):
  for i1 in range(i0+1, n_features):

    # columns of data_case: feature i0, feature i1, scaled output
    data_case = np.concatenate((datain_calibrated[:, [i0, i1]], dataou_calibrated), axis = 1)

    # n_init is given explicitly because its default differs between
    # scikit-learn versions; random_state makes the initialisation repeatable.
    model     = kmeans(K, max_iter = 300, n_init = 10, random_state = SEED)
    model.fit(data_case)

    labels    = model.labels_

    fig = plt.figure(figsize = [8,8])
    ax = fig.add_subplot(111, projection='3d')
    for i2 in range(K):
      ind = labels == i2
      # wrap around the style lists so K may exceed their length without an IndexError
      ax.scatter(datain_calibrated[ind,i0], datain_calibrated[ind,i1], dataou_calibrated[ind,0],
                 s = 40,
                 c = color_vals[i2 % len(color_vals)],
                 marker = marker_vals[i2 % len(marker_vals)],
                 label = 'Cluster {}'.format(i2+1))
    ax.set_xlabel(keys[i0], fontsize = 15)
    ax.set_ylabel(keys[i1], fontsize = 15)
    ax.set_zlabel(keys[-1], fontsize = 15)
    ax.set_title('K-Means', fontsize = 15)

    ax.legend(fontsize = 15, loc = 'best')

    # Render and release this figure before building the next one; otherwise
    # every pairwise figure stays open until the whole cell has finished.
    plt.show()
    plt.close(fig)


# 8.2. DBSCAN Clustering

In [None]:
#@title 8.2.1. Import some necessary packages
import numpy as np
import pandas as pd
import matplotlib

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from sklearn.cluster import DBSCAN as DBSCAN

#from mpl_toolkits.mplot3d import Axes3D

In [None]:
#@title 8.2.2. Data processing
# Load the data set; the path assumes the CSV was uploaded to /content.
data      = pd.read_csv('/content/concrete_compressive_strength.csv')
data_keys = list(data.keys())

# Short axis labels: keep only the text before the first '(' of each column name.
# str.partition returns the whole name when it contains no '(', whereas slicing
# with str.find would use -1 in that case and silently drop the last character.
keys = [name.partition('(')[0] for name in data_keys]

# All columns but the last are kept as inputs; the last column is kept separately
# as the output. It is sliced with -1: so the resulting array stays 2-D.
datain = data.iloc[:, :-1].values
dataou = data.iloc[:, -1:].values

# no train/test split is made: all rows are scaled and clustered together

# Rescale every column to [0, 1] so that all columns share the same range.
# Inputs and output get separate scalers, each fitted on its own array.
scalerin = MinMaxScaler(feature_range=(0,1))
datain_calibrated = scalerin.fit_transform(datain)

scalerou = MinMaxScaler(feature_range=(0,1))
dataou_calibrated = scalerou.fit_transform(dataou)

In [None]:
#@title 8.2.3. Clustering and some plots 
# For every pair of input features (i0 < i1), run DBSCAN on the three scaled
# columns (feature i0, feature i1, output) and draw the clusters in a 3-D scatter.
color_vals  = ['b','r','g','c','m','k']
marker_vals = ['o','*','v','P','D','<']

EPS         = 0.2   # DBSCAN neighbourhood radius, in units of the [0, 1]-scaled data
MIN_SAMPLES = 4     # DBSCAN min_samples setting

n_features = datain.shape[1]
for i0 in range(n_features):
  for i1 in range(i0+1, n_features):

    # columns of data_case: feature i0, feature i1, scaled output
    data_case = np.concatenate((datain_calibrated[:, [i0, i1]], dataou_calibrated), axis = 1)

    model     = DBSCAN(eps = EPS, min_samples = MIN_SAMPLES)
    model.fit(data_case)

    # Cluster labels run from 0 to K-1; points labelled -1 are noise.
    labels    = model.labels_
    K         = int(labels.max()) + 1

    fig = plt.figure(figsize = [8,8])
    ax = fig.add_subplot(111, projection='3d')
    for i2 in range(K):
      ind = labels == i2
      # DBSCAN chooses the number of clusters itself, so wrap around the style
      # lists instead of raising IndexError when more than six clusters are found.
      ax.scatter(datain_calibrated[ind,i0], datain_calibrated[ind,i1], dataou_calibrated[ind,0],
                 s = 40,
                 c = color_vals[i2 % len(color_vals)],
                 marker = marker_vals[i2 % len(marker_vals)],
                 label = 'Cluster {}'.format(i2+1))

    # noise: drawn (and listed in the legend) only when at least one point has label -1
    ind = labels == -1
    if ind.any():
      ax.scatter(datain_calibrated[ind,i0], datain_calibrated[ind,i1], dataou_calibrated[ind,0],
                 s = 60, c = 'y', marker = 'H', label = 'Noise')

    ax.set_xlabel(keys[i0], fontsize = 15)
    ax.set_ylabel(keys[i1], fontsize = 15)
    ax.set_zlabel(keys[-1], fontsize = 15)
    # The format string needs a placeholder, otherwise the cluster count is dropped.
    ax.set_title('DBSCAN | # of Clusters: {}'.format(K), fontsize = 15)

    ax.legend(fontsize = 15, loc = 'best')

    # Render and release this figure before building the next one; otherwise
    # every pairwise figure stays open until the whole cell has finished.
    plt.show()
    plt.close(fig)


# 8.3. Clustering Feature Space

> <img src="https://i.ibb.co/FX66Gbw/11-1.png" width="500"/>