In [1]:
# Imports
from scipy.io import loadmat
import pandas as pd
import numpy as np
import copy
from functools import reduce
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import cv2

In [2]:
# Subtractive Clustering Radius algorithm and utility functions
# NOTE: the name `fuzzyMountain` is misleading; the function implements subtractive clustering
def fuzzyMountain(data, nClusters, radiusA):
    """Pick initial centroids with subtractive clustering.

    Despite its name, this function implements subtractive clustering: every
    data point is a centroid candidate, the candidate with the highest
    density is selected, and the densities around it are then reduced before
    the next candidate is picked.

    Parameters
    ----------
    data : array-like, 2-D
        Raw observations, one row per point (converted by ``formatData``).
    nClusters : int
        Number of centroids to select. At least one centroid is always
        returned.
    radiusA : float
        Neighbourhood radius used for the initial densities, expressed in
        the normalized (per-column min-max scaled) space. The radius used to
        reduce densities after each pick is ``1.5 * radiusA``.

    Returns
    -------
    pandas.DataFrame
        The selected points in the *original* (non-normalized) coordinates,
        as floats, indexed by the row label of each selected point, with
        columns ``x0 .. x{d-1}``.
    """
    data = formatData(data)
    normalizedData = normalize(data)

    # Densities are computed in the same normalized space that the update
    # step works in, so radiusA and radiusB are measured on the same scale.
    densities = calculateDensities(normalizedData, radiusA)

    maxDensityPoint, maxDensity = getMaxDensityPointAndValue(
        normalizedData, densities)

    # Only the row labels are recorded: the final centroids are looked up in
    # the original data, which avoids growing a DataFrame row by row
    # (DataFrame.append no longer exists in pandas >= 2.0).
    selectedLabels = [maxDensityPoint.name]

    radiusB = 1.5*radiusA
    for _ in range(nClusters-1):
        densities = updateDensities(
            normalizedData,
            densities,
            maxDensityPoint,
            maxDensity,
            radiusB
        )
        maxDensityPoint, maxDensity = getMaxDensityPointAndValue(
            normalizedData,
            densities
        )

        selectedLabels.append(maxDensityPoint.name)

    # Cast to float so integer inputs (e.g. uint8 pixels) cannot wrap around
    # in later centroid arithmetic.
    return data.loc[selectedLabels].astype(float)


def normalize(data):
    """Return a min-max scaled copy of ``data`` (each column mapped to [0, 1]).

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric columns to scale. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Deep copy of ``data`` where every column is ``(x - min) / (max - min)``.
        A constant column (``max == min``) is mapped to 0.0 instead of
        producing NaN through a 0/0 division.
    """
    normalizedData = data.copy(deep=True)

    for col in data:
        minVal = data[col].min()
        maxVal = data[col].max()

        delta = maxVal-minVal

        if delta == 0:
            # Constant column: every value equals the minimum.
            normalizedData[col] = 0.0
        else:
            normalizedData[col] = (
                normalizedData[col].to_numpy()-minVal)/delta

    return normalizedData


def calculateDensities(data, radius):
    """Compute the subtractive-clustering density of every row of ``data``.

    For each row, the density is the sum over all rows of
    ``exp(-s / (radius/2)**2)``, where ``s`` is the value returned by
    ``euclNormMatrix`` for the squared coordinate differences.

    Returns a single-column DataFrame (``"density"``) created with a default
    integer index and filled using the row labels of ``data``.
    """
    scale = (radius/2)**2

    densities = pd.DataFrame(
        np.zeros((data.shape[0], 1)),
        columns=["density"]
    )

    for label, point in data.iterrows():
        squaredOffsets = (data.loc[:, :] - point)**2
        contributions = np.exp(-(euclNormMatrix(squaredOffsets)/scale))
        densities.loc[label, "density"] = contributions.sum()

    return densities


def getMaxDensityPointAndValue(data, densities):
    """Return ``[point, density]`` for the row with the highest density.

    The index of the maximum is taken from ``densities["density"]`` and is
    then used positionally (``iloc``) to fetch the matching row of ``data``.
    """
    peakIndex = densities["density"].idxmax()

    peakPoint = data.iloc[peakIndex]
    peakDensity = densities.loc[peakIndex, "density"]

    return [peakPoint, peakDensity]


def updateDensities(data, densities, maxDensityPoint, maxDensity, radius):
    """Return a copy of ``densities`` reduced around the last selected point.

    Each row's density is decreased by
    ``maxDensity * exp(-d**2 / (radius/2)**2)``, where ``d`` is the Euclidean
    distance between that row of ``data`` and ``maxDensityPoint``. The input
    ``densities`` frame is left untouched.
    """
    reduced = densities.copy()

    scale = (radius/2)**2

    for label, point in data.iterrows():
        distance = euclNorm(point - maxDensityPoint)
        penalty = np.exp(- ((distance**2)/scale))
        reduced.loc[label, "density"] -= maxDensity*penalty

    return reduced


def formatData(rawData):
    """Copy ``rawData`` into a DataFrame whose columns are named x0, x1, ..."""
    columnNames = []
    for position in range(rawData.shape[1]):
        columnNames.append(f'x{position}')

    return pd.DataFrame(rawData, columns=columnNames, copy=True)


def euclNorm(arr):
    """Return the 2-norm of ``arr`` as computed by ``np.linalg.norm``."""
    norm = np.linalg.norm(arr, ord=2)
    return norm


def euclNormMatrix(matrix):
    """Sum ``sqrt(x**2)`` over the entries of each row of ``matrix``.

    Note that this is a row-wise sum of absolute values, not a Euclidean
    norm. When the caller passes squared coordinate differences (as
    ``calculateDensities`` does), the result is the squared Euclidean
    distance of each row.
    """
    values = np.array(matrix)
    totals = np.zeros(values.shape[0])

    # Iterating over the transpose walks the input column by column.
    for column in values.T:
        totals += np.sqrt(column**2)

    return np.array(totals.T)

In [3]:
# Fuzzy C-means algorithm and utility functions
def euclNorm(arr):
    """Return the 2-norm of ``arr`` as computed by ``np.linalg.norm``.

    Note: this duplicates the ``euclNorm`` already defined in the
    subtractive-clustering cell; the later definition shadows the earlier.
    """
    norm = np.linalg.norm(arr, ord=2)
    return norm


def removeZeros(arr):
    """Replace every exact zero in ``arr`` with 1e-15, in place.

    Parameters
    ----------
    arr : numpy.ndarray or mutable sequence
        Values to patch. NumPy arrays of any shape are handled with a
        boolean mask; other mutable sequences (e.g. lists) are patched
        element by element.

    Returns
    -------
    The same object that was passed in, after modification.
    """
    if isinstance(arr, np.ndarray):
        # Vectorized mask: works for any number of columns and avoids a
        # Python-level loop over every row.
        arr[arr == 0] = 1e-15
        return arr

    for index, value in enumerate(arr):
        if value == 0:
            arr[index] = 1e-15

    return arr


def cMeans(data, nClusters, tolerance=1e-15, maxIterations=2, radiusA=0.1, mParam=2):
    """Fuzzy C-means clustering seeded by subtractive clustering.

    Parameters
    ----------
    data : numpy.ndarray, shape (nRows, nDims)
        Observations, one row per point.
    nClusters : int
        Number of clusters.
    tolerance : float
        The loop stops once the mean Euclidean displacement of the centroids
        between two consecutive iterations falls below this value.
    maxIterations : int
        Maximum number of membership/centroid update rounds.
    radiusA : float
        Radius forwarded to ``fuzzyMountain`` for the initial centroids.
    mParam : float
        Fuzzifier ``m``; memberships use the exponent ``2 / (m - 1)``.

    Returns
    -------
    list
        ``[dfData, dfCentroids, iterations]`` where ``dfData`` holds the
        input columns ``x0..`` plus one membership column ``k0..`` per
        cluster, ``dfCentroids`` holds the centroids (index ``k0..``, columns
        ``x0..``) and ``iterations`` is the zero-based index of the last
        iteration executed (``-1`` if ``maxIterations`` is 0).
    """
    xDimension = data.shape[1]
    nRows = data.shape[0]
    xColumns = [f"x{i}" for i in range(xDimension)]
    clusterColumns = [f"k{i}" for i in range(nClusters)]

    dfData = pd.DataFrame(data, columns=xColumns)

    mountainCentroids = fuzzyMountain(data, nClusters, radiusA)

    # Work on float arrays: integer inputs such as uint8 pixels would
    # otherwise risk wrap-around when coordinates are subtracted.
    points = dfData[xColumns].to_numpy(dtype=float)
    centroidValues = np.asarray(mountainCentroids.to_numpy(), dtype=float)

    # Random memberships are only returned as-is when no iteration runs;
    # the first iteration recomputes them from the seeded centroids.
    memberships = np.random.dirichlet(np.ones(nClusters), size=nRows)

    exponent = 2 / (mParam - 1)
    iterations = -1
    for iterations in range(maxIterations):
        # Distance of every point to every centroid, computed once per
        # iteration (centroids do not change during the membership update).
        distances = np.column_stack(
            [np.linalg.norm(points - centroid, axis=1)
             for centroid in centroidValues]
        )
        # Clamp exact zeros on the distances themselves. A point sitting on
        # a centroid then gets a ratio of 1 for that cluster and a
        # membership close to 1, instead of a 0 denominator that used to be
        # patched to 1e-15 and produced a membership of 1e15.
        distances[distances == 0] = 1e-15

        for position in range(nClusters):
            ratios = (distances[:, [position]] / distances) ** exponent
            memberships[:, position] = 1.0 / ratios.sum(axis=1)

        previousCentroids = centroidValues

        # Each centroid is the membership**m weighted mean of the points.
        weights = memberships ** mParam
        centroidValues = (weights.T @ points) / \
            weights.sum(axis=0)[:, np.newaxis]

        delta = np.linalg.norm(
            centroidValues - previousCentroids, axis=1).mean()

        if delta < tolerance:
            break

    for position, clusterCol in enumerate(clusterColumns):
        dfData[clusterCol] = memberships[:, position]

    dfCentroids = pd.DataFrame(
        centroidValues, columns=xColumns, index=clusterColumns,
    )

    return [dfData, dfCentroids, iterations]


In [4]:
# Image utility functions
def readImage(src):
    """Load the image at path ``src`` with ``cv2.imread`` and return the result."""
    loadedImage = cv2.imread(src)
    return loadedImage

def rescaleImage(image, scaleRatio):
    """Resize ``image`` so both dimensions are multiplied by ``scaleRatio``.

    The target width and height are truncated to integers before being
    passed to ``cv2.resize`` as ``(width, height)``.
    """
    sourceHeight, sourceWidth = image.shape[0], image.shape[1]
    targetSize = (int(sourceWidth * scaleRatio), int(sourceHeight * scaleRatio))

    return cv2.resize(image, targetSize)

def writeImage(dest, image):
    """Save ``image`` to path ``dest`` with ``cv2.imwrite``.

    The value returned by ``cv2.imwrite`` is discarded; this function
    always returns ``None``.
    """
    cv2.imwrite(dest, image)
    return None

In [5]:
# Part 1 utility functions
def plotDataAndCentroidsPart1(data, centroids, keys):
    """Show a 2-D scatter plot of the input points and the centroids.

    Parameters
    ----------
    data : pandas.DataFrame
        Input points; columns ``keys[0]`` and ``keys[1]`` are plotted.
    centroids : pandas.DataFrame
        Centroids; the same two columns are plotted as larger red markers.
    keys : sequence of str
        Names of the columns used for the x and y axes.
    """
    xKey, yKey = keys[0], keys[1]

    fig = make_subplots(x_title="x", y_title="y")

    fig.add_trace(
        go.Scatter(x=data[xKey], y=data[yKey], mode="markers", name="Input points")
    )

    # Centroids use the same columns as the data instead of a hard-coded
    # "x0"/"x1", so both traces always share the same axes.
    fig.add_trace(
        go.Scatter(
            x=centroids[xKey],
            y=centroids[yKey],
            mode="markers",
            name="Centroids",
            marker=dict(color="red", size=10),
        )
    )
    fig.show()

In [6]:
# Part 1 main script
# Load the MATLAB file and keep the array stored under its "x" key.
rawData = loadmat("./input/fcm_dataset.mat")
rawData = np.array(rawData["x"])

# %% run C-Means
# Arguments: 4 clusters, tolerance 1e-12, at most 1000 iterations,
# radiusA 0.1 and fuzzifier mParam 2. The result is then plotted using
# the "x0" and "x1" columns.
data, centroids, iterations = cMeans(rawData, 4, 1e-12, 1000, 0.1, 2)
plotDataAndCentroidsPart1(data, centroids, ["x0", "x1"])

In [7]:
# Part 2 (generate result) utility functions
def runCMeansForPhoto(photo, scaleRatio, nClusters):
    """Cluster the pixels of one photo and store the results on disk.

    Reads ``./input/{photo}.jpg``, rescales it by ``scaleRatio``, writes the
    rescaled image to ``./output/{photo}.jpg``, runs ``cMeans`` on the
    flattened pixels and saves the memberships and centroids as CSV files
    in ``output/``.

    Processing is best-effort: any error is reported on standard output and
    the function returns normally so a batch over several photos continues.
    """
    try:
        tempImage = readImage(f"./input/{photo}.jpg")
        if tempImage is None:
            # cv2.imread signals an unreadable or missing file with None.
            raise FileNotFoundError(f"./input/{photo}.jpg could not be read")

        image = rescaleImage(tempImage, scaleRatio)
        writeImage(f"./output/{photo}.jpg", image)

        # One row per pixel, one column per channel.
        flatImage = image.reshape(-1, image.shape[-1])
        data, centroids, _ = cMeans(flatImage, nClusters, 1e-15, 50, 0.4, 2)

        data.to_csv(f"output/data_{photo}.csv", compression=None)
        centroids.to_csv(f"output/centroids_{photo}.csv", compression=None)

    except Exception as error:
        # Report instead of silently swallowing; KeyboardInterrupt is not
        # caught, so the run can still be interrupted.
        print(f"Skipping {photo}: {type(error).__name__}: {error}")


In [8]:
# Part 2 (generate result) main script
# Number of clusters to use for each photo, keyed by the photo's base name
# (the name is interpolated into the input/output file paths).
nClustersPerPhoto = {
    "photo001": 13,
    "photo002": 6,
    "photo003": 10,
    "photo004": 10,
    "photo005": 12,
    "photo006": 5,
    "photo007": 7,
    "photo008": 9,
    "photo009": 12,
    "photo010": 11,
    "photo011": 8,
}
# Factor applied to both image dimensions before clustering.
scaleRatio = 0.3

# Run the clustering pipeline once per photo.
for photo, nClusters in nClustersPerPhoto.items():
    runCMeansForPhoto(photo, scaleRatio, nClusters)


In [9]:
# TP2 part2 (process result) utility functions
def plotDataAndCentroids3d(data, centroids, keys):
    """Show a 3-D scatter plot of the points and centroids, coloured per row.

    The three columns named in ``keys`` give the x, y and z coordinates.
    Each marker's colour is an ``rgb(...)`` string built from the same three
    values taken in reverse key order.
    """
    def rowColors(frame):
        # One colour string per row of the frame.
        return [
            f'rgb({row[keys[2]]}, {row[keys[1]]}, {row[keys[0]]})'
            for _, row in frame.iterrows()
        ]

    fig = make_subplots(x_title="x", y_title="y")

    pointTrace = go.Scatter3d(
        x=data[keys[0]],
        y=data[keys[1]],
        z=data[keys[2]],
        mode="markers",
        name="Input points",
        opacity=0.5,
        marker=dict(color=rowColors(data), size=3),
    )
    fig.add_trace(pointTrace)

    centroidTrace = go.Scatter3d(
        x=centroids[keys[0]],
        y=centroids[keys[1]],
        z=centroids[keys[2]],
        mode="markers",
        name="Centroids",
        marker=dict(color=rowColors(centroids), size=15),
    )
    fig.add_trace(centroidTrace)

    fig.show()


def generateSegmentedImage(data, centroids):
    """Replace every point's coordinates with those of its strongest cluster.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per point, containing the coordinate columns named like
        ``centroids.columns`` and one membership column per label in
        ``centroids.index``. Modified in place.
    centroids : pandas.DataFrame
        Cluster centroids, indexed by cluster label.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, whose coordinate columns now hold the
        centroid of the cluster with the highest membership for each row.
    """
    xColumns = list(centroids.columns)
    clusterColumns = list(centroids.index)

    if len(data) == 0:
        return data

    # Label of the highest-membership cluster for every row, computed in one
    # pass instead of iterating row by row.
    winners = data[clusterColumns].idxmax(axis=1)

    # Replacing whole columns lets them take the centroids' dtype (floats)
    # even when the original coordinates were integers.
    data[xColumns] = centroids.loc[winners.to_numpy(), xColumns].to_numpy()

    return data


def runAnalysisForPhoto(photo, plot3d=True):
    """Build and save the segmented version of a previously clustered photo.

    Reads the rescaled image ``./output/{photo}.jpg`` together with the
    ``data_{photo}.csv`` and ``centroids_{photo}.csv`` files, replaces every
    pixel by the centroid of its strongest cluster and writes the result to
    ``./output/segmented_{photo}.jpg``.

    Parameters
    ----------
    photo : str
        Base name of the photo (without extension).
    plot3d : bool, default True
        When true, the pixels and centroids are also shown in a 3-D scatter
        plot. Pass ``False`` to skip the plot.

    Raises
    ------
    FileNotFoundError
        If the rescaled image cannot be read.
    """
    originalImage = readImage(f"./output/{photo}.jpg")
    if originalImage is None:
        # cv2.imread signals an unreadable or missing file with None.
        raise FileNotFoundError(f"./output/{photo}.jpg could not be read")

    data = pd.read_csv(f"./output/data_{photo}.csv", index_col=0)
    centroids = pd.read_csv(f"./output/centroids_{photo}.csv", index_col=0)

    xColumns = list(centroids.columns)

    # Plot before segmenting: generateSegmentedImage overwrites the pixel
    # coordinates in ``data``.
    if plot3d:
        plotDataAndCentroids3d(data, centroids, ["x0", "x1", "x2"])

    processedData = generateSegmentedImage(data, centroids)
    newImage = np.array(processedData.loc[:, xColumns]).reshape(
        originalImage.shape)
    writeImage(f"./output/segmented_{photo}.jpg", newImage)

In [10]:
# TP2 part2 (process result) main script
# Analyse every photo; a failure on one photo is reported and the loop
# moves on to the next one.
for photo in nClustersPerPhoto.keys():
    try:
        runAnalysisForPhoto(photo)
    except Exception as error:
        # Catching Exception (not a bare except) keeps KeyboardInterrupt
        # working and makes the reason for a skipped photo visible.
        print(f"Analysis failed for {photo}: {type(error).__name__}: {error}")