In [1]:
import pandas as pd
import numpy as np
import random
import operator
import math   

In [2]:
## Fuzzy c means clustering algorithm
def compute_fuzzy_c_means(df,m,MAX_ITER,k):       

    def initializeMembershipMatrix():
        membership_mat = list()
        for i in range(n):
            random_num_list = [random.random() for i in range(k)]
            summation = sum(random_num_list)
            temp_list = [x/summation for x in random_num_list]
            membership_mat.append(temp_list)
        return membership_mat


    def calculateClusterCenter(membership_mat):
        cluster_mem_val = zip(*membership_mat)
        cluster_centers = list()
        for j in range(k):
            x = list(cluster_mem_val[j])
            xraised = [e ** m for e in x]
            denominator = sum(xraised)
            temp_num = list()
            for i in range(n):
                data_point = list(df.iloc[i])
                prod = [xraised[i] * val for val in data_point]
                temp_num.append(prod)
            numerator = map(sum, zip(*temp_num))
            center = [z/denominator for z in numerator]
            cluster_centers.append(center)
        return cluster_centers


    def updateMembershipValue(membership_mat, cluster_centers):
        p = float(2/(m-1))
        for i in range(n):
            x = list(df.iloc[i])
            distances = [np.linalg.norm(map(operator.sub, x, cluster_centers[j])) for j in range(k)]
            for j in range(k):
                den = sum([math.pow(float(distances[j]/distances[c]), p) for c in range(k)])
                membership_mat[i][j] = float(1/den)       
        return membership_mat


    def getClusters(membership_mat):
        cluster_labels = list()
        for i in range(n):
            max_val, idx = max((val, idx) for (idx, val) in enumerate(membership_mat[i]))
            cluster_labels.append(idx)
        return cluster_labels


    def fuzzyCMeansClustering():
        # Membership Matrix
        membership_mat = initializeMembershipMatrix()
        curr = 0
        while curr <= MAX_ITER:
            cluster_centers = calculateClusterCenter(membership_mat)
            membership_mat = updateMembershipValue(membership_mat, cluster_centers)
            cluster_labels = getClusters(membership_mat)
            curr += 1
        return cluster_labels, cluster_centers, membership_mat
    
    # Number of Attributes
    num_attr = len(df.columns)

    # Number of data points
    n = len(df)
    
    labels, centers, membership_mat = fuzzyCMeansClustering()
    return centers, labels, membership_mat

In [3]:
## compute output parameters

# df = input appended with output where the last column is 
## construct the A matrix and b matrix and solve for x: Eq: Ax = b
def compute_out_params(df):
    A = np.asarray(df) 
    b = A[:,len(A[0])-1]
    A = A[:,0:len(A[0])-1]

    b = np.reshape(b,(len(A),1))

    c = np.ones(len(A))
    c = np.reshape(c,(len(A),1))

    ## A matrix has input and an additional column of 1's. 
    A = np.append(A,c,axis=1)

    ## solve the linear least square problem
    x = np.linalg.lstsq(A,b)

    return np.transpose(x[0])    

In [4]:
## compute input parameters, i.e., mean and sigma for Gaussian membership function
## source: invgaussmf4sigma() function in fuzzy toolbox in matlab

def compute_in_params(cluster_centers, membership_mat, df):
    
    numCluster = np.shape(cluster_centers)[0]
    numVars  = np.shape(cluster_centers)[1]
    numVars = numVars-1 ## consider only input variables
    cluster_centers = np.asarray(cluster_centers)[:,0:numVars]
    numPts = len(df)
    invals = np.asarray(df) 
    invals = invals[:,0:numVars]
    #print np.shape(invals), np.shape(membership_mat), np.shape(cluster_centers)
    
    inpMemParams = [] 
    
    for i in range(numVars):
        val = 0
        for j in range(numCluster):
            val=0
            for k in range(numPts):
                temp = ((cluster_centers[j][i]-invals[k][i])*(cluster_centers[j][i]-invals[k][i])) / (2*math.log(membership_mat[k][j]))
                val = val + math.sqrt(-temp)
             
            val = (val)/numPts 
            inpMemParams.append([val, cluster_centers[j][i]])
    return np.asarray(inpMemParams)

In [5]:
## Load data and perform fuzzy rule based training

# Number of Clusters / number of rules
k = 3
# Maximum number of iterations for fuzzy c-means
MAX_ITER = 250
# Fuzzy parameter
m = 2.00

## input data in the form of csv file has output appended as the last column
##load data
df_full = pd.read_csv("in.csv")
columns = list(df_full.columns)
features = columns
df = df_full[features]    
    
centers, labels, membership_mat = compute_fuzzy_c_means(df,m,MAX_ITER,k) 

inParams = compute_in_params(centers, membership_mat, df)   
print ('Estimated input Gaussian parameters are: ')
print (inParams)

outParams = compute_out_params(df)
print ('Estimated output parameters are: ')
print (outParams)

np.savetxt('inputParams.txt',inParams,delimiter=',',fmt='%f')
np.savetxt('outputParams.txt',outParams,delimiter=',',fmt='%f')

TypeError: 'zip' object is not subscriptable