In [9]:
import numpy as np
from numpy.linalg.linalg import pinv
 
## scaling the dataset such that every column value lie
## in the range 0 to 1.

## file has 4-dimensional data, and last column
## is the label.


class LoadAndScaleData:
    def __init__(self,filePath):
        self.data = np.loadtxt(filePath,delimiter=',',usecols=(0,1,2,3))
        self.labels = np.loadtxt(filePath,delimiter=',',dtype='S',usecols=(4))
        x, = self.labels.shape
        self.rLabels = np.zeros(shape = (0,3))
        for i in range(0,x):
            ## representing each label as [1,0,0],
            ## [0,1,0],[0,0,1]
            if self.labels[i] == 'Iris-setosa':
                self.rLabels = np.vstack([self.rLabels,[1,0,0]])
            if self.labels[i] == 'Iris-versicolor':
                self.rLabels = np.vstack([self.rLabels,[0,1,0]])
            if self.labels[i] == 'Iris-virginica':
                self.rLabels = np.vstack([self.rLabels,[0,0,1]])
    
    ## scaled data is:-
    ## (minimum - data) / (maximum - minimum)
    def scale(self):
        mat = np.asmatrix(self.data)
        height,width = mat.shape
        for i in range(0,width):
            minimum = np.min(mat[:,i])
            maximum = np.max(mat[:,i])
            for k in range(0,height):
                mat[k,i] = (mat[k,i] - minimum)/(maximum - minimum)
        return mat, self.rLabels
 
 
     
## creating the RBF network that represents the 
## entire network. 
class RBFNetwork:
    def __init__(self, pTypes,scaledData,labels):
        self.pTypes = pTypes
        self.protos = np.zeros(shape=(0,4))
        self.scaledData = scaledData
        self.spread = 0
        self.labels = labels
        self.weights = 0
        

    def generatePrototypes(self):
        
        ## select prototypes randomly and 
        ## equally from each class.
        
        ## create array of nos. b/w 0 to 49,
        ## d.u. value specified for the no. of
        ## prototypes.
        group1 = np.random.randint(0,49,size=self.pTypes)
        group2 = np.random.randint(50,100,size=self.pTypes)
        group3 = np.random.randint(101,150,size=self.pTypes)
        
        ## then add it array self.protos.
        ## it's an array of mx4 elements.
        ## m = no. of prototypes.
        ## vstack method of numpy appends row to
        ## a numpy array.
        
        ## scaledData has 150 rows.
        ## lets say that group1 = [2,7,12,23], 
        ## this particular rows would be selected
        ## from the array scaledData.
        self.protos = np.vstack([self.protos,self.scaledData[group1,:],self.scaledData[group2,:],self.scaledData[group3,:]])
        return self.protos
 
    ## Gaussian function. where t is the center, 
    ## in our case the prototypes  we selected 
    ## before , x represents the input vector. 
    ## || x – t || ^2 represents the squared 
    ## euclidean distance. m1/d^2 represents the
    ## inverse sigma squared.
    def sigma(self):
        dTemp = 0
        for i in range(0,self.pTypes*3):
            ## iterate through the prototypes
              for k in range(0,self.pTypes*3):
                ## finding the distance b/w them.
                dist = np.square(np.linalg.norm(self.protos[i] - self.protos[k]))
                if dist > dTemp:
                    dTemp = dist
        self.spread = dTemp/np.sqrt(self.pTypes*3)
 
    def train(self):
        self.generatePrototypes()
        self.sigma()
        hiddenOut = np.zeros(shape=(0,self.pTypes*3))
        
        ## take one i/p vector at a time.
        ## pass it through activation functions
        ## in the hidden layer.
        for item in self.scaledData:
            out=[]
            for proto in self.protos:
                distance = np.square(np.linalg.norm(item - proto))
                neuronOut = np.exp(-(distance)/(np.square(self.spread)))
                out.append(neuronOut)
            hiddenOut = np.vstack([hiddenOut,np.array(out)])
        print hiddenOut
        
        ## calculate hidden layer weight.
        self.weights = np.dot(pinv(hiddenOut),self.labels)
        print self.weights
 
    def test(self):
        items = [3,4,72,82,91,120,134,98,67,145,131]
        for item in items:
            data = self.scaledData[item]
            out = []
            for proto in self.protos:
                distance = np.square(np.linalg.norm(data-proto))
                neuronOut = np.exp(-(distance)/np.square(self.spread))
                out.append(neuronOut)
             
            netOut = np.dot(np.array(out),self.weights)
            print '---------------------------------'
            print netOut
            print 'Class is ',netOut.argmax(axis=0) + 1
            print 'Given Class ',self.labels[item].argmax(axis=0) +1
         
                 
        
 
    data = LoadAndScaleData('C:\\Python27\\Scripts\\Python_for_DS\\Datasets\\iris.csv');
    scaledData, label = data.scale()
    network = RBFNetwork(4,scaledData,label)
    network.train()
    network.test()

ValueError: could not convert string to float: SepalLength

In [2]:
pwd

u'C:\\Python27\\Scripts\\Python_for_DS\\RBF\\RBFNN'