In [1]:
import math
import random
import matplotlib.pyplot as plt
from setuptools.command.test import test

In [2]:
def rand(a, b):
    """Return a pseudo-random float drawn uniformly between a and b.

    Uses the module-level `random` generator, so results follow whatever
    seed (if any) was set with `random.seed`.
    """
    span = b - a
    return a + span * random.random()

In [3]:
def makeMatrix(I, J, fill=0.0):
    """Build an I-by-J matrix as a list of I row lists, each holding J copies of `fill`.

    Every row is a distinct list object, so writing to one row never
    affects another.
    """
    return [[fill] * J for _ in range(I)]

In [4]:
def sigmoid(x):
    """Logistic activation: 1 / (1 + e**-x), mapping any real x into [0, 1].

    The two branches are algebraically identical. Splitting on the sign
    keeps the argument of math.exp non-positive, so very negative inputs
    (below roughly -709) underflow to 0.0 instead of raising OverflowError.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # x < 0: e**x is in (0, 1), so this form cannot overflow.
    z = math.exp(x)
    return z / (1 + z)

In [5]:
def dsigmoid(y):
    """Derivative of the logistic sigmoid, expressed through its output.

    `y` is the value already returned by sigmoid, not the raw input;
    the result is y * (1 - y).
    """
    # (The tanh counterpart would be 1.0 - y ** 2.)
    complement = 1 - y
    return complement * y

In [6]:
def plot(inputs, outputs, actual):
    """Draw expected and predicted values against the same inputs.

    `actual` is drawn as a solid blue line and `outputs` as red dots on a
    single set of axes in a new figure.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    # Same drawing order as before: reference curve first, predictions on top.
    for series, fmt in ((actual, 'b-'), (outputs, 'r.')):
        axes.plot(inputs, series, fmt)
    plt.draw()

In [7]:
class NN:
    """Feed-forward network with one hidden layer, trained by back-propagation.

    Hidden units always use the sigmoid activation. Output units use the
    sigmoid too unless `regression` is true, in which case the output is the
    raw weighted sum. Weight updates include a momentum term.
    """

    def __init__(self, ni, nh, no, regression = False):
        """Create a network and initialise its weights randomly in [-1, 1].

        ni: number of input values per sample (a bias node is added internally).
        nh: number of hidden units (a bias node is added internally).
        no: number of output units.
        regression: if true, outputs are linear instead of sigmoid.
        """
        self.regression = regression

        # Number of input, hidden and output nodes.
        self.ni = ni + 1  # +1 for bias node
        self.nh = nh + 1  # +1 for bias node
        self.no = no

        # Activations for nodes. The last input and last hidden entries are
        # never overwritten by update(), so they stay at 1.0 and act as biases.
        self.ai = [1.0]*self.ni
        self.ah = [1.0]*self.nh
        self.ao = [1.0]*self.no

        # create weights
        self.wi = makeMatrix(self.ni, self.nh)
        self.wo = makeMatrix(self.nh, self.no)

        # set them to random values
        for i in range(self.ni):
            for j in range(self.nh):
                self.wi[i][j] = rand(-1, 1)
        for j in range(self.nh):
            for k in range(self.no):
                self.wo[j][k] = rand(-1, 1)

        # last change in weights for momentum
        self.ci = makeMatrix(self.ni, self.nh)
        self.co = makeMatrix(self.nh, self.no)


    def update(self, inputs):
        """Run a forward pass and return a copy of the output activations.

        inputs: sequence with exactly `ni` values (bias excluded).
        Raises ValueError if the number of inputs is wrong.
        """
        expected = self.ni - 1
        if len(inputs) != expected:
            raise ValueError(
                'wrong number of inputs: expected %d, got %d' % (expected, len(inputs)))

        # input activations (the trailing bias entry is left at 1.0)
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]

        # hidden activations (the trailing bias entry is left at 1.0)
        for j in range(self.nh - 1):
            total = 0.0
            for i in range(self.ni):
                total += self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(total)

        # output activations
        for k in range(self.no):
            total = 0.0
            for j in range(self.nh):
                total += self.ah[j] * self.wo[j][k]
            self.ao[k] = total if self.regression else sigmoid(total)

        return self.ao[:]


    def backPropagate(self, targets, N, M):
        """Adjust the weights for the sample last passed to update().

        targets: expected output values, one per output unit.
        N: learning rate.
        M: momentum factor applied to the previous weight change.
        Returns half the sum of squared errors, measured against the
        activations from the preceding forward pass (before this adjustment).
        Raises ValueError if the number of targets is wrong.
        """
        if len(targets) != self.no:
            raise ValueError(
                'wrong number of target values: expected %d, got %d'
                % (self.no, len(targets)))

        # calculate error terms for output
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            output_deltas[k] = targets[k] - self.ao[k]
            if not self.regression:
                output_deltas[k] = dsigmoid(self.ao[k]) * output_deltas[k]

        # calculate error terms for hidden (uses the output weights as they
        # were during the forward pass, i.e. before they are updated below)
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error += output_deltas[k]*self.wo[j][k]
            hidden_deltas[j] = dsigmoid(self.ah[j]) * error

        # update output weights
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k]*self.ah[j]
                self.wo[j][k] = self.wo[j][k] + N*change + M*self.co[j][k]
                self.co[j][k] = change

        # update input weights
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j]*self.ai[i]
                self.wi[i][j] = self.wi[i][j] + N*change + M*self.ci[i][j]
                self.ci[i][j] = change

        # calculate error
        error = 0.0
        for k in range(len(targets)):
            error += 0.5*((targets[k]-self.ao[k])**2)
        return error


    def test(self, patterns, verbose = False):
        """Return the network output for every pattern.

        patterns: iterable of items whose first element is the input vector.
        verbose: if true, also print each input together with its output.
        """
        results = []
        for p in patterns:
            # One forward pass per pattern; its result is both printed and stored.
            outputs = self.update(p[0])
            if verbose:
                print(p[0], '->', outputs)
            results.append(outputs)
        return results


    def weights(self):
        """Print the input-to-hidden and hidden-to-output weight matrices row by row."""
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])

    def train(self, train_x, train_y, iterations=1000, N=0.5, M=0.1, verbose = False):
        """Train the neural network.

        train_x and train_y are parallel sequences of input and target vectors.
        N is the learning rate.
        M is the momentum factor.
        The summed error of every 100th iteration is printed. `verbose` is
        accepted but not consulted by this method.
        """
        for i in range(iterations):
            error = 0.0
            for x,y in zip(train_x,train_y):
                self.update(x)
                error += self.backPropagate(y, N, M)
            if i % 100 == 0:
                print (i,'error %-14f' % error)

In [8]:
def demoClassification(train_x,train_y,test_list):
    """Train a classification network on the given data and print its test predictions.

    train_x: sequence of input vectors, all of the same length.
    train_y: sequence of target vectors, parallel to train_x.
    test_list: iterable of (inputs, target) pairs; only the inputs are fed
        to the network, and each one is printed with the network's output.
    """
    # Size the input layer from the data so it always matches the feature
    # count; 22 is the width previously hard-coded and is kept as a fallback
    # for an empty training set.
    n_inputs = len(train_x[0]) if len(train_x) > 0 else 22

    # n_inputs input nodes, 19 hidden nodes and one sigmoid output node
    n = NN(n_inputs, 19, 1, regression = False)

    # train it with some patterns then test it.
    n.train(train_x,train_y, 40000, 0.001,0.001)
    n.test(test_list, verbose = True)

In [9]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [20]:
# Load the dataset and mark the discrete-coded columns as categorical so that
# they can be one-hot encoded later.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which looks
# like a saved row index; it stays in the frame as a feature. Confirm intent.
x = pd.read_csv('heart2.csv', sep=",").astype(
    dict.fromkeys(
        ['sex', 'cp', 'fbs', 'restecg', 'exang', 'ca', 'slope', 'thal'],
        'category',
    )
)

In [21]:
# Display the loaded frame to check the columns and their values.
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
5,57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
6,56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
7,44,1,1,120,263,0,1,173,0,0.0,2,0,3,1
8,52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
9,57,1,2,150,168,0,1,174,0,1.6,2,0,2,1


In [22]:
# Separate the label column from the features: `pop` returns the column and
# removes it from `x` in one step. Re-running this cell without reloading `x`
# fails because the column is already gone.
y = x.pop('target')

In [23]:
# One-hot encode the categorical feature columns, dropping the first level of
# each so the indicator columns are not redundant.
x = pd.get_dummies(data=x, drop_first=True)
# Wrap the labels in a single-column frame.
y = pd.DataFrame(data=y)

In [24]:
# Standardise every feature column and put the result back into a frame that
# carries the original column names.
data_scaled = pd.DataFrame(
    data=StandardScaler().fit_transform(x),
    columns=x.columns,
)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [25]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(data_scaled, y, test_size=0.20, random_state=42)

In [26]:
# The network works on plain Python lists, so convert both training frames
# into lists of row lists.
train_x, train_y = x_train.values.tolist(), y_train.values.tolist()

In [27]:
# Convert the held-out frames into lists of row lists.
test_x = x_test.values.tolist()
test_y = y_test.values.tolist()
# Materialise the (inputs, target) pairs. A bare zip() is a one-shot iterator
# in Python 3 and would be empty after its first use, e.g. when the demo cell
# is run a second time.
test_list = list(zip(test_x, test_y))

In [None]:
# `train_list` was not defined anywhere before this cell. It is built here as
# (inputs, target) pairs, mirroring how `test_list` is built from the test
# split. NOTE(review): confirm this is the intended content.
train_list = list(zip(train_x, train_y))

# Write the target of every training sample, one per line.
with open('train.csv', 'w') as f:
    for item in train_list:
        f.write("%s\n" % item[1])

In [28]:
# Train on the training split and print the predictions for the test pairs.
# The function runs 40000 pure-Python training iterations; the recorded run
# below was stopped manually (KeyboardInterrupt) before it finished.
demoClassification(train_x,train_y, test_list)

0 error 38.237774     
100 error 18.909441     
200 error 15.972096     
300 error 14.205115     
400 error 12.858213     
500 error 11.805089     
600 error 10.991467     
700 error 10.338636     
800 error 9.797049      
900 error 9.337540      
1000 error 8.941018      
1100 error 8.593511      
1200 error 8.283778      
1300 error 8.002403      
1400 error 7.741581      
1500 error 7.495077      
1600 error 7.258258      
1700 error 7.028291      
1800 error 6.804307      
1900 error 6.586988      
2000 error 6.377521      
2100 error 6.176735      
2200 error 5.984847      
2300 error 5.801590      
2400 error 5.626435      
2500 error 5.458774      
2600 error 5.298059      
2700 error 5.143873      
2800 error 4.995948      
2900 error 4.854152      
3000 error 4.718448      
3100 error 4.588858      
3200 error 4.465427      
3300 error 4.348190      
3400 error 4.237146      
3500 error 4.132238      
3600 error 4.033340      
3700 error 3.940258      
3800 error 3.852743     

KeyboardInterrupt: 

In [None]:
# Project the encoded features onto their first two principal components and
# plot them coloured by target class.
# NOTE(review): PCA is fitted on `x` (unscaled), not on `data_scaled`. Confirm
# this is intended.

# Leave out p1/p2 from an earlier run of this cell so re-running it does not
# feed the previous projection back into PCA.
pca_features = x.drop(columns=["p1", "p2"], errors="ignore")

pca = PCA(n_components=2)
pca.fit(pca_features)
x_pca = pca.transform(pca_features)
print("variance ratio:", pca.explained_variance_ratio_)

print("sum:", sum(pca.explained_variance_ratio_))
x["p1"] = x_pca[:, 0]
x["p2"] = x_pca[:, 1]

color = ["red", "green"]

for each in range(2):
    mask = y.target == each
    # One legend entry per class. Previously the whole target Series was
    # passed as the label and no legend was drawn.
    plt.scatter(x.p1[mask], x.p2[mask], color=color[each], label="target = %d" % each, alpha=0.5)

plt.xlabel("p1")
plt.ylabel("p2")
plt.legend()
plt.show()