# Clasificare cardiograme - Nichita Utiu 223

In [6]:
%matplotlib inline
# import needed libraries
# pandas
import pandas as pd

# numpy, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# this styling is purely my preference
# less chartjunk
sns.set_context('notebook', font_scale=1.5, rc={'line.linewidth': 2.5})
sns.set(style='ticks', palette='Set2', font='Helvetica')

In [7]:
cardiogram_df = pd.read_csv('./data.csv')  # load data
print(cardiogram_df.isnull().any())        # assess the quality
cardiogram_df.describe()                   # short summary

b           False
e           False
AC          False
FM          False
UC          False
DL          False
DS          False
DP          False
DR          False
LB          False
AC.1        False
FM.1        False
UC.1        False
DL.1        False
DS.1        False
DP.1        False
ASTV        False
MSTV        False
ALTV        False
MLTV        False
Width       False
Min         False
Max         False
Nmax        False
Nzeros      False
Mode        False
Mean        False
Median      False
Variance    False
Tendency    False
A           False
B           False
C           False
D           False
E           False
AD          False
DE          False
LD          False
FS          False
SUSP        False
CLASS       False
NSP         False
dtype: bool


Unnamed: 0,b,e,AC,FM,UC,DL,DS,DP,DR,LB,...,C,D,E,AD,DE,LD,FS,SUSP,CLASS,NSP
count,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,...,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0,2126.0
mean,878.439793,1702.877234,2.722484,7.241298,3.659925,1.570085,0.003293,0.126058,0.0,133.303857,...,-0.950141,-0.923801,-0.932267,-0.687676,-0.762935,-0.899341,-0.935089,-0.814675,4.509878,1.304327
std,894.084748,930.919143,3.56085,37.125309,2.847094,2.499229,0.0573,0.464361,0.0,9.840844,...,0.311894,0.382964,0.361856,0.726188,0.646627,0.43735,0.354495,0.580054,3.026883,0.614377
min,0.0,287.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,106.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0
25%,55.0,1009.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,126.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.0,1.0
50%,538.0,1241.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,133.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.0,1.0
75%,1521.0,2434.75,4.0,2.0,5.0,3.0,0.0,0.0,0.0,140.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,7.0,1.0
max,3296.0,3599.0,26.0,564.0,23.0,16.0,1.0,4.0,0.0,160.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,3.0


In [8]:
def sigm(x):
    return 1. / (1. + np.exp(-x))
                 
def sigm_grad(x):
    return sigm(x)/(1 - sigm(x))

def cost_func(X, Y, thetas, activation_func, gradient_func):
    # feed-forward
    Z = [X]  # values before activation function
    for i, theta in enumerate(thetas):
        # add bias
        values = np.concatenate((np.ones((result.shape[0], 1)), Z[-1]), axis=1)
        Z.append(activation_func(values.dot(theta.T)))  # last layer has sigmoid activation
        
    # cross entropy cost function
    cost = (result - Y).ravel().sum() / X.shape[0]
    
    deltas = [d_sigmoid(Z[-1]) * (Y - Z[-1])]
    # backprop    
    for values, theta in zip(Z[-2::-1], thetas[::-1])
        deltas.append(gradient_func(values) * numpy.dot(deltas[-1], theta)
    
    grad = [theta + self.alpha * numpy.outer(layer, delta)
                for theta, layer, delta in zip(self.weights, layers, deltas)]

    return (cost, grad)


def predict(X, thetas, activation_func):
    result = X  # feeed forward result for each 
    for i, theta in enumerate(thetas):
        values = np.concatenate((np.ones((result.shape[0], 1)), result), axis=1)
        result = activation_func(values.dot(theta.T))  # last layer has sigmoid activation

    return result


def neural_net(X, Y, num_classes, hidden_layers_sizes=[10, 10], epochs=100, alpha=0.001):
    """Receives the input, expected output and sizes of the hidden layers"""
    num_features = X.shape[1]  # size of a row
    
    # build hte weights
    thetas = []
    # the number of neurons on each layer, input and output included
    num_nodes = [num_features] + hidden_layers_sizes + [num_classes]
    for ind_layer in range(1, len(num_nodes)):
        # heuristic intialization
        # add a column for bias. also randomize to break symmetry
        theta = np.random.rand(num_nodes[ind_layer], 1 + num_nodes[ind_layer-1]) - 1
        thetas.append(theta)
    
  
    # build the expected values
    expected_output = np.zeros((Y.shape[0], num_classes))
    expected_output[np.arange(Y.shape[0]), Y.astype('int').ravel() - 1] = 1
    
    # gradient descent
    for i in range(epochs):
        (cost, grads) = cost_func(X, Y, thetas, lambda x: x, lambda x: np.ones(x.shape))
        for theta, grad in zip(thetas, grads):
            theta[:, 1:] -= alpha * grad
    
    return thetas

In [10]:
a= cardiogram_df.values[:, :-1] 
thetas = neural_net(a, cardiogram_df.values[:, -1:], 3, epochs=10, alpha=0.0001)
predict(a, thetas, lambda x: x)

array([[ nan,  nan,  nan],
       [ nan,  nan,  nan],
       [ nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan],
       [ nan,  nan,  nan],
       [ nan,  nan,  nan]])

In [94]:
list(enumerate(range(3, 10)))

[(0, 3), (1, 4), (2, 5), (3, 6), (4, 7), (5, 8), (6, 9)]

In [None]:
class NeuralNet(object):
    def __init__(self, hidden_layers_size=[20, 10], alpha=0.01, activation_func=sigm, deriv_func=sigm_deriv):
        self.alpha = 0.01
        self.hidden_layers_size = hidden_layers_size
        self.activation_func = sigm
        self.deriv_func = deriv_func
       
    def init_weights(self, input_size, output_size):
        layer_sizes = [input_size] + self.hidden_layers_size + [output_size]
        self.thetas = [np.random.rand(num_out, 1 + num_in) - 1
                       for num_out, num_in in zip(layer_sizes[1:], layer_sizes[:-1])]
        
    def train(self, )