In [1]:
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pandas as pd
import random as rd
from cvxpy import *

# Select matplotlib's 'ggplot' style sheet for figures created after this call.
style.use('ggplot')


class SVM():
    '''
    Linear soft-margin support vector machine trained with cvxpy, together
    with helpers that prepare data for k-fold cross validation.

    Attributes created by the methods:
        w, b                        hyperplane parameters (set by fit)
        data_for_cross_validation   list of (train_data, test_data) tuples
                                    (set by prepare_data)
        class_mapping               original label -> +1/-1 (set by set_class)
        inverse_class_mapping       +1/-1 -> original label (set by set_class)
    '''

    def fit(self, X, y, C=5):
        '''
        Train the classifier, i.e. find w and b of the separating hyperplane.

        Solves the optimization problem
            minimize    ||w|| + C * sum(e_i)
            subject to  y_i * (w^T x_i + b) >= 1 - e_i  and  e_i >= 0
        with one slack variable e_i per training sample (i = 1, ..., len(X)).

        :param X: training samples, indexable as X[i]; every sample is expected
                  to have len(X[0]) features
        :param y: one label per sample; expected to be +1 or -1
        :param C: weight of the total slack in the objective
        :return: None; the solution is stored in self.w and self.b
        '''
        n = len(X)      # number of samples
        m = len(X[0])   # number of features

        w = Variable(m)
        b = Variable(1)
        e = Variable(n)

        # NOTE(review): sum_entries and `*` as matrix product look like the
        # pre-1.0 cvxpy API -- confirm against the installed cvxpy version.
        obj = Minimize(norm(w) + C * sum_entries(e))

        constraints = [e >= 0]

        for i in range(n):
            # Same as y_i * (w^T x_i + b) >= 1 - e_i, written in "<= 0" form.
            constraints.append(1 - e[i] - y[i] * (w.T * X[i] + b) <= 0)

        prob = Problem(obj, constraints)
        prob.solve()

        # Flatten the solver's value for w into a plain 1-D array.
        self.w = np.squeeze(np.asarray(w.T.value))
        self.b = b.value

    def predict(self, X):
        '''
        Classify one sample or an array of samples. Requires self.w and self.b,
        which fit() sets.

        :param X: a single feature vector or a 2-D array with one sample per row
        :return: sign of (X . w + b): 1 or -1, or 0 for a point lying exactly
                 on the hyperplane
        '''
        return np.sign(np.dot(X, self.w) + self.b)

    def prepare_data(self, data, k=10):
        '''
        Shuffle the data and build the k train/test splits used for k-fold
        cross validation.

        :param data: sequence of instances
        :param k: number of folds
        :return: None; the splits are stored in self.data_for_cross_validation
                 as a list of k (train_data, test_data) tuples, where fold i is
                 the test set of split i and all other folds form its train set
        '''
        # random.shuffle works in place and returns None, so its result must
        # not be assigned.  Shuffling a copy keeps the caller's order intact.
        shuffled = list(data)
        rd.shuffle(shuffled)

        split_data = partition(shuffled, k)
        self.data_for_cross_validation = []
        for i in range(k):
            test_data = split_data[i]
            train_data = [instance
                          for j, fold in enumerate(split_data) if j != i
                          for instance in fold]
            self.data_for_cross_validation.append((train_data, test_data))

    def normalize(self, data):
        '''
        Scale every column by the largest absolute value it takes in the
        training data.

        :param data: data as [train_data, test_data], both sequences of
                     instances (an instance is a sequence of numbers)
        :return: (train_data, test_data) as new lists of lists; the inputs are
                 not modified.  Train values end up in [-1, 1]; test values may
                 fall outside that interval because the scale comes from the
                 training data only.
        '''
        train_data = data[0]
        test_data = data[1]

        # Largest absolute value per column, taken over the training data.
        scale = [0] * len(train_data[0])
        for instance in train_data:
            for pos, feature in enumerate(instance):
                scale[pos] = max(scale[pos], abs(feature))

        # A column that is zero everywhere cannot be scaled; dividing by 1
        # leaves it unchanged and avoids a division by zero.
        scale = [s if s != 0 else 1 for s in scale]

        def rescale(instances):
            # Build new rows: rebinding the loop variable would change nothing.
            return [[feature / scale[pos] for pos, feature in enumerate(instance)]
                    for instance in instances]

        return (rescale(train_data), rescale(test_data))

    def set_class(self, data):
        '''
        Replace the class label (last element of every instance) by +1 or -1.

        The two labels are ordered by sorting, so the assignment is the same
        on every run: the smaller label becomes +1, the larger one -1.  Labels
        that are already +1/-1 are kept as they are.  The mappings used are
        stored in self.class_mapping and self.inverse_class_mapping.

        :param data: iterable of data sets (e.g. (train_data, test_data)), each
                     a sequence of mutable instances; modified in place
        :return: the same data object, whose labels are now either -1 or 1
        '''
        found = set()
        for example in data:
            for instance in example:
                found.add(instance[-1])

        if found <= {1, -1}:
            # Already encoded: keep the labels instead of swapping them.
            positive, negative = 1, -1
        else:
            try:
                labels = sorted(found)
            except TypeError:
                # Labels that cannot be ordered: fall back to set order.
                labels = list(found)
            positive, negative = labels[0], labels[1]

        mapping = {positive: 1, negative: -1}
        self.class_mapping = mapping
        # Keyed by the encoded values +1 and -1 (not 0), so predictions can be
        # translated back to the original labels.
        self.inverse_class_mapping = {1: positive, -1: negative}

        for example in data:
            for instance in example:
                instance[-1] = mapping[instance[-1]]
        return data




def partition(lst, n):
    '''
    :param lst: array of elemnts
    :param n: number of partitions
    :return: array partitioned into n arrays
    '''
    division = len(lst) / n
    return [lst[round(division * i):round(division * (i + 1))] for i in range(n)]




# Location of the breast-cancer-wisconsin CSV; change it here to point at a
# local copy of the file.
DATA_PATH = '/Users/lenarttreven/PycharmProjects/MachineLearning/breast-cancer-wisconsin.data.txt'

# Load the data, replace the '?' placeholders by the outlier value -99 and
# drop the id column.  `axis` is passed by keyword because newer pandas
# versions no longer accept it positionally.
df = pd.read_csv(DATA_PATH)
df = df.replace('?', -99).drop(['id'], axis=1)

# Feature matrix: every column except the class label.
X = np.array(df.drop(['class'], axis=1))

# Encode the labels for the SVM: class 2 -> +1, class 4 -> -1; any other
# value is left unchanged.
labels = np.array(df['class'])
y = np.where(labels == 2, 1, np.where(labels == 4, -1, labels))


clf = SVM()

clf.fit(X, y)


df[:20]

Unnamed: 0,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2
3,6,8,8,1,3,4,3,7,1,2
4,4,1,1,3,2,1,3,1,1,2
5,8,10,10,8,7,10,9,7,1,4
6,1,1,1,1,2,10,3,1,1,2
7,2,1,2,1,2,1,3,1,1,2
8,2,1,1,1,2,1,1,1,5,2
9,4,2,1,1,2,1,2,1,1,2


In [26]:
# Classify a single sample: these are the nine feature values of row 0 in the
# table above, whose class 2 was encoded as +1 before training.
clf.predict([5, 1, 1, 1, 2, 1, 3, 1, 1])

1.0