In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, precision_score 
from sklearn.model_selection import  StratifiedShuffleSplit

In [2]:
# Read the dataset from the CSV next to the notebook, then show its
# (rows, columns) shape as the cell output.
data = pd.read_csv(filepath_or_buffer='./database.csv')
data.shape

(38, 8)

In [3]:
# Binarise the target: 'osteoblasticImplant' is the positive class (1) and
# every other listed diagnosis is the negative class (0).
# NOTE: this cell rewrites `data` in place, so it is meant to run once per
# load. Running it a second time maps the already-numeric values to NaN and
# the integer cast below raises.
# The builtin `int` replaces `np.int`, an alias that was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
data['label'] = data['label'].map({
    'degenerativeOsteoarticular': 0,
    'osteoblasticImplant': 1,
    'bladder': 0,
    'sinusopathy': 0,
    'alvelodental': 0
}).astype(int)

# Min-max scale the geometric features to the [0, 1] range.
# NOTE(review): the min/max are computed over every row of `data`, i.e. before
# any train/test split, so held-out rows influence the scaling - consider
# computing these statistics on the training rows only.
for column in ['xCentroid', 'yCentroid', 'area', 'arcLength']:
    columnMin = data[column].min()
    columnMax = data[column].max()
    data[column] = (data[column] - columnMin) / (columnMax - columnMin)

# Encode the acquisition type as an integer. Only 'ant' is mapped; any other
# value becomes NaN and makes the integer cast raise.
data['type'] = data['type'].map({
    'ant': 1
}).astype(int)

data.head()

Unnamed: 0,xCentroid,yCentroid,arcLength,area,eccentricity,type,applicationHours,label
0,0.538961,0.181295,1.0,0.902599,0.93,1,3,1
1,0.493506,0.497842,0.334632,0.375628,0.87,1,3,0
2,0.649351,0.070504,0.324233,0.286744,0.78,1,3,1
3,0.149351,0.159712,0.123115,0.049137,0.86,1,3,1
4,0.675325,0.751079,0.101409,0.04062,0.88,1,3,1


In [4]:
# Every column except the target is used as a model input. The names are
# compared for equality: the previous `x not in 'label'` was a substring test
# against the string 'label', which would also drop columns named e.g. 'a',
# 'lab' or 'el'.
featuresColumns = [column for column in data.columns if column != 'label']

# One stratified shuffle split that holds out 10 rows for testing; the seed is
# fixed so the split is reproducible.
stf = StratifiedShuffleSplit(n_splits=1, test_size=10, random_state=42)
trainIdx, testIdx = next(stf.split(data[featuresColumns], data['label']))

# The splitter yields positional row indices, so rows are selected with .iloc
# (label-based .loc only matches when the index is the default 0..n-1 range).
xTrain = data[featuresColumns].iloc[trainIdx]
yTrain = data['label'].iloc[trainIdx]

xTest = data[featuresColumns].iloc[testIdx]
yTest = data['label'].iloc[testIdx]

In [5]:
# Train a support-vector classifier with the library's default settings on
# the training split; fit() returns the estimator itself, which is kept in
# `sv` and echoed as the cell output.
sv = svm.SVC().fit(xTrain, yTrain)
sv

SVC()

In [6]:
def measureErrors(yTrue, yGuess, label, yScore=None):
    """Summarise binary-classification quality as a named pandas Series.

    Parameters
    ----------
    yTrue : array-like
        Ground-truth class labels.
    yGuess : array-like
        Predicted class labels.
    label : str
        Name given to the returned Series (e.g. 'train' or 'test'), which
        becomes the column name when several results are concatenated.
    yScore : array-like, optional
        Continuous scores for the positive class (for example the output of
        ``decision_function``). When given, ROC AUC is computed from these
        scores. When omitted, ROC AUC falls back to the hard predictions in
        ``yGuess``, which only evaluates a single operating point.

    Returns
    -------
    pandas.Series
        Entries 'accuracy', 'precision', 'recall' and 'roc', named ``label``.
    """
    # ROC AUC is a ranking metric, so prefer continuous scores when supplied.
    rocInput = yGuess if yScore is None else yScore
    return pd.Series(
        {
            'accuracy': accuracy_score(y_true=yTrue, y_pred=yGuess),
            'precision': precision_score(y_true=yTrue, y_pred=yGuess),
            'recall': recall_score(y_true=yTrue, y_pred=yGuess),
            'roc': roc_auc_score(yTrue, rocInput),
        },
        name=label,
    )

In [7]:
# Predict class labels for both splits with the fitted classifier.
trainPrediction, testPrediction = (
    sv.predict(features) for features in (xTrain, xTest)
)

# Put the metrics of each split side by side: one column per split, one row
# per metric.
trainAndTestErrors = pd.concat(
    [
        measureErrors(yTrain, trainPrediction, 'train'),
        measureErrors(yTest, testPrediction, 'test'),
    ],
    axis=1,
)

trainAndTestErrors

Unnamed: 0,train,test
accuracy,0.607143,0.6
precision,0.6,0.6
recall,0.642857,0.6
roc,0.607143,0.6
