In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, precision_score 
from sklearn.model_selection import  StratifiedShuffleSplit

In [2]:
# Read the feature table from the CSV next to the notebook; the bare
# `data.shape` expression below is what the cell displays.
data = pd.read_csv(filepath_or_buffer='./database.csv')
data.shape

(39, 12)

In [3]:
# Binary target: 1 for 'osteoblasticImplant', 0 for every other listed label.
labelToTarget = {
    'degenerativeOsteoarticular': 0,
    'osteoblasticImplant': 1,
    'bladder': 0,
    'sinusopathy': 0,
    'alvelodental': 0
}
mappedLabel = data['label'].map(labelToTarget)
if mappedLabel.isna().any():
    # A label missing from the mapping becomes NaN; fail with the offending
    # values instead of an opaque NaN-to-int cast error.
    unknownLabels = sorted(data.loc[mappedLabel.isna(), 'label'].astype(str).unique())
    raise ValueError('Unmapped label values: {}'.format(unknownLabels))
# `np.int` was a deprecated alias of the builtin `int` (removed in NumPy 1.24),
# so the builtin is used directly.
data['label'] = mappedLabel.astype(int)


def minMaxScale(column):
    """Linearly rescale a numeric Series so its minimum maps to 0 and its maximum to 1.

    Parameters:
        column: pandas Series of numeric values.

    Returns:
        A new Series `(column - min) / (max - min)`. A constant column
        (max == min) is returned as all zeros rather than 0/0 = NaN.
    """
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        return (column - lowest).astype(float)
    return (column - lowest) / span


# Columns rescaled to [0, 1]. `eccentricity` is not rescaled by this cell.
# NOTE(review): min/max are computed over the full dataset, before the
# train/test split performed in a later cell, so test-set statistics influence
# the training features. Consider fitting the scaling on the training split only.
scaledColumns = [
    'xCentroid', 'yCentroid', 'orientation', 'equiDiameter', 'meanVal',
    'solidity', 'extent', 'aspectRatio', 'area', 'arcLength',
]
for columnName in scaledColumns:
    data[columnName] = minMaxScale(data[columnName])

data.head()

Unnamed: 0,xCentroid,yCentroid,aspectRatio,area,extent,arcLength,solidity,meanVal,equiDiameter,eccentricity,orientation,label
0,0.538961,0.181295,0.086318,0.902599,0.490426,1.0,0.348669,0.39896,0.947723,0.93,0.0,1
1,0.493506,0.497842,0.985124,0.375628,0.760615,0.334632,0.693641,0.875623,0.595092,0.87,0.531667,0
2,0.649351,0.070504,0.501226,0.286744,0.796954,0.324233,0.624906,0.677443,0.514259,0.78,0.328502,1
3,0.149351,0.159712,0.581621,0.049137,0.451644,0.123115,0.473642,0.425056,0.188409,0.86,0.327245,1
4,0.675325,0.751079,0.91886,0.04062,0.636463,0.101409,0.535957,0.250799,0.167819,0.88,0.476822,1


In [4]:
# Every column except the target is a feature. This must be an equality test:
# `x not in 'label'` is a substring check against the string 'label' and would
# also drop any column whose name happens to be a substring of it.
featuresColumns = [x for x in data.columns if x != 'label']

# One stratified shuffle split; an integer test_size is an absolute number of
# test rows, and random_state fixes the shuffle for reproducibility.
stf = StratifiedShuffleSplit(n_splits=1, test_size=10, random_state=42)
trainIdx, testIdx = next(stf.split(data[featuresColumns], data['label']))

# split() yields positional row indices, so select with .iloc; label-based
# .loc only coincides with positions when the index is the default 0..n-1.
xTrain = data[featuresColumns].iloc[trainIdx]
yTrain = data['label'].iloc[trainIdx]

xTest = data[featuresColumns].iloc[testIdx]
yTest = data['label'].iloc[testIdx]

In [5]:
# Fit a support-vector classifier, constructed with no arguments, on the
# training split. The fit call is the cell's last expression, so its return
# value is displayed.
sv = svm.SVC()
sv.fit(X=xTrain, y=yTrain)

SVC()

In [6]:
def measureErrors(yTrue, yGuess, label):
    """Summarise classification quality as a named pandas Series.

    Parameters:
        yTrue: ground-truth labels.
        yGuess: predicted labels for the same samples.
        label: name given to the returned Series (e.g. the split it describes).

    Returns:
        pd.Series indexed by 'accuracy', 'precision', 'recall' and 'roc', in
        that order. Note that 'roc' is computed by passing the predicted
        labels themselves as the score argument of `roc_auc_score`.
    """
    # (row name, metric function) pairs; every metric is called as
    # metric(yTrue, yGuess), and the order here fixes the Series index order.
    metricFns = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('roc', roc_auc_score),
    )
    scores = {metricName: metricFn(yTrue, yGuess) for metricName, metricFn in metricFns}
    return pd.Series(scores, name=label)

In [7]:
# Predict labels for both splits with the fitted classifier.
trainPrediction = sv.predict(xTrain)
testPrediction = sv.predict(xTest)

# Place the per-split metric Series side by side: one column per split,
# one row per metric.
trainAndTestErrors = pd.concat(
    objs=[
        measureErrors(yTrain, trainPrediction, 'train'),
        measureErrors(yTest, testPrediction, 'test'),
    ],
    axis='columns',
)

trainAndTestErrors

Unnamed: 0,train,test
accuracy,0.931034,0.4
precision,1.0,0.333333
recall,0.857143,0.2
roc,0.928571,0.4
