In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

X, y = datasets.load_iris(return_X_y=True)
X.shape, y.shape

((150, 4), (150,))

In [2]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

In [3]:
X_train.shape, y_train.shape

((90, 4), (90,))

In [4]:
X_test.shape, y_test.shape

((60, 4), (60,))

In [5]:
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
clf.score(X_test, y_test)

0.9666666666666667

In [6]:
from sklearn.model_selection import cross_val_score
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, X, y, cv=5)
scores

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [7]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.98 (+/- 0.03)


In [8]:
import numpy as np
from sklearn.model_selection import KFold
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
kf = KFold(n_splits=2)
kf.get_n_splits(X)
print(kf)


KFold(n_splits=2, random_state=None, shuffle=False)


In [9]:
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]


TRAIN: [2 3] TEST: [0 1]
TRAIN: [0 1] TEST: [2 3]


In [10]:
from sklearn.model_selection import LeaveOneOut
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
loo = LeaveOneOut()
for train, test in loo.split(X):
    print("train: %s test:%s" % (train, test))
    print("train: %s %s %s test:%s" % (X[train[0]],X[train[1]],X[train[2]],X[test[0]]))
    print("**************************************************************************;")


train: [1 2 3] test:[0]
train: [3 4] [1 2] [3 4] test:[1 2]
**************************************************************************;
train: [0 2 3] test:[1]
train: [1 2] [1 2] [3 4] test:[3 4]
**************************************************************************;
train: [0 1 3] test:[2]
train: [1 2] [3 4] [3 4] test:[1 2]
**************************************************************************;
train: [0 1 2] test:[3]
train: [1 2] [3 4] [1 2] test:[3 4]
**************************************************************************;


In [12]:
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.io
import numpy as np
from datetime import datetime, date, time
import pandas as pd
mat = scipy.io.loadmat('exampleCV_atMatlab/data.mat')

In [13]:
data = mat['data'][0][0]
type(data),data.shape

(numpy.void, ())

In [14]:
type(data[0]),data[0].shape,type(data[1]),data[1].shape

(numpy.ndarray, (84, 1190), numpy.ndarray, (84, 1))

In [15]:
mtype = data.dtype  

In [16]:
ndata = {n: data[n] for n in mtype.names}
ndata

{'features': array([[0.0530716 , 0.0772467 , 0.        , ..., 0.262905  , 0.00870984,
         0.271614  ],
        [0.0356869 , 0.0762979 , 0.        , ..., 0.345782  , 0.0253565 ,
         0.371138  ],
        [0.0529476 , 0.0567482 , 0.        , ..., 0.150802  , 0.0311162 ,
         0.181918  ],
        ...,
        [0.00564811, 0.0895224 , 0.        , ..., 0.264739  , 0.0034535 ,
         0.268192  ],
        [0.0485545 , 0.120048  , 0.        , ..., 0.228666  , 0.0176481 ,
         0.246314  ],
        [0.0039096 , 0.0453717 , 0.        , ..., 0.26317   , 0.0118474 ,
         0.275017  ]]), 'labels': array([[ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 1],
        [ 

In [17]:
dataFeatures = data['features']
dataLabels = data['labels']

In [18]:
dataFeatures.shape,dataLabels.shape

((84, 1190), (84, 1))

In [19]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=5,shuffle=False)
kf.get_n_splits(dataFeatures)
print(kf)

KFold(n_splits=5, random_state=None, shuffle=False)


In [20]:
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
accuracy_model = []
#clf = tree.DecisionTreeClassifier(random_state=20) 
svclassifier = SVC(kernel='linear')
# Iterate over each train-test split
for train_index, test_index in kf.split(dataFeatures):
    # Split train-test
    X_train, X_test = dataFeatures[train_index], dataFeatures[test_index]
    y_train, y_test = dataLabels[train_index], dataLabels[test_index]
    # Train the model
    svclassifier.fit(X_train, y_train)
    # Append to accuracy_model the accuracy of the model
    accuracy_model.append(accuracy_score(y_test, svclassifier.predict(X_test), normalize=True)*100)
 
# Print the accuracy    
print(accuracy_model)

[58.82352941176471, 52.94117647058824, 64.70588235294117, 58.82352941176471, 68.75]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
