In [170]:
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

### 10-Fold Stratified Cross-Validation

In [171]:
# Evaluate a random forest with stratified 10-fold cross-validation and
# report mean / standard deviation of the per-fold accuracy (in percent).
SEED = 42      # fixed seed so the forest (and therefore the scores) is repeatable
N_SPLITS = 10  # number of stratified folds

data = pd.read_csv("../data/data.csv")
# Features are every column except the first and the last; the last column is
# the label. (Presumably column 0 is an id/index column — TODO confirm.)
X_raw = data.iloc[:, 1:-1].values
y = data.iloc[:, -1].values

# Encode the label column as integers 0..n_classes-1.
le = LabelEncoder()
y = le.fit_transform(y)

# Stratified 10-Fold Cross Validation
skf = StratifiedKFold(n_splits=N_SPLITS)

# model
rmf = RandomForestClassifier(random_state=SEED)

# One accuracy per fold; kept as shape (1, N_SPLITS) like the original arrays.
train_scores = np.zeros((1, N_SPLITS))
test_scores = np.zeros((1, N_SPLITS))
for fold, (train_index, test_index) in enumerate(skf.split(X_raw, y)):
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training fold only and reuse its statistics for the
    # held-out fold. Scaling the full matrix before splitting would leak
    # test-fold mean/variance into training and bias the test score.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X_raw[train_index])
    X_test = fold_scaler.transform(X_raw[test_index])

    # train the model (fit() rebuilds the forest from scratch on each call)
    rmf.fit(X_train, y_train)

    y_train_predicted = rmf.predict(X_train)
    y_test_predicted = rmf.predict(X_test)

    train_scores[0][fold] = accuracy_score(y_train, y_train_predicted) * 100
    test_scores[0][fold] = accuracy_score(y_test, y_test_predicted) * 100

print("Training: %.2f%% with standard deviation of %.2f%%" % (train_scores.mean(), train_scores.std()))
print("Test:     %.2f%% with standard deviation of %.2f%%" % (test_scores.mean(), test_scores.std()))

# Cross-validation only estimates performance. Refit the scaler and the model
# on the full dataset so the objects left in the kernel (`sc`, `X`, `rmf`) are
# a consistent final pair rather than whatever the last fold happened to train.
sc = StandardScaler()
X = sc.fit_transform(X_raw)
rmf.fit(X, y)



Training: 100.00% with standard deviation of 0.00%
Test:     98.75% with standard deviation of 0.85%


### Example: Saving the Model

In [172]:
import pickle

# Serialize the fitted classifier held in `rmf` to disk with pickle.
# NOTE(review): the file is named "svc.pickle" but the object written is the
# random forest `rmf`; only the classifier is saved, not the scaler or the
# label encoder. Only unpickle this file from a trusted location.
model_path = "../models/svc.pickle"
with open(model_path, mode="wb") as model_file:
    pickle.dump(rmf, model_file)

### Getting a Classifier's Hyperparameters

In [180]:
from sklearn.svm import SVC
from sklearn.svm import NuSVC
from sklearn.tree import ExtraTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

# Print the name of every hyperparameter a default-constructed NuSVC exposes,
# one per line. Iterating the dict returned by get_params() yields its keys.
nu_svc_params = NuSVC().get_params()
for param_name in nu_svc_params:
    print(param_name)

break_ties
cache_size
class_weight
coef0
decision_function_shape
degree
gamma
kernel
max_iter
nu
probability
random_state
shrinking
tol
verbose
