In [12]:
import numpy as np
import pandas as pd
from sklearn import svm
from os.path import join
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt

# 0. Load Data

In [3]:
data = pd.read_csv("./wholesale-customers.csv")

In [4]:
data.head()

Unnamed: 0,Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen
0,2,3,12669,9656,7561,214,2674,1338
1,2,3,7057,9810,9568,1762,3293,1776
2,2,3,6353,8808,7684,2405,3516,7844
3,1,3,13265,1196,4221,6404,507,1788
4,2,3,22615,5410,7198,3915,1777,5185


In [5]:
X = data.iloc[:, 2:]
y = data.iloc[:, 0]

In [7]:
def split_hold(rate):
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    train_size=rate, random_state=100)
    return X_train, X_test, y_train, y_test

# 1. Linear SVM

In [8]:
def linear_svm():
    for rate in np.linspace(0.05, 0.5, 2):
        clf = SVC(C=1.0, kernel='linear', gamma='auto')
        X_train, X_test, y_train, y_test = split_hold(rate)
        svm_model_linear = clf.fit(X_train, y_train)
        score = svm_model_linear.score(X_test, y_test)
        coeffs = clf.coef_
        print("rate: [{}], score: [{}] \n --> coeffs: [{}]".format(rate, loss, coeffs))
    return 

In [9]:
linear_svm()

rate: [0.05], loss: [[-0.87640449 -0.92134831 -0.875      -0.90804598 -0.83908046]], coeffs: [[[-0.00207043  0.0033018   0.00261715 -0.01635989  0.03697236  0.00575787]]]
rate: [0.5], loss: [[-0.87640449 -0.92134831 -0.875      -0.90804598 -0.83908046]], coeffs: [[[-7.82171250e-04  2.58996601e-03  6.15088752e-03  2.31314789e-05
   1.69480629e-02 -1.20848630e-02]]]


In [10]:
def my_scoring(*args):
    return np.mean(cross_val_score(*args, cv=5))

In [23]:
def linear_svm():
    clf = SVC(C=1.0, kernel='linear', gamma='auto')
    train_sizes, train_loss, test_loss = learning_curve(clf, X, y, cv=5, scoring=my_scoring, 
                                                        train_sizes=np.linspace(0.5, 0.95, 2))
    return train_sizes, train_loss, test_loss

In [24]:
train_sizes, train_loss, test_loss = linear_svm()

KeyboardInterrupt: 

In [None]:
train_loss_mean = -np.mean(train_loss, axis=1)
test_loss_mean = -np.mean(test_loss, axis=1)

plt.plot(train_sizes, train_loss_mean, 'o-', color="r", label="Training")
plt.plot(train_sizes, test_loss_mean, 'o-', color="g", label="Cross-validation")
plt.xlabel("Training examples")
plt.ylabel("Loss")
plt.legend(loc="best")
plt.show()

In [22]:
accuracy, cm, coeffs = linear_svm()

In [23]:
accuracy

0.8636363636363636

In [24]:
cm

array([[88,  6],
       [12, 26]])

In [25]:
coeffs

array([[-0.00260078,  0.00117366,  0.00390151, -0.00185389,  0.02443228,
        -0.0041149 ]])

# 2. Polynomial SVM

In [28]:
def poly_svm():
    clf = SVC(C=1.0, kernel='poly', gamma='auto')
    svm_model_linear = clf.fit(X_train, y_train)
    svm_predictions = svm_model_linear.predict(X_test)
    accuracy = svm_model_linear.score(X_test, y_test) 
    cm = confusion_matrix(y_test, svm_predictions) 
    coeffs = clf.coef_
    return accuracy, cm, coeffs

In [None]:
accuracy, cm, coeffs = poly_svm()

In [None]:
accuracy

In [None]:
cm

In [None]:
coeffs

# 3. Gaussion SVM

In [6]:
def gaussion_svm():
    clf = SVC(C=1.0, kernel='rbf', gamma='auto')
    svm_model_linear = clf.fit(X_train, y_train)
    svm_predictions = svm_model_linear.predict(X_test)
    accuracy = svm_model_linear.score(X_test, y_test) 
    cm = confusion_matrix(y_test, svm_predictions) 
    coeffs = clf.coef_
    return accuracy, cm, coeffs

In [None]:
accuracy, cm, coeffs = gaussion_svm()

In [None]:
accuracy

In [None]:
cm

In [None]:
coeffs