In [1]:
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer, make_classification
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut

from sampler import sample_from_logreg

import numpy as np
import matplotlib.pyplot as plt

from cv_svm import SVM_smooth

#import matplotlib
#matplotlib.use("pgf")
#matplotlib.rcParams.update({
#    "pgf.texsystem": "pdflatex",
#    'font.family': 'serif',
#    'text.usetex': True,
#    'pgf.rcfonts': False,
#})

In [2]:
# Data comes from the project helper `sample_from_logreg`; its second return
# value is discarded. Alternative datasets that were tried here:
#   X, y = load_breast_cancer(return_X_y=True)
#   X, y = make_classification(n_samples=250, n_features=50)
X, _, y = sample_from_logreg(n=250, p=20, seed=120)
n, p = X.shape

# Recode label 0 as -1 (in place).
y[y == 0] = -1

In [3]:
# Standardise the features with StandardScaler and print the resulting shape.
# (Use `X_scaled = X` instead to run on the unscaled features.)
X_scaled = StandardScaler().fit(X).transform(X)
print(X_scaled.shape)

(250, 20)


In [4]:
# Linear-kernel sklearn SVC used as the reference solution.
clf = SVC(kernel='linear').fit(X_scaled, y)

# Keep the raw coefficients (flattened copy) and a copy divided by their norm,
# so later cells can compare directions rather than magnitudes.
sk_coef_og = clf.coef_.flatten()
sk_coef = clf.coef_ / np.linalg.norm(clf.coef_)

print(clf.coef_, clf.intercept_, sep="\n")

[[ 1.44119178e+00 -2.29122060e-01 -4.73133921e-01 -5.71197691e-01
  -9.23568038e-02 -4.16302288e-01  2.29394113e-01 -2.91213658e-02
  -1.85803923e-01  1.53175947e-01  2.37632445e-01  1.26117844e-01
   1.51192806e-01 -1.06962035e-03  1.36621515e-02  4.48711746e-01
   1.74007771e+00  2.70215171e-02 -2.03117321e+00  3.15806682e-02]]
[-0.02560786]


In [5]:
# Accuracy of the current `clf` on the same data it was fitted on.
y_pred = clf.predict(X_scaled)
accuracy_score(y_true=y, y_pred=y_pred)

0.88

In [None]:
# Fit the project's SVM_smooth model. NOTE: this rebinds `clf`, so from here on
# `clf` no longer refers to the sklearn SVC fitted earlier.
fit_options = dict(
    thresh=0, n_iter=1500, eta=0.5/n,
    approx_cv=True, cv=True, log_iter=True, log_iacv=True,
    save_cond_nums=True, save_err_approx=True, save_err_cv=True,
    warm_start=0, normalise=False, adjust_factor=True,
)
clf = SVM_smooth(sigma=2e-1, lbd=1e-1)
clf.fit(X_scaled, y, **fit_options)

# Norm of `nabla_fgd_` evaluated at the fitted weights (reported as "grad").
grad_norm = np.linalg.norm(clf.nabla_fgd_(clf.weights_, X_scaled, y, clf.sigma_, clf.lbd_))
print(f"grad {grad_norm}")

# Compare the norm-scaled fitted weights against the norm-scaled sklearn ones.
coef = clf.weights_/np.linalg.norm(clf.weights_)
mean_abs_diff = np.mean(np.abs(coef - sk_coef))
print(f"diff {mean_abs_diff} | pct {mean_abs_diff/np.sum(np.abs(coef))}")

# Accuracy on the data the model was fitted on.
y_pred = clf.predict(X_scaled)
accuracy_score(y, y_pred)

iter 0 | grad 0.70191 | objective 1.00990 | IACV: 0.00000562 | baseline: 0.00139864
iter 1 | grad 0.70175 | objective 1.00899 | IACV: 0.00001123 | baseline: 0.00139398
iter 2 | grad 0.70158 | objective 1.00807 | IACV: 0.00001685 | baseline: 0.00139018
iter 3 | grad 0.70141 | objective 1.00716 | IACV: 0.00002246 | baseline: 0.00138726
iter 4 | grad 0.70124 | objective 1.00624 | IACV: 0.00002807 | baseline: 0.00138522
iter 5 | grad 0.70108 | objective 1.00533 | IACV: 0.00003368 | baseline: 0.00138406
iter 6 | grad 0.70091 | objective 1.00441 | IACV: 0.00003929 | baseline: 0.00138378
iter 7 | grad 0.70074 | objective 1.00350 | IACV: 0.00004489 | baseline: 0.00138439
iter 8 | grad 0.70057 | objective 1.00259 | IACV: 0.00005050 | baseline: 0.00138588
iter 9 | grad 0.70040 | objective 1.00167 | IACV: 0.00005610 | baseline: 0.00138825
iter 10 | grad 0.70024 | objective 1.00076 | IACV: 0.00006170 | baseline: 0.00139150
iter 11 | grad 0.70007 | objective 0.99985 | IACV: 0.00006730 | baseline: 0

iter 98 | grad 0.68434 | objective 0.92152 | IACV: 0.00054754 | baseline: 0.00347744
iter 99 | grad 0.68414 | objective 0.92064 | IACV: 0.00055296 | baseline: 0.00350875
iter 100 | grad 0.68394 | objective 0.91975 | IACV: 0.00055838 | baseline: 0.00354011
iter 101 | grad 0.68373 | objective 0.91886 | IACV: 0.00056380 | baseline: 0.00357151
iter 102 | grad 0.68353 | objective 0.91798 | IACV: 0.00056921 | baseline: 0.00360294
iter 103 | grad 0.68332 | objective 0.91709 | IACV: 0.00057462 | baseline: 0.00363441
iter 104 | grad 0.68312 | objective 0.91621 | IACV: 0.00058003 | baseline: 0.00366592
iter 105 | grad 0.68291 | objective 0.91532 | IACV: 0.00058543 | baseline: 0.00369746
iter 106 | grad 0.68270 | objective 0.91444 | IACV: 0.00059083 | baseline: 0.00372903
iter 107 | grad 0.68249 | objective 0.91355 | IACV: 0.00059622 | baseline: 0.00376064
iter 108 | grad 0.68228 | objective 0.91267 | IACV: 0.00060162 | baseline: 0.00379227
iter 109 | grad 0.68206 | objective 0.91179 | IACV: 0.00

iter 194 | grad 0.65310 | objective 0.83884 | IACV: 0.00103871 | baseline: 0.00654633
iter 195 | grad 0.65268 | objective 0.83801 | IACV: 0.00104349 | baseline: 0.00657819
iter 196 | grad 0.65226 | objective 0.83719 | IACV: 0.00104826 | baseline: 0.00661005
iter 197 | grad 0.65184 | objective 0.83637 | IACV: 0.00105304 | baseline: 0.00664190
iter 198 | grad 0.65143 | objective 0.83555 | IACV: 0.00105782 | baseline: 0.00667374
iter 199 | grad 0.65101 | objective 0.83474 | IACV: 0.00106260 | baseline: 0.00670558
iter 200 | grad 0.65061 | objective 0.83392 | IACV: 0.00106739 | baseline: 0.00673740
iter 201 | grad 0.65020 | objective 0.83310 | IACV: 0.00107217 | baseline: 0.00676922
iter 202 | grad 0.64980 | objective 0.83228 | IACV: 0.00107697 | baseline: 0.00680103
iter 203 | grad 0.64940 | objective 0.83147 | IACV: 0.00108176 | baseline: 0.00683282
iter 204 | grad 0.64900 | objective 0.83066 | IACV: 0.00108655 | baseline: 0.00686461
iter 205 | grad 0.64861 | objective 0.82984 | IACV: 0.

iter 290 | grad 0.60877 | objective 0.76397 | IACV: 0.00147179 | baseline: 0.00952030
iter 291 | grad 0.60813 | objective 0.76324 | IACV: 0.00147562 | baseline: 0.00954973
iter 292 | grad 0.60748 | objective 0.76252 | IACV: 0.00147944 | baseline: 0.00957913
iter 293 | grad 0.60682 | objective 0.76179 | IACV: 0.00148324 | baseline: 0.00960847
iter 294 | grad 0.60617 | objective 0.76107 | IACV: 0.00148702 | baseline: 0.00963777
iter 295 | grad 0.60550 | objective 0.76035 | IACV: 0.00149078 | baseline: 0.00966702
iter 296 | grad 0.60484 | objective 0.75963 | IACV: 0.00149452 | baseline: 0.00969623
iter 297 | grad 0.60416 | objective 0.75891 | IACV: 0.00149825 | baseline: 0.00972539
iter 298 | grad 0.60349 | objective 0.75819 | IACV: 0.00150196 | baseline: 0.00975450
iter 299 | grad 0.60281 | objective 0.75748 | IACV: 0.00150565 | baseline: 0.00978357
iter 300 | grad 0.60212 | objective 0.75676 | IACV: 0.00150932 | baseline: 0.00981258
iter 301 | grad 0.60144 | objective 0.75605 | IACV: 0.

iter 386 | grad 0.53276 | objective 0.70151 | IACV: 0.00177112 | baseline: 0.01209889
iter 387 | grad 0.53186 | objective 0.70094 | IACV: 0.00177364 | baseline: 0.01212266
iter 388 | grad 0.53096 | objective 0.70037 | IACV: 0.00177616 | baseline: 0.01214636
iter 389 | grad 0.53006 | objective 0.69981 | IACV: 0.00177866 | baseline: 0.01216998
iter 390 | grad 0.52916 | objective 0.69925 | IACV: 0.00178115 | baseline: 0.01219352
iter 391 | grad 0.52825 | objective 0.69869 | IACV: 0.00178363 | baseline: 0.01221700
iter 392 | grad 0.52735 | objective 0.69813 | IACV: 0.00178609 | baseline: 0.01224039
iter 393 | grad 0.52644 | objective 0.69758 | IACV: 0.00178855 | baseline: 0.01226372
iter 394 | grad 0.52553 | objective 0.69702 | IACV: 0.00179100 | baseline: 0.01228696
iter 395 | grad 0.52462 | objective 0.69647 | IACV: 0.00179343 | baseline: 0.01231014
iter 396 | grad 0.52371 | objective 0.69592 | IACV: 0.00179585 | baseline: 0.01233323
iter 397 | grad 0.52280 | objective 0.69537 | IACV: 0.

iter 482 | grad 0.44356 | objective 0.65517 | IACV: 0.00197580 | baseline: 0.01404431
iter 483 | grad 0.44266 | objective 0.65476 | IACV: 0.00197779 | baseline: 0.01406128
iter 484 | grad 0.44176 | objective 0.65436 | IACV: 0.00197978 | baseline: 0.01407820
iter 485 | grad 0.44086 | objective 0.65397 | IACV: 0.00198177 | baseline: 0.01409506
iter 486 | grad 0.43996 | objective 0.65357 | IACV: 0.00198377 | baseline: 0.01411186
iter 487 | grad 0.43906 | objective 0.65318 | IACV: 0.00198577 | baseline: 0.01412861
iter 488 | grad 0.43817 | objective 0.65278 | IACV: 0.00198778 | baseline: 0.01414530
iter 489 | grad 0.43727 | objective 0.65239 | IACV: 0.00198979 | baseline: 0.01416193
iter 490 | grad 0.43638 | objective 0.65200 | IACV: 0.00199180 | baseline: 0.01417851
iter 491 | grad 0.43549 | objective 0.65161 | IACV: 0.00199382 | baseline: 0.01419503
iter 492 | grad 0.43460 | objective 0.65122 | IACV: 0.00199584 | baseline: 0.01421149
iter 493 | grad 0.43371 | objective 0.65084 | IACV: 0.

iter 578 | grad 0.36567 | objective 0.62286 | IACV: 0.00219837 | baseline: 0.01545671
iter 579 | grad 0.36496 | objective 0.62258 | IACV: 0.00220115 | baseline: 0.01546962
iter 580 | grad 0.36425 | objective 0.62231 | IACV: 0.00220394 | baseline: 0.01548251
iter 581 | grad 0.36355 | objective 0.62203 | IACV: 0.00220674 | baseline: 0.01549536
iter 582 | grad 0.36285 | objective 0.62175 | IACV: 0.00220955 | baseline: 0.01550819
iter 583 | grad 0.36216 | objective 0.62148 | IACV: 0.00221238 | baseline: 0.01552100
iter 584 | grad 0.36146 | objective 0.62120 | IACV: 0.00221522 | baseline: 0.01553378
iter 585 | grad 0.36077 | objective 0.62093 | IACV: 0.00221807 | baseline: 0.01554653
iter 586 | grad 0.36008 | objective 0.62066 | IACV: 0.00222093 | baseline: 0.01555925
iter 587 | grad 0.35939 | objective 0.62039 | IACV: 0.00222380 | baseline: 0.01557195
iter 588 | grad 0.35871 | objective 0.62012 | IACV: 0.00222669 | baseline: 0.01558463
iter 589 | grad 0.35802 | objective 0.61985 | IACV: 0.

In [None]:
# Row-normalise `clf.loo_iacv_` and `clf.loo_true_`, then print the mean
# row-wise L2 distance to the normalised `clf.loo_true_` rows, first for the
# normalised IACV rows and then for `coef` (broadcast against every row).
iacv_coef = normalize(clf.loo_iacv_, axis=1)
true_coef = normalize(clf.loo_true_, axis=1)
for estimate in (iacv_coef, coef):
    print(np.mean(np.linalg.norm(estimate - true_coef, ord=2, axis=1)))

In [None]:
# Overall standard deviation of each leave-one-out array, one value per line
# (`clf.loo_iacv_` first, then `clf.loo_true_`).
print(np.std(clf.loo_iacv_), np.std(clf.loo_true_), sep="\n")

In [None]:
# Column-wise (axis=0) mean of each leave-one-out array
# (`clf.loo_iacv_` first, then `clf.loo_true_`).
print(
    np.mean(clf.loo_iacv_, axis=0),
    np.mean(clf.loo_true_, axis=0),
    sep="\n",
)

In [None]:
# Relative L1 difference to the column-wise mean of `clf.loo_true_`.
# The denominator is the L1 norm (sum of absolute values) of that mean: the
# coefficients have mixed signs, so a plain signed sum can cancel towards zero
# (or go negative) and produce a meaningless ratio.
loo_true_mean = np.mean(clf.loo_true_, axis=0)
l1_scale = np.abs(loo_true_mean).sum()

print("percentage difference between IACV and true CV mean")
print(np.abs(np.mean(clf.loo_iacv_, axis=0) - loo_true_mean).sum()/l1_scale)
print("percentage difference between w_hat and true CV mean")
print(np.abs(clf.weights_ - loo_true_mean).sum()/l1_scale)

In [None]:
# Overlaid histograms of the column-wise means of the two leave-one-out arrays.
hist_kwargs = dict(alpha=0.5, bins=40)
plt.hist(np.mean(clf.loo_iacv_, axis=0), label="IACV", **hist_kwargs)
plt.hist(np.mean(clf.loo_true_, axis=0), label="true", **hist_kwargs)
plt.legend()

In [None]:
# Histogram of the row-wise norms of (`clf.loo_iacv_` - `clf.loo_true_`).
iacv_row_gap = np.linalg.norm(clf.loo_iacv_ - clf.loo_true_, axis=1)
plt.hist(iacv_row_gap, bins=60, label="diffs")
plt.legend()

In [None]:
# Histogram of the row-wise norms of (`clf.weights_` - `clf.loo_true_`), with
# the fitted weights broadcast against every row.
weights_row_gap = np.linalg.norm(clf.weights_ - clf.loo_true_, axis=1)
plt.hist(weights_row_gap, bins=60, label="diffs")
plt.show()

In [None]:
# Display the raw `clf.loo_iacv_` array for inspection.
clf.loo_iacv_

In [None]:
# Display the raw `clf.loo_true_` array for inspection.
clf.loo_true_

In [None]:
# Compare the norm-scaled column-wise mean of `clf.loo_iacv_` with the
# norm-scaled sklearn coefficients.
# A dedicated name is used instead of rebinding the global `coef`: the earlier
# cell that measures `coef` against the normalised `clf.loo_true_` rows would
# otherwise silently use this vector if it were re-run after this cell.
iacv_mean = np.mean(clf.loo_iacv_, axis=0)
iacv_mean_coef = iacv_mean/np.linalg.norm(iacv_mean)
iacv_mean_diff = np.mean(np.abs(iacv_mean_coef - sk_coef))
print(f"diff {iacv_mean_diff} | pct {iacv_mean_diff/np.sum(np.abs(iacv_mean_coef))}")

In [None]:
# Compare the norm-scaled column-wise mean of `clf.loo_true_` with the
# norm-scaled sklearn coefficients.
# A dedicated name is used instead of rebinding the global `coef`: the earlier
# cell that measures `coef` against the normalised `clf.loo_true_` rows would
# otherwise silently use this vector if it were re-run after this cell.
true_cv_mean = np.mean(clf.loo_true_, axis=0)
true_cv_mean_coef = true_cv_mean/np.linalg.norm(true_cv_mean)
true_cv_mean_diff = np.mean(np.abs(true_cv_mean_coef - sk_coef))
print(f"diff {true_cv_mean_diff} | pct {true_cv_mean_diff/np.sum(np.abs(true_cv_mean_coef))}")

In [None]:
# Overlaid histograms of the norm-scaled fitted weights and the norm-scaled
# sklearn coefficients.
coef = clf.weights_ / np.linalg.norm(clf.weights_)
nbin = 100
for values, name in ((coef, "FGD"), (sk_coef.flatten(), "sklearn")):
    plt.hist(values, bins=nbin, label=name, alpha=0.3)
plt.legend()
plt.show()

In [None]:
# Min / max / mean of `clf.cond_nums_` along axis=1, plotted against the row
# index (labelled "Iterations"), together with `clf.cond_num_bound_`.
fig, ax = plt.subplots()
ax.plot(np.min(clf.cond_nums_, axis=1), label="Min Condition Number", c='b')
ax.plot(np.max(clf.cond_nums_, axis=1), label="Max Condition Number", c='r')
ax.plot(np.mean(clf.cond_nums_, axis=1), label="Mean Condition Number", c='purple')
ax.plot(clf.cond_num_bound_, label="Bound", c="black", linestyle="--")

ax.set_yscale('log')
ax.set_xlabel("Iterations")
# NOTE(review): the label says "Maximum" but min, max and mean are all drawn.
ax.set_ylabel("Maximum Hessian Condition No.")

# Divisors applied to the base 5.73 x 3.5 inch figure size.
ws = 1.45 # 1.75
hs = 1.35 # 1.25
fig.set_size_inches(w=5.73/ws, h=3.5/hs)
fig.legend()

# Save through the figure handle *before* showing: after plt.show() the
# figure may already be closed, and plt.savefig would then write an empty one.
fig.savefig('svm_convergence_cond_no.pgf', bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
# Largest value of the axis=1 mean of `clf.cond_nums_` (the peak of the "Mean
# Condition Number" curve).
np.max(np.mean(clf.cond_nums_, axis=1))

In [None]:
# Histogram of the column-wise (axis=0) medians of `clf.cond_nums_`.
median_cond_nums = np.median(clf.cond_nums_, axis=0)
plt.hist(median_cond_nums)

In [None]:
# Log-log plot of the "IACV" and "baseline" series stored in `clf.err_approx_`.
fig, ax = plt.subplots()
ax.plot(clf.err_approx_["IACV"], label="IACV", c='black')
#ax.plot(clf.err_approx_["NS"], label="NS", c='red', linestyle="--")
ax.plot(clf.err_approx_["baseline"], label="Baseline", c='green', linestyle="--")
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Iterations (log)")
ax.set_ylabel("Err Approx (log)")
ax.legend()

# Divisors applied to the base 5.73 x 3.5 inch figure size.
ws = 1.45 # 1.75
hs = 1.35 # 1.25
fig.set_size_inches(w=5.73/ws, h=3.5/hs)

# Save through the figure handle *before* showing: after plt.show() the
# figure may already be closed, and plt.savefig would then write an empty one.
fig.savefig('svm_convergence_err_approx_mod.pgf', bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
# Log-log plot of the "IACV" and "baseline" series stored in `clf.err_cv_`.
fig, ax = plt.subplots()
ax.plot(clf.err_cv_["IACV"], label="IACV", c='black')
ax.plot(clf.err_cv_["baseline"], label="Baseline", c='green', linestyle="--")
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel("Iterations (log)")
ax.set_ylabel("Err CV (log)")
ax.legend()

# Divisors applied to the base 5.73 x 3.5 inch figure size.
ws = 1.45 # 1.75
hs = 1.35 # 1.25
fig.set_size_inches(w=5.73/ws, h=3.5/hs)

# Save through the figure handle *before* showing: after plt.show() the
# figure may already be closed, and plt.savefig would then write an empty one.
fig.savefig('svm_convergence_err_cv_mod.pgf', bbox_inches='tight', pad_inches=0)
plt.show()