In [None]:
# !pip install -U liblinear-official
!pip install -U libsvm-official

In [None]:
import requests
import numpy as np
import random
import time
import scipy
from scipy import linalg

# from liblinear.liblinearutil import *
from libsvm.svmutil import *

# Get Data

In [None]:
url = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/satimage.scale'
content = requests.get(url).content
content = content.decode('utf-8')

with open('./in_sample.txt', mode='w') as f:
  for line in content:
    f.write(line)

In [None]:
url = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/satimage.scale.t'
content = requests.get(url).content
content = content.decode('utf-8')

with open('./out_sample.txt', mode='w') as f:
  for line in content:
    f.write(line)

# Problem 11

In [None]:
Y, X = svm_read_problem('./in_sample.txt', return_scipy=True)
OUT_Y, OUT_X = svm_read_problem('./out_sample.txt', return_scipy=True)

In [None]:
X

<4435x36 sparse matrix of type '<class 'numpy.float64'>'
	with 158048 stored elements in Compressed Sparse Row format>

In [None]:
y = np.array( [1 if val==5 else 0 for val in Y] )
out_y = np.array( [1 if val==5 else 0 for val in OUT_Y] )

In [None]:
m = svm_train(y, X, '-t 0 -c 10 -d 1')

In [None]:
sv = m.get_SV()
sv_coef = m.get_sv_coef()

In [None]:
sv_arr = []
for row in sv:
    row_vec = []
    for dim in range(1,37):
        try:
            row_vec.append( row[dim] )
        except KeyError:
            row_vec.append( 0 )

    sv_arr.append( row_vec )

sv_arr = np.array( sv_arr )

In [None]:
sv_coef_arr = [x[0] for x in sv_coef]
# for tup in sv_coef:
sv_coef_arr = np.array( sv_coef_arr )

In [None]:
w = np.dot( sv_coef_arr, sv_arr )
linalg.norm(w)

4.646266066207534

# Problem 12 & 13


In [None]:
for sub in range(2, 7):
  y = np.array( [1 if val==sub else -1 for val in Y] )
  # out_y = np.array( [1 if val==sub else -1 for val in OUT_Y] )

  m = svm_train(y, X, '-s 0 -t 1 -c 10 -d 3 -g 1 -r 1')

  support_vectors = m.get_SV()
  # support_vector_coefficients = m.get_sv_coef()

  p_label, p_acc, p_val = svm_predict(y, X, m)
  print(f'Class {sub}, E_in = {100-p_acc[0]:2f}% / SVM:{len(support_vectors)}')

Accuracy = 100% (4435/4435) (classification)
Class 2, E_in = 0.000000% / SVM:93
Accuracy = 99.7294% (4423/4435) (classification)
Class 3, E_in = 0.270575% / SVM:385
Accuracy = 99.1657% (4398/4435) (classification)
Class 4, E_in = 0.834273% / SVM:659
Accuracy = 100% (4435/4435) (classification)
Class 5, E_in = 0.000000% / SVM:281
Accuracy = 99.7069% (4422/4435) (classification)
Class 6, E_in = 0.293123% / SVM:607


# Problem 14

In [None]:
y = np.array( [1 if val==1 else 0 for val in Y] )
out_y = np.array( [1 if val==1 else 0 for val in OUT_Y] )

In [None]:
Cs = [0.01,0.1,1,10,100]
for c in Cs:
  m = svm_train(y, X, f'-s 0 -t 2 -g 10 -c {c}')

  support_vectors = m.get_SV()
  support_vector_coefficients = m.get_sv_coef()

  p_label, p_acc, p_val = svm_predict(out_y, OUT_X, m)
  print(f'C:{c}, E_out = {100-p_acc[0]}% / SVM:{len(support_vectors)}')

Accuracy = 76.5% (1530/2000) (classification)
C:0.01, E_out = 23.5% / SVM:3843
Accuracy = 83.65% (1673/2000) (classification)
C:0.1, E_out = 16.349999999999994% / SVM:3883
Accuracy = 89.35% (1787/2000) (classification)
C:1, E_out = 10.650000000000006% / SVM:3690
Accuracy = 90.3% (1806/2000) (classification)
C:10, E_out = 9.700000000000003% / SVM:3703
Accuracy = 90.3% (1806/2000) (classification)
C:100, E_out = 9.700000000000003% / SVM:3703


# Problem 15

In [None]:
y = np.array( [1 if val==1 else 0 for val in Y] )
out_y = np.array( [1 if val==1 else 0 for val in OUT_Y] )

In [None]:
Gs = [0.1,1,10,100,1000]
for g in Gs:
  m = svm_train(y, X, f'-s 0 -t 2 -g {g} -c 0.1')

  support_vectors = m.get_SV()
  support_vector_coefficients = m.get_sv_coef()

  p_label, p_acc, p_val = svm_predict(out_y, OUT_X, m)
  print(f'Gamma:{g}, E_out = {100-p_acc[0]}% / SVM:{len(support_vectors)}')

Accuracy = 97.75% (1955/2000) (classification)
Gamma:0.01, E_out = 2.25% / SVM:1541
Accuracy = 98.75% (1975/2000) (classification)
Gamma:0.1, E_out = 1.25% / SVM:608
Accuracy = 98.8% (1976/2000) (classification)
Gamma:1, E_out = 1.2000000000000028% / SVM:791
Accuracy = 79.25% (1585/2000) (classification)
Gamma:10, E_out = 20.75% / SVM:3746
Accuracy = 76.95% (1539/2000) (classification)
Gamma:100, E_out = 23.049999999999997% / SVM:4435


# Problem 16

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X

<4435x36 sparse matrix of type '<class 'numpy.float64'>'
	with 158048 stored elements in Compressed Sparse Row format>

In [None]:
choice = {0.1: 0,
      1: 0,
      10: 0,
      100: 0,
      1000: 0}

In [None]:
import random
import time

In [None]:
Gs = [0.1, 1, 10, 100, 1000]

start = time.time()
for attempt in range(1000):
  n = random.randint(1,101)
  X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.045, random_state = n)

  y_train = np.array( [1 if val==1 else 0 for val in y_train] )
  y_test = np.array( [1 if val==1 else 0 for val in y_test] )

  min_eval = float('inf')
  best_g = 0.01
  for g in Gs:
    m = svm_train(y_train, X_train, f' -s 0 -t 2 -g {g} -c 0.01')

    support_vectors = m.get_SV()
    support_vector_coefficients = m.get_sv_coef()

    p_label, p_acc, p_val = svm_predict(y_test, X_test, m)

    if 100-p_acc[0] < min_eval:
      min_eval = 100-p_acc[0]
      best_g = g
    elif 100-p_acc[0] == min_eval and g<best_g:
      best_g = g
      
    # print(f'C:{c}, E_val = {100-p_acc[0]}% / SVM:{len(support_vectors)}')
  
  choice[best_g] += 1
  print(f'[{attempt+1}] Best_G: {best_g}', min_eval)
  time_consumed = time.time()-start
  print(f'>>> Elapsed: {time_consumed/60:.1f} mins | Estimated: {time_consumed/(attempt+1)*(999-attempt)/60:.1f} mins')


In [None]:
choice

{0.1: 1566, 1: 308, 10: 0, 100: 0, 1000: 0}

In [None]:
!apt-get install texlive texlive-xetex texlive-latex-extra pandoc
!pip install pypandoc
!jupyter nbconvert --to PDF /content/HTML_HW5.ipynb

Reading package lists... Done
Building dependency tree       
Reading state information... Done
pandoc is already the newest version (1.19.2.4~dfsg-1build4).
texlive is already the newest version (2017.20180305-1).
texlive-latex-extra is already the newest version (2017.20180305-2).
texlive-xetex is already the newest version (2017.20180305-1).
0 upgraded, 0 newly installed, 0 to remove and 37 not upgraded.
[NbConvertApp] Converting notebook /content/HTML_HW5.ipynb to PDF
[NbConvertApp] Writing 42305 bytes to ./notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: [u'xelatex', u'./notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: [u'bibtex', u'./notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 38899 bytes to /content/HTML_HW5.pdf
