In [2]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.datasets import load_svmlight_file
import pandas as pd

In [3]:
data_set = 'mnist.scale'

In [4]:
def read_linear_format(file_path):
    X, y = [], []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            y.append(int(parts[0]))  
            features = {}
            for item in parts[1:]:
                index, value = item.split(":")
                features[int(index)] = float(value)
            X.append(features)
    return X, np.array(y)

X_train, y_train = read_linear_format(data_set)

In [5]:
mask_3 = np.array(y_train == 3)
mask_7 = np.array(y_train == 7)

indices_3 = np.where(mask_3)[0]
indices_7 = np.where(mask_7)[0]

X_train_3 = [X_train[i] for i in indices_3]
X_train_7 = [X_train[i] for i in indices_7]
y_train_3 = y_train[mask_3]
y_train_7 = y_train[mask_7]

n_features = max(max(feat.keys()) for feat in X_train_3 + X_train_7)

In [6]:
def dict_to_array(X_dict, n_features):
    X_dense = np.zeros((len(X_dict), n_features))
    for i, sample in enumerate(X_dict):
        for feat_idx, value in sample.items():
            X_dense[i, feat_idx-1] = value  
    return X_dense

X_train_3_dense = dict_to_array(X_train_3, n_features)
X_train_7_dense = dict_to_array(X_train_7, n_features)

X_combined = np.vstack([X_train_3_dense, X_train_7_dense])

In [7]:
le = LabelEncoder()

le.fit([3, 7])  

y_combined = np.concatenate([y_train_3, y_train_7])
# the mapping is: 3 -> -1, 7 -> 1
y_train_encoded = np.where(y_combined == 3, -1, 1)  

y_train_3_encoded = np.full(len(y_train_3), -1)  # All 3s become -1
y_train_7_encoded = np.full(len(y_train_7), 1)   # All 7s become 1


- note: In `svc()`, rbf kernel is the default setting so need not specify.

```python
dual_coef_ : array, shape = [n_class-1, n_SV]
```
$\rightarrow$ Coefficients (weights) to each support vector in the decision function.

In our case, we're doing binary classification so `n_class-1 = 1`.
> Therefore we use the index 0 of `dual_coef_`.



## Example of result of `dual_coef_`

The shape (8730,) means that there are 8730 support vectors.

And each coefficient tells that if:
- negative $\Rightarrow$ Support vector belongs to class 1 (the original label is 7)
- positive $\Rightarrow$ Support vector belongs to class -1 (the original label is 3)

In [10]:
svm_classifier = SVC(C = 0.1, gamma = 0.1)
svm_classifier.fit(X_combined, y_train_encoded)
dual_coefficients = svm_classifier.dual_coef_[0]

In [11]:
print(dual_coefficients.shape)
print(dual_coefficients[:5])

(8730,)
[-0.04148516 -0.1        -0.1        -0.1        -0.1       ]


The weight vector is defined as:
> see Lecture 4 slide 10

$$
\mathbf{w} = \sum_{n=1}^{N} \alpha_n y_n \mathbf{z}_n
$$

And 

$$
\begin{split}
||\mathbf{w}||^2 
&= \mathbf{w}^T \mathbf{w} \\
&= \sum_{n=1}^{N}\sum_{m=1}^N \alpha_n \alpha_m y_n y_m K(\mathbf{x}_n, \mathbf{x}_m) \\
&= \sum_{n=1}^{N}\sum_{m=1}^N \alpha_n \alpha_m y_n y_m \exp(-\gamma ||\mathbf{x}_n - \mathbf{x}_m||^2)
\end{split}
$$



In [17]:
# Use this list to store the result of form (C, gamma, margin)
result = []

for C in [0.1, 1, 10]:
    for gamma in [0.1, 1, 10]:
        svm_classifier = SVC(C = C, gamma = gamma)
        svm_classifier.fit(X_combined, y_train_encoded)  

        support_vectors = svm_classifier.support_vectors_
        dual_coefficients = svm_classifier.dual_coef_[0]
        support_labels = y_train_encoded[svm_classifier.support_]

        w_norm_squared = 0
        num_support_vectors = len(support_vectors)

        for i in range(num_support_vectors):
            for j in range(num_support_vectors):
                w_norm_squared += (
                    dual_coefficients[i] * dual_coefficients[j]
                    * support_labels[i] * support_labels[j]
                    * np.exp(-gamma * np.linalg.norm(support_vectors[i] - support_vectors[j])**2)
                )
        
        margin = 1.0 / np.sqrt(w_norm_squared)
        result.append((C, gamma, margin))

In [21]:
df_results = pd.DataFrame(result, columns=['C', 'gamma', 'margin'])
df_pivot = df_results.pivot(index='C', columns='gamma', values='margin')
df_pivot.columns = [f'gamma={gamma}' for gamma in df_pivot.columns]

print("\nResults Table")
print("-" * 50)
print("Margin for each combination:")
print(df_pivot.to_string(float_format=lambda margin: '{:,.6f}'.format(margin)))


Results Table
--------------------------------------------------
Margin for each combination:
      gamma=0.1  gamma=1.0  gamma=10.0
C                                     
0.1    0.041274   0.090780    0.090793
1.0    0.018166   0.009078    0.009079
10.0   0.017794   0.008984    0.008982
