In [32]:
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import os, cv2
from sklearn.decomposition import PCA

In [10]:
data_dir = "./augmented_data"

def load_npz_dataset(filename):
    path = os.path.join(data_dir, filename)
    data = np.load(path)
    return data["X"], data["y"]

X_train_A, y_train_A = load_npz_dataset("train_A.npz")   # original clean
X_train_B, y_train_B = load_npz_dataset("train_B.npz")   # augmented / degraded
X_test, y_test       = load_npz_dataset("test.npz")

print(f"trainA: {X_train_A.shape}, trainB: {X_train_B.shape}, test: {X_test.shape}")

trainA: (12000, 224, 224, 3), trainB: (12000, 224, 224, 3), test: (3000, 224, 224, 3)


### Manual K-Fold

In [11]:
def make_folds(n_samples, k=10, seed=42):
    np.random.seed(seed)
    idx = np.arange(n_samples)
    np.random.shuffle(idx)
    folds = np.array_split(idx, k)
    return folds

### Computing Metrics Manually

In [12]:
def calc_metrics(cm):
    TP = np.diag(cm)
    FP = cm.sum(axis=0) - TP
    FN = cm.sum(axis=1) - TP
    precision = np.mean(TP / (TP + FP + 1e-9))
    recall = np.mean(TP / (TP + FN + 1e-9))
    f1 = 2 * precision * recall / (precision + recall + 1e-9)
    acc = TP.sum() / cm.sum()
    return acc, precision, recall, f1

### Inner CV for Hyper-Parameter Tuning

In [13]:
def inner_cv_tuning(X, y, candidate_C, k_inner=3):
    """Return the C value that gives best mean accuracy in inner CV."""
    folds = make_folds(len(X), k_inner)
    avg_acc = []
    for C in candidate_C:
        accs = []
        for i in range(k_inner):
            val_idx = folds[i]
            train_idx = np.concatenate([folds[j] for j in range(k_inner) if j != i])
            model = SVC(kernel="linear", C=C)
            model.fit(X[train_idx], y[train_idx])
            accs.append(model.score(X[val_idx], y[val_idx]))
        avg_acc.append(np.mean(accs))
    best_C = candidate_C[int(np.argmax(avg_acc))]
    return best_C

### Outer CV for Model Evaluation (10-Fold CV)

In [31]:
def evaluate_svm_nested_cv_pca(X, y,
                               candidate_C=[0.1,1,10],
                               candidate_PCs=[50,100,200],
                               k_outer=10, k_inner=3):
    folds = make_folds(len(X), k_outer)
    metrics_all = []
    for i in range(k_outer):
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k_outer) if j!=i])
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # --- scale ---
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test  = scaler.transform(X_test)

        # --- inner loop: tune C and PCs using 3‑fold CV ---
        best_score, bestC, bestPC = -np.inf, None, None
        inner_folds = make_folds(len(X_train), k_inner)
        for C in candidate_C:
            for n_pc in candidate_PCs:
                scores = []
                for j in range(k_inner):
                    val_idx = inner_folds[j]
                    tr_idx = np.concatenate([inner_folds[m] for m in range(k_inner) if m!=j])
                    X_tr, X_val = X_train[tr_idx], X_train[val_idx]
                    y_tr, y_val = y_train[tr_idx], y_train[val_idx]
                    pca = PCA(n_components=min(n_pc, X_tr.shape[1]), random_state=42)
                    X_tr_pca = pca.fit_transform(X_tr)
                    X_val_pca = pca.transform(X_val)
                    model = SVC(kernel="linear", C=C)
                    model.fit(X_tr_pca, y_tr)
                    scores.append(model.score(X_val_pca, y_val))
                mean_sc = np.mean(scores)
                if mean_sc > best_score:
                    best_score, bestC, bestPC = mean_sc, C, n_pc

        print(f"Fold {i+1}: Best C = {bestC}, Best PCs = {bestPC}")

        # --- Train with best C and PCA on full train fold ---
        pca_final = PCA(n_components=min(bestPC, X_train.shape[1]), random_state=42)
        X_train_pca = pca_final.fit_transform(X_train)
        X_test_pca  = pca_final.transform(X_test)

        model = SVC(kernel="linear", C=bestC)
        model.fit(X_train_pca, y_train)
        y_pred = model.predict(X_test_pca)
        cm = confusion_matrix(y_test, y_pred, labels=np.unique(y))
        acc, prec, rec, f1 = calc_metrics(cm)
        metrics_all.append([acc,prec,rec,f1])

        print(f"Fold {i+1}: Acc={acc:.3f} P={prec:.3f} R={rec:.3f} F1={f1:.3f}")

    metrics_all = np.array(metrics_all)
    mean, std = metrics_all.mean(0), metrics_all.std(0)
    print("\nSVM With PCA Results")
    print(f"Accuracy   : {mean[0]:.3f} ± {std[0]:.3f}")
    print(f"Precision  : {mean[1]:.3f} ± {std[1]:.3f}")
    print(f"Recall    : {mean[2]:.3f} ± {std[2]:.3f}")
    print(f"F1‑Score  : {mean[3]:.3f} ± {std[3]:.3f}")
    return mean, std

In [18]:
def extract_color_features(img, bins=(16,16,16)):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv],[0,1,2],None,bins,[0,180,0,256,0,256])
    hist = cv2.normalize(hist,hist).flatten()
    return hist

In [19]:
def extract_texture_features(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    mag, _ = cv2.cartToPolar(gx, gy)
    edge_density = np.mean(cv2.Canny(gray,50,150)>0)
    return [np.mean(mag), np.std(mag), edge_density]

In [20]:
def extract_shape_features(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray,50,150)
    contours,_ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        c = max(contours,key=cv2.contourArea)
        area = cv2.contourArea(c)
        peri = cv2.arcLength(c,True)
        x,y,w,h = cv2.boundingRect(c)
        aspect_ratio = w/float(h) if h>0 else 0
        return [area,peri,aspect_ratio,w,h]
    return [0,0,0,0,0]

In [21]:
def extract_features(img):
    color = extract_color_features(img)
    texture = extract_texture_features(img)
    shape = extract_shape_features(img)
    return np.hstack([color,texture,shape])

In [23]:
def batch_extract_features(images):
    print("Extracting hand‑crafted features… ")
    feats = [extract_features(img) for img in images]
    return np.array(feats)

In [26]:
X_trainA_feats = batch_extract_features(X_train_A)
X_trainB_feats = batch_extract_features(X_train_B)
X_test_feats   = batch_extract_features(X_test)

print("Train A features:", X_trainA_feats.shape)
print("Train B features:", X_trainB_feats.shape)
print("Test features:", X_test_feats.shape)

Extracting hand‑crafted features… 
Extracting hand‑crafted features… 
Extracting hand‑crafted features… 
Train A features: (12000, 4104)
Train B features: (12000, 4104)
Test features: (3000, 4104)


In [27]:
scalerA = StandardScaler()
X_trainA_scaled = scalerA.fit_transform(X_trainA_feats)
meanA, stdA = evaluate_svm_nested_cv(X_trainA_scaled, y_train_A)

Fold 1: Best C = 0.1
Fold 1: Acc=0.732  P=0.732  R=0.731  F1=0.732
Fold 2: Best C = 0.1
Fold 2: Acc=0.726  P=0.724  R=0.724  F1=0.724
Fold 3: Best C = 0.1
Fold 3: Acc=0.731  P=0.729  R=0.733  F1=0.731
Fold 4: Best C = 0.1
Fold 4: Acc=0.748  P=0.746  R=0.748  F1=0.747
Fold 5: Best C = 0.1
Fold 5: Acc=0.702  P=0.700  R=0.704  F1=0.702
Fold 6: Best C = 0.1
Fold 6: Acc=0.727  P=0.726  R=0.729  F1=0.727
Fold 7: Best C = 0.1
Fold 7: Acc=0.732  P=0.730  R=0.731  F1=0.730
Fold 8: Best C = 0.1
Fold 8: Acc=0.735  P=0.732  R=0.732  F1=0.732
Fold 9: Best C = 0.1
Fold 9: Acc=0.742  P=0.739  R=0.738  F1=0.739
Fold 10: Best C = 0.1
Fold 10: Acc=0.769  P=0.770  R=0.771  F1=0.770

Final Results
Accuracy: 0.734 ± 0.016
Precision: 0.733 ± 0.017
Recall: 0.734 ± 0.016
F1‑score: 0.733 ± 0.016


TypeError: cannot unpack non-iterable NoneType object

In [None]:
# augmented / degraded dataset
scalerB = StandardScaler()
X_trainB_scaled = scalerB.fit_transform(X_trainB_feats)
meanB, stdB = evaluate_svm_nested_cv(X_trainB_scaled, y_train_B)