In [1]:
import math
import numpy as np
import wandb
from tqdm import tqdm
from torchvision import transforms
from utils import get_data, accuracy, save_Yte, augment_dataset, array_to_tensor
from kernels import *
from models import MultiKRR
from feature import hog
from sklearn.model_selection import train_test_split 


In [2]:
# Load the challenge data via the project helper. The three returned values are
# presumably training inputs, test inputs and training labels, as the names
# suggest -- TODO confirm against utils.get_data.
Xtr, Xte, Ytr = get_data()

## Hyperparameter tuning

In [3]:
# Hold out 10% of the labelled data for validation; the fixed random_state keeps
# the split identical from one run to the next.
Xtr_, Xval_, Ytr_, Yval_ = train_test_split(
    Xtr,
    Ytr,
    test_size=0.1,
    random_state=123,
)
# Convert both splits with the project helper, training split first.
Xtr_tensor_, Xval_tensor_ = map(array_to_tensor, (Xtr_, Xval_))

In [None]:
# HOG descriptors for the train / validation splits used during tuning.
def hog_fun(img):
    """Return the HOG descriptor of `img` (cells_per_block=3, 'L2-Hys' normalization)."""
    return hog(img, cells_per_block=3, normalization='L2-Hys')

# Only the (larger) training split gets a progress bar.
Xtr_hog_ = np.array(list(map(hog_fun, tqdm(Xtr_tensor_, desc="Computing HOG"))))
Xval_hog_ = np.array(list(map(hog_fun, Xval_tensor_)))


In [4]:
# Quick single-configuration check before launching a sweep: fit a MultiKRR with
# a GHI kernel (beta=1, lambd=1e-4) on the training split's HOG features ...
krr = MultiKRR(kernel=GHIKernel(beta=1.).kernel, lambd=1e-4)
krr.fit(Xtr_hog_, Ytr_)
# ... and report its accuracy on the held-out validation split.
print("accuracy:", accuracy(Yval_, krr.predict(Xval_hog_)))

Computing HOG: 100%|██████████| 4500/4500 [00:21<00:00, 212.84it/s]


KeyboardInterrupt: 

In [5]:
def get_sweep_id(parameters):
    """Register a new wandb sweep and return its id.

    The sweep uses the "bayes" search method and maximizes the metric named
    'accuracy'. It is created in the 'kernel-challenge-final' project.

    Args:
        parameters: the search-space description, placed verbatim under the
            'parameters' key of the sweep configuration.

    Returns:
        Whatever `wandb.sweep` returns for the created sweep (its id).

    Configuration format: https://docs.wandb.ai/guides/sweeps/configuration
    """
    objective = {'name': 'accuracy', 'goal': 'maximize'}
    config = dict(method="bayes", metric=objective, parameters=parameters)
    return wandb.sweep(config, project='kernel-challenge-final')

In [6]:
import math

def one_run():
    """Run one sweep trial: build the configured kernel, fit KRR, log accuracy.

    Reads `kernel_name`, `lambd` and the kernel-specific hyperparameter
    (`sigma`, `d` or `beta`) from `wandb.config`. The model is fitted on the
    notebook-level `Xtr_hog_` / `Ytr_` and scored on `Xval_hog_` / `Yval_`;
    the score is stored in the run summary under "accuracy".

    Note: the HOG features are the precomputed notebook-level arrays. This
    function does not read `cells_per_block` or `normalization` from the
    config, so varying them in a sweep has no effect here.

    Raises:
        NotImplementedError: if `kernel_name` is "WaveletKernel", which has no
            kernel construction in this function.
        ValueError: if `kernel_name` is not one of the recognised names.
    """
    wandb.init(project='kernel-challenge-final', entity='theodumont', tags=["HOG+KRR"])
    config = wandb.config

    # kernel =============================================================================
    # Each hyperparameter is only read inside its own branch, so a sweep need
    # only define the one that matches the chosen kernel.
    kernel_name = config.kernel_name
    if kernel_name == "GaussianKernel":
        kernel = GaussianKernel(sigma=config.sigma).kernel
    elif kernel_name == "LogKernel":
        kernel = LogKernel(d=config.d).kernel
    elif kernel_name == "GHIKernel":
        kernel = GHIKernel(beta=config.beta).kernel
    elif kernel_name == "WaveletKernel":
        # Previously a bare `pass`, which left `kernel` unbound and crashed
        # later with an opaque UnboundLocalError.
        raise NotImplementedError("WaveletKernel is not supported by one_run yet")
    else:
        raise ValueError(f"unknown kernel_name: {kernel_name!r}")

    # model ==============================================================================
    krr = MultiKRR(
        kernel=kernel,
        lambd=config.lambd,
    )
    krr.fit(Xtr_hog_, Ytr_)
    wandb.run.summary["accuracy"] = accuracy(Yval_, krr.predict(Xval_hog_))

In [8]:
parameters = {
    'lambd': {
        'distribution': 'log_uniform',
        'min': math.log(1e-5),
        'max': math.log(1e-3),
        # 'value': 1e-4
    },
    'cells_per_block': {
        'value': 3,
    },
    'normalization': {
        'value': 'L2-Hys',
    },
    # kernel -----------------------------------------------------------------------------
    'kernel_name': {
        'value': "GHIKernel",
        # 'values': ["GaussianKernel", "WaveletKernel", "LogKernel", "GHIKernel"],
    },
    # 'd': {
    #     'values': [1,2],
    # },
    'beta': {
        'distribution': 'log_uniform',
        'min': math.log(1e-1),
        'max': math.log(1e1),
    },
}
sweep_id = get_sweep_id(parameters)
%env WANDB_SILENT=True

# sweep_id = "2rx2v24s"
wandb.agent(sweep_id, function=one_run)

Create sweep with ID: e5enugkd
Sweep URL: https://wandb.ai/theodumont/kernel-challenge-final/sweeps/e5enugkd
env: WANDB_SILENT=True


[34m[1mwandb[0m: Agent Starting Run: dqyul4is with config:
[34m[1mwandb[0m: 	cells_per_block: 3
[34m[1mwandb[0m: 	d: 1
[34m[1mwandb[0m: 	kernel_name: LogKernel
[34m[1mwandb[0m: 	lambd: 1.2653316374256873e-05
[34m[1mwandb[0m: 	normalization: L2-Hys
Fitting: 100%|██████████| 10/10 [00:34<00:00,  3.46s/it]
Fitting: 100%|██████████| 10/10 [01:03<00:00,  6.30s/it]
Fitting: 100%|██████████| 10/10 [01:05<00:00,  6.55s/it]
Fitting: 100%|██████████| 10/10 [01:18<00:00,  7.80s/it]
Fitting: 100%|██████████| 10/10 [01:11<00:00,  7.13s/it]
Fitting: 100%|██████████| 10/10 [01:01<00:00,  6.13s/it]
Fitting: 100%|██████████| 10/10 [01:10<00:00,  7.09s/it]
Fitting: 100%|██████████| 10/10 [01:04<00:00,  6.49s/it]
Fitting: 100%|██████████| 10/10 [01:02<00:00,  6.26s/it]
Fitting:  50%|█████     | 5/10 [00:50<00:54, 10.82s/it]

## Predictions

In [5]:
# Final model: recompute HOG features on the full training set and the test set,
# refit, and write the test predictions.
Xtr_tensor = array_to_tensor(Xtr)
Xte_tensor = array_to_tensor(Xte)


def hog_fun(img):
    """Return the HOG descriptor of `img` (cells_per_block=3, 'L2-Hys' normalization)."""
    return hog(img, cells_per_block=3, normalization='L2-Hys')


Xtr_hog = np.array([hog_fun(img) for img in tqdm(Xtr_tensor, desc="Computing HOG")])
Xte_hog = np.array([hog_fun(img) for img in Xte_tensor])

krr = MultiKRR(kernel=LogKernel(d=2).kernel, lambd=4e-5)
krr.fit(Xtr_hog, Ytr)

# Sanity check only: this scores the model on the very data it was fitted on,
# so it says nothing about generalisation.
print(f"accuracy on training set: {accuracy(Ytr, krr.predict(Xtr_hog))}")
Yte = krr.predict(Xte_hog)
save_Yte(Yte, model_name="HOG_Log_MultiKRR_1")

Computing HOG: 100%|██████████| 5000/5000 [00:24<00:00, 202.81it/s]
Fitting: 100%|██████████| 10/10 [01:02<00:00,  6.29s/it]


accuracy on training set: 1.0
