This notebook shows how to use the code to create new experiments, load precomputed kernels, and compute predictions on test sets. It also shows how to compute the linear and Gaussian kernels (the substring kernel has a dedicated Python file, as it is significantly slower to compute).

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

import cvxpy
import time
import numpy as np
import multiprocessing as mp
from tqdm import tqdm
import sys
sys.path.append('code/')

In [2]:
# Project-local modules, found through the 'code/' entry appended to sys.path.
import io_utils
import kernels
import algos
from importlib import reload

# Re-execute the project modules so that on-disk edits are picked up
# without restarting the kernel.
for stale_module in (kernels, io_utils):
    reload(stale_module)
# Kept as a bare final expression so the cell still echoes the reloaded module.
reload(algos)

<module 'algos' from 'code/algos.py'>

### Create experiment and load datasets

In [3]:
# Fix NumPy's global random state so that repeated runs draw the same numbers.
SEED = 42
np.random.seed(SEED)

# Build the experiment object used by every later cell (they read
# exp.labels and exp.feats).
# NOTE(review): create_new_experiment() presumably loads the datasets, per the
# section heading — confirm in io_utils.
exp = io_utils.Experiment()
exp.create_new_experiment()

### Load previous experiment

In [4]:
# Location of the precomputed kernels to restore.
KERNEL_PATH = 'kernels/SS_7_02.0'

# The result is unpacked into the training kernels and the test kernels;
# later cells index both per dataset.
# NOTE(review): the meaning of the positional `True` flag is defined by
# io_utils.Experiment.load — confirm there.
kernels_wd_train, k_test = exp.load(KERNEL_PATH, True)

100%|██████████| 3/3 [00:15<00:00,  5.12s/it]


In [5]:
# Sanity check: print the smallest eigenvalue of each training kernel matrix.
# A clearly negative value would mean the matrix is not positive semi-definite.
#
# eigvalsh is used instead of the general-purpose eigvals because a Gram matrix
# is symmetric: the symmetric solver is faster and always returns real
# eigenvalues, so the float format below cannot receive a complex number.
# NOTE(review): assumes every entry of kernels_wd_train is a square, symmetric
# matrix — confirm for kernels loaded from disk.
# ('vp' in the message presumably abbreviates the French "valeur propre".)
for kn in kernels_wd_train:
    print("Minimum vp is {:.6f}".format(np.linalg.eigvalsh(kn).min()))

Minimum vp is 3.317647
Minimum vp is 5.027438
Minimum vp is -0.000000


### Cross validation

In [6]:
# Instantiate both available learners so that switching is a one-line change.
krr, svm = algos.KernelRidgeRegression(), algos.SVM()

# Learner used by the cross-validation and prediction cells;
# assign `svm` here instead to run them with the SVM.
cur_algo = krr

In [13]:
# Number of candidate lambda values in the search grid.
TRIALS = 5

# Log-spaced grid from 1e-2 to 1e0, both ends included.
lambda_vals = np.logspace(start=-2, stop=0, num=TRIALS)

# Bare final expression so the cell displays whatever cross_validate returns.
cur_algo.cross_validate(exp, kernels_wd_train, lambda_vals)

  0%|          | 0/5 [00:00<?, ?it/s]

Cross validation with 5 slices. Training set: 1600, Validation set: 400


100%|██████████| 5/5 [00:09<00:00,  1.90s/it]

Best parameter: 0.31622776601683794
Score 1: 0.637
Score 2: 0.6415
Score 3: 0.741





0.6731666666666666

### Predictions

In [8]:
# Parameter value handed to fit() for every dataset.
# NOTE(review): this is a fixed value, not the "Best parameter" printed by the
# cross-validation cell — confirm that this is intended.
FIT_LAMBDA = 0.1

# Reset on every run so that re-executing the cell does not accumulate results.
predictions = []

# One model per dataset: fit on the training kernel, evaluate on that same
# training data, then predict on the matching test kernel. The loop follows
# the number of loaded kernels instead of a hard-coded count.
for i, k_train in enumerate(kernels_wd_train):
    cur_algo.fit(k_train, exp.labels[i], FIT_LAMBDA)
    cur_algo.evaluatePerformance(k_train, exp.labels[i])
    r_pred = cur_algo.predict(k_test[i])
    # Store a copy so the saved result is independent of the returned object.
    predictions.append(r_pred.copy())

Score : 0.986
Score : 0.990
Score : 0.990


### Save submission file

In [9]:
# Name of the submission file handed to io_utils.parse_output.
submission_file = "submission_2203_2.csv"

# Merge the per-dataset predictions into one flat 1-D array, then pass it
# to the project's output writer together with the file name.
predictions = np.array(predictions).flatten()
io_utils.parse_output(predictions, submission_file)

## Linear and Gaussian kernels

### Linear kernel

In [10]:
linear_kernel = kernels.LinearKernel()

# Training Gram matrix of each of the three datasets: every feature set is
# compared against itself.
K_linear_train = [
    linear_kernel.computeVectorizedKernel(exp.feats[idx], exp.feats[idx])
    for idx in range(3)
]


### Gaussian kernel

In [11]:
# NOTE(review): 0.5 is presumably the kernel's bandwidth parameter — confirm
# against kernels.GaussianKernel.
gaussian_kernel = kernels.GaussianKernel(.5)

# Training Gram matrix of each of the three datasets: every feature set is
# compared against itself.
K_gauss_train = [
    gaussian_kernel.computeVectorizedKernel(exp.feats[idx], exp.feats[idx])
    for idx in range(3)
]
# Created alongside the training list but not filled in by this cell.
K_gauss_val = []


### If needed, save experiment

In [None]:
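# Uncomment to persist an experiment's kernels to disk.
# NOTE: `kernels_train` and `kernels_test` are not defined by any cell of this
# notebook; replace them with the kernel lists you want to save before running.
# kernel_name = "wd_7_02"
# exp.save('kernels/{}'.format(kernel_name), kernels_train, kernels_test)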