# Overview of `solve_ridge()` usage

In [1]:
from __future__ import print_function

import numpy as np

from regression_code.storm import ridge
from regression_code.storm.tests import test_datasets
from regression_code.storm.tests import test_utils

# load data

In [2]:
# Load the test datasets and select the 'qa_skinny' entry used throughout
# this notebook.
datasets = test_datasets.load_all()
dataset = datasets['qa_skinny']

# Unpack the train/test regressors (X) and regressands (Y) in one statement.
Xtrain, Xtest, Ytrain, Ytest = (
    dataset[key] for key in ('Xtrain', 'Xtest', 'Ytrain', 'Ytest')
)

# Report the shape of each array, one per line.
for label, arr in (
    ('Xtrain:', Xtrain),
    ('Xtest:', Xtest),
    ('Ytrain:', Ytrain),
    ('Ytest:', Ytest),
):
    print(label, arr.shape)

  return array / array.std(axis, keepdims=True)
  return (a - mns) / sstd


Xtrain: (1832, 116)
Xtest: (504, 116)
Ytrain: (1832, 92970)
Ytest: (504, 92970)


# basic regression

In [28]:
# Request all three outputs (weights, predictions, performance) and sweep
# 20 log-spaced ridge values between 10**0 and 10**4.
parameters = dict(
    weights=True,
    predictions=True,
    performance=True,
    ridges=np.logspace(0, 4, 20),
)

# Data arrays are passed positionally; the options are forwarded as keywords.
results = ridge.solve_ridge(
    Xtrain, Ytrain, Xtest, Ytest,
    **parameters
)

# Two blank lines separate the solver's log from the results printout.
print()
print()
test_utils.rprint(results, 'results')

Fitting ridge model...
- (train, test): (0.784, 0.216)
- n_ridges: 20
- n_regressors: 116
- n_regressands: 92970
- n_train_samples: 1832
- n_test_samples: 504
- inputs: Xtrain, Ytrain, Xtest, Ytest
- outputs: weights, predictions, performance
- solver: eig_dual
- dtype: float32

01:     1.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
02:     1.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
03:     2.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
04:     4.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
05:     7.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.253)
06:    11.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.043, +0.102, +0.254)
07:    18.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.000, +0.043, +0.102, +0.254)
08:    29.8  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.254)
09:    48.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.253)
10:    78

# use alternate solver

In [29]:
# Same request as the basic regression, but explicitly select the 'svd'
# solver instead of leaving the solver choice to solve_ridge().
parameters = dict(
    weights=True,
    predictions=True,
    performance=True,
    ridges=np.logspace(0, 4, 20),
    solver='svd',
)

# Data arrays are passed positionally; the options are forwarded as keywords.
results = ridge.solve_ridge(
    Xtrain, Ytrain, Xtest, Ytest,
    **parameters
)

# Two blank lines separate the solver's log from the results printout.
print()
print()
test_utils.rprint(results, 'results')

Fitting ridge model...
- (train, test): (0.784, 0.216)
- n_ridges: 20
- n_regressors: 116
- n_regressands: 92970
- n_train_samples: 1832
- n_test_samples: 504
- inputs: Xtrain, Ytrain, Xtest, Ytest
- outputs: weights, predictions, performance
- solver: svd
- dtype: float32

01:     1.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
02:     1.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
03:     2.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
04:     4.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
05:     7.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.253)
06:    11.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.043, +0.102, +0.254)
07:    18.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.000, +0.043, +0.102, +0.254)
08:    29.8  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.254)
09:    48.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.253)
10:    78.5  (

# optimize for computational performance

In [6]:
# Rebind all four arrays to single-precision (float32) copies.
# The right-hand side is fully consumed before any name is reassigned.
Xtrain, Ytrain, Xtest, Ytest = (
    data.astype(np.float32) for data in (Xtrain, Ytrain, Xtest, Ytest)
)

In [10]:
# Only request the performance output; weights and predictions are skipped.
# NOTE(review): 'Ytest_zscored' presumably tells the solver that Ytest is
# already z-scored -- confirm against ridge.py.
parameters = dict(
    weights=False,
    predictions=False,
    performance=True,
    Ytest_zscored=True,
    ridges=np.logspace(0, 4, 20),
)

# Data arrays are passed positionally; the options are forwarded as keywords.
results = ridge.solve_ridge(
    Xtrain, Ytrain, Xtest, Ytest,
    **parameters
)

# Two blank lines separate the solver's log from the results printout.
print()
print()
test_utils.rprint(results, 'results')

Fitting ridge model...
- (train, test): (0.784, 0.216)
- n_ridges: 20
- n_regressors: 116
- n_regressands: 92970
- n_train_samples: 1832
- n_test_samples: 504
- inputs: Xtrain, Ytrain, Xtest, Ytest
- outputs: performance
- solver: eig_dual
- dtype: float32

01:     1.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
02:     1.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
03:     2.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
04:     4.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
05:     7.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.253)
06:    11.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.043, +0.102, +0.254)
07:    18.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.000, +0.043, +0.102, +0.254)
08:    29.8  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.254)
09:    48.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.253)
10:    78.5  (5%, 25%, 50%, 75%

# pass kernels directly

In [6]:
# Supply precomputed linear kernels in place of the raw X matrices:
#   Ktrain = Xtrain . Xtrain^T   (train samples x train samples)
#   Ktest  = Xtest  . Xtrain^T   (test samples  x train samples)
# Every input, including Ytrain and Ytest, is passed by keyword here.
parameters = dict(
    weights=False,
    predictions=False,
    performance=True,
    Ktrain=Xtrain.dot(Xtrain.T),
    Ktest=Xtest.dot(Xtrain.T),
    Ytrain=Ytrain,
    Ytest=Ytest,
    ridges=np.logspace(0, 4, 20),
)

results = ridge.solve_ridge(**parameters)

# Two blank lines separate the solver's log from the results printout.
print()
print()
test_utils.rprint(results, 'results')

Fitting ridge model...
- (train, test): (0.784, 0.216)
- n_ridges: 20
- n_regressors: None
- n_regressands: 92970
- n_train_samples: 1832
- n_test_samples: 504
- inputs: Ktrain, Ytrain, Ktrain, Ytest, Ktest
- outputs: performance
- solver: eig_dual
- dtype: float64

01:     1.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
02:     1.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
03:     2.6  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
04:     4.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.252)
05:     7.0  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.044, +0.101, +0.253)
06:    11.3  (5%, 25%, 50%, 75%, 95%)=(-0.054, -0.000, +0.043, +0.102, +0.254)
07:    18.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.000, +0.043, +0.102, +0.254)
08:    29.8  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.254)
09:    48.3  (5%, 25%, 50%, 75%, 95%)=(-0.055, -0.001, +0.043, +0.102, +0.253)
10:    78.5  (5%, 25%,

# see `ridge/ridge.py` for additional options