From 45ae36714cb85676255d28a58499cbbdb8170895 Mon Sep 17 00:00:00 2001
From: YuhanLiin
Date: Sat, 14 Aug 2021 16:02:44 -0400
Subject: [PATCH 01/18] Generalized argmin param across dimensions

---
 algorithms/linfa-logistic/src/argmin_param.rs | 34 +++++++++----------
 algorithms/linfa-logistic/src/float.rs        | 12 +++----
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/algorithms/linfa-logistic/src/argmin_param.rs b/algorithms/linfa-logistic/src/argmin_param.rs
index b726c48c5..5e180f4ea 100644
--- a/algorithms/linfa-logistic/src/argmin_param.rs
+++ b/algorithms/linfa-logistic/src/argmin_param.rs
@@ -1,4 +1,4 @@
-//! This module defines newtypes for ndarray's Array1.
+//! This module defines newtypes for ndarray's Array.
 //!
 //! This is necessary to be able to abstract over floats (f32 and f64) so that
 //! the logistic regression code can be abstract in the float type it works
@@ -8,51 +8,51 @@
 use crate::float::Float;
 use argmin::prelude::*;
-use ndarray::Array1;
+use ndarray::{Array, Dimension};
 use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Clone, Deserialize, Debug, Default)]
-pub struct ArgminParam<F>(pub Array1<F>);
+pub struct ArgminParam<F, D: Dimension>(pub Array<F, D>);
 
-impl<F: Float> ArgminParam<F> {
+impl<F, D: Dimension> ArgminParam<F, D> {
     #[inline]
-    pub fn as_array(&self) -> &Array1<F> {
+    pub fn as_array(&self) -> &Array<F, D> {
         &self.0
     }
 }
 
-impl<F: Float> ArgminSub<ArgminParam<F>, ArgminParam<F>> for ArgminParam<F> {
-    fn sub(&self, other: &ArgminParam<F>) -> ArgminParam<F> {
+impl<F: Float, D: Dimension> ArgminSub<ArgminParam<F, D>, ArgminParam<F, D>> for ArgminParam<F, D> {
+    fn sub(&self, other: &ArgminParam<F, D>) -> ArgminParam<F, D> {
         ArgminParam(&self.0 - &other.0)
     }
 }
 
-impl<F: Float> ArgminAdd<ArgminParam<F>, ArgminParam<F>> for ArgminParam<F> {
-    fn add(&self, other: &ArgminParam<F>) -> ArgminParam<F> {
-        ArgminParam(&self.0 + &other.0)
+impl<F: Float, D: Dimension> ArgminAdd<ArgminParam<F, D>, ArgminParam<F, D>> for ArgminParam<F, D> {
+    fn add(&self, other: &ArgminParam<F, D>) -> ArgminParam<F, D> {
+        ArgminParam(&self.0 - &other.0)
     }
 }
 
-impl<F: Float> ArgminDot<ArgminParam<F>, F> for ArgminParam<F> {
-    fn dot(&self, other: &ArgminParam<F>) -> F {
+impl<F: Float, D: Dimension> ArgminDot<ArgminParam<F, D>, F> for ArgminParam<F, D> {
+    fn dot(&self, other: &ArgminParam<F, D>) -> F {
         self.0.dot(&other.0)
     }
 }
 
-impl<F: Float> ArgminNorm<F> for ArgminParam<F> {
+impl<F: Float, D: Dimension> ArgminNorm<F> for ArgminParam<F, D> {
     fn norm(&self) -> F {
         self.0.dot(&self.0)
     }
 }
 
-impl<F: Float> ArgminMul<F, ArgminParam<F>> for ArgminParam<F> {
-    fn mul(&self, other: &F) -> ArgminParam<F> {
+impl<F: Float, D: Dimension> ArgminMul<F, ArgminParam<F, D>> for ArgminParam<F, D> {
+    fn mul(&self, other: &F) -> ArgminParam<F, D> {
         ArgminParam(&self.0 * *other)
     }
 }
 
-impl<F: Float> ArgminMul<ArgminParam<F>, ArgminParam<F>> for ArgminParam<F> {
-    fn mul(&self, other: &ArgminParam<F>) -> ArgminParam<F> {
+impl<F: Float, D: Dimension> ArgminMul<ArgminParam<F, D>, ArgminParam<F, D>> for ArgminParam<F, D> {
+    fn mul(&self, other: &ArgminParam<F, D>) -> ArgminParam<F, D> {
         ArgminParam(&self.0 * &other.0)
     }
 }

diff --git a/algorithms/linfa-logistic/src/float.rs b/algorithms/linfa-logistic/src/float.rs
index 3eb697e37..612a81027 100644
--- a/algorithms/linfa-logistic/src/float.rs
+++ b/algorithms/linfa-logistic/src/float.rs
@@ -1,6 +1,6 @@
 use crate::argmin_param::ArgminParam;
 use argmin::prelude::{ArgminFloat, ArgminMul};
-use ndarray::NdFloat;
+use ndarray::{Dimension, NdFloat};
 use ndarray_linalg::Lapack;
 use num_traits::FromPrimitive;
 
@@ -13,21 +13,21 @@ pub trait Float:
     + Default
     + Clone
     + FromPrimitive
-    + ArgminMul<ArgminParam<Self>, ArgminParam<Self>>
+    //+ ArgminMul<ArgminParam<Self>, ArgminParam<Self>>
     + linfa::Float
 {
     const POSITIVE_LABEL: Self;
     const NEGATIVE_LABEL: Self;
 }
 
-impl ArgminMul<ArgminParam<f64>, ArgminParam<f64>> for f64 {
-    fn mul(&self, other: &ArgminParam<f64>) -> ArgminParam<f64> {
+impl<D: Dimension> ArgminMul<ArgminParam<f64, D>, ArgminParam<f64, D>> for f64 {
+    fn mul(&self, other: &ArgminParam<f64, D>) -> ArgminParam<f64, D> {
         ArgminParam(&other.0 * *self)
     }
 }
 
-impl ArgminMul<ArgminParam<f32>, ArgminParam<f32>> for f32 {
-    fn mul(&self, other: &ArgminParam<f32>) -> ArgminParam<f32> {
+impl<D: Dimension> ArgminMul<ArgminParam<f32, D>, ArgminParam<f32, D>> for f32 {
+    fn mul(&self, other: &ArgminParam<f32, D>) -> ArgminParam<f32, D> {
         ArgminParam(&other.0 * *self)
     }
 }

From f0209bb830ad5b15b46449f4914787296e428c02 Mon Sep 17 00:00:00 2001
From: YuhanLiin
Date: Sun, 15 Aug 2021 02:48:46 -0400
Subject: [PATCH 02/18] Wrote multinomial loss and gradient

---
 algorithms/linfa-logistic/src/argmin_param.rs |  21 ++-
 algorithms/linfa-logistic/src/float.rs        |   5 +-
 algorithms/linfa-logistic/src/lib.rs          | 163 ++++++++++++++++--
 3 files changed, 163 insertions(+), 26 deletions(-)

diff --git a/algorithms/linfa-logistic/src/argmin_param.rs b/algorithms/linfa-logistic/src/argmin_param.rs
index 5e180f4ea..34f9b8b7e 100644
--- a/algorithms/linfa-logistic/src/argmin_param.rs
+++ b/algorithms/linfa-logistic/src/argmin_param.rs
@@ -8,13 +8,22 @@
 use crate::float::Float;
 use argmin::prelude::*;
-use ndarray::{Array, Dimension};
+use ndarray::{Array, ArrayBase, Data, Dimension, Zip};
 use serde::{Deserialize, Serialize};
 
+pub fn elem_dot<F: Float, A1: Data<Elem = F>, A2: Data<Elem = F>, D: Dimension>(
+    a: &ArrayBase<A1, D>,
+    b: &ArrayBase<A2, D>,
+) -> F {
+    Zip::from(a)
+        .and(b)
+        .fold(F::zero(), |acc, &a, &b| acc + a * b)
+}
+
 #[derive(Serialize, Clone, Deserialize, Debug, Default)]
-pub struct ArgminParam<F, D: Dimension>(pub Array<F, D>);
+pub struct ArgminParam<F: Float, D: Dimension>(pub Array<F, D>);
 
-impl<F, D: Dimension> ArgminParam<F, D> {
+impl<F: Float, D: Dimension> ArgminParam<F, D> {
     #[inline]
     pub fn as_array(&self) -> &Array<F, D> {
         &self.0
@@ -35,13 +44,15 @@ impl<F: Float, D: Dimension> ArgminAdd<ArgminParam<F, D>, ArgminParam<F, D>> for
 
 impl<F: Float, D: Dimension> ArgminDot<ArgminParam<F, D>, F> for ArgminParam<F, D> {
     fn dot(&self, other: &ArgminParam<F, D>) -> F {
-        self.0.dot(&other.0)
+        //self.0.dot(&other.0)
+        todo!()
     }
 }
 
 impl<F: Float, D: Dimension> ArgminNorm<F> for ArgminParam<F, D> {
     fn norm(&self) -> F {
-        self.0.dot(&self.0)
+        //self.0.dot(&self.0)
+        todo!()
     }
 }
 
diff --git a/algorithms/linfa-logistic/src/float.rs b/algorithms/linfa-logistic/src/float.rs
index 612a81027..49d0f6846 100644
--- a/algorithms/linfa-logistic/src/float.rs
+++ b/algorithms/linfa-logistic/src/float.rs
@@ -1,6 +1,6 @@
 use crate::argmin_param::ArgminParam;
 use argmin::prelude::{ArgminFloat, ArgminMul};
-use ndarray::{Dimension, NdFloat};
+use ndarray::{Dimension, Ix1, Ix2, NdFloat};
 use ndarray_linalg::Lapack;
 use num_traits::FromPrimitive;
 
@@ -13,7 +13,8 @@ pub trait Float:
     + Default
     + Clone
     + FromPrimitive
-    //+ ArgminMul<ArgminParam<Self>, ArgminParam<Self>>
+    + ArgminMul<ArgminParam<Self, Ix1>, ArgminParam<Self, Ix1>>
+    + ArgminMul<ArgminParam<Self, Ix2>, ArgminParam<Self, Ix2>>
     + linfa::Float
 {
     const POSITIVE_LABEL: Self;
diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs
index 6c7c5906e..c6a6e6063 100644
--- a/algorithms/linfa-logistic/src/lib.rs
+++ b/algorithms/linfa-logistic/src/lib.rs
@@ -24,13 +24,15 @@
 use argmin::solver::linesearch::MoreThuenteLineSearch;
 use argmin::solver::quasinewton::lbfgs::LBFGS;
 use linfa::prelude::{AsTargets, DatasetBase};
 use linfa::traits::{Fit, PredictInplace};
-use ndarray::{s, Array, Array1, ArrayBase, Data, Ix1, Ix2};
+use ndarray::{
+    s, Array, Array1, Array2, ArrayBase, Axis, Data, Dimension, Ix1, Ix2, RemoveAxis, Slice,
+};
 use std::default::Default;
 
 mod argmin_param;
 mod float;
 
-use argmin_param::ArgminParam;
+use argmin_param::{elem_dot, ArgminParam};
 use float::Float;
 
 /// A two-class logistic regression model.
@@ -77,7 +79,8 @@ impl<F: Float> Default for LogisticRegression<F> {
     }
 }
 
-type LBFGSType<F> = LBFGS<MoreThuenteLineSearch<ArgminParam<F>, F>, ArgminParam<F>, F>;
+type LBFGSType1<F> = LBFGS<MoreThuenteLineSearch<ArgminParam<F, Ix1>, F>, ArgminParam<F, Ix1>, F>;
+type LBFGSType2<F> = LBFGS<MoreThuenteLineSearch<ArgminParam<F, Ix2>, F>, ArgminParam<F, Ix2>, F>;
 
 impl<F: Float> LogisticRegression<F> {
     /// Creates a new LogisticRegression with default configuration.
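A standalone sketch of what the `elem_dot` helper above computes: for 1-D arrays
it matches `Array1::dot`, and for 2-D arrays it is the Frobenius inner product,
which is why a single implementation can serve both the vector (two-class) and
matrix (multinomial) parameter shapes. The harness below is illustrative and not
part of the patch; it uses the plain `num_traits::Float` bound in place of the
crate-local `Float` trait.

    use ndarray::{array, ArrayBase, Data, Dimension, Zip};
    use num_traits::Float;

    // Same shape as the patch's `elem_dot`: fold a Zip over both arrays,
    // accumulating the products of corresponding elements.
    fn elem_dot<F: Float, A1: Data<Elem = F>, A2: Data<Elem = F>, D: Dimension>(
        a: &ArrayBase<A1, D>,
        b: &ArrayBase<A2, D>,
    ) -> F {
        Zip::from(a).and(b).fold(F::zero(), |acc, &x, &y| acc + x * y)
    }

    fn main() {
        let v = array![1.0, 2.0, 3.0];
        // 1-D case agrees with the built-in dot product: 1 + 4 + 9 = 14.
        assert_eq!(elem_dot(&v, &v), v.dot(&v));
        let m = array![[1.0, 2.0], [3.0, 4.0]];
        // 2-D case is the Frobenius inner product: 1 + 4 + 9 + 16 = 30.
        assert_eq!(elem_dot(&m, &m), 30.0);
    }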
@@ -201,8 +204,8 @@ impl<F: Float> LogisticRegression<F> {
         &self,
         x: &'a ArrayBase<A, Ix2>,
         target: Array1<F>,
-    ) -> LogisticRegressionProblem<'a, F, A> {
-        LogisticRegressionProblem {
+    ) -> LogisticRegressionProblem1<'a, F, A> {
+        LogisticRegressionProblem1 {
             x,
             target,
             alpha: self.alpha,
@@ -236,7 +239,7 @@ impl<F: Float> LogisticRegression<F> {
 
     /// Create the LBFGS solver using MoreThuenteLineSearch and set gradient
     /// tolerance.
-    fn setup_solver(&self) -> LBFGSType<F> {
+    fn setup_solver(&self) -> LBFGSType1<F> {
         let linesearch = MoreThuenteLineSearch::new();
         LBFGS::new(linesearch, 10).with_tol_grad(self.gradient_tolerance)
     }
@@ -244,10 +247,10 @@ impl<F: Float> LogisticRegression<F> {
     /// Run the LBFGS solver until it converges or runs out of iterations.
     fn run_solver<'a, A>(
         &self,
-        problem: LogisticRegressionProblem<'a, F, A>,
-        solver: LBFGSType<F>,
+        problem: LogisticRegressionProblem1<'a, F, A>,
+        solver: LBFGSType1<F>,
         init_params: Array1<F>,
-    ) -> Result<ArgminResult<LogisticRegressionProblem<'a, F, A>>>
+    ) -> Result<ArgminResult<LogisticRegressionProblem1<'a, F, A>>>
     where
         A: Data<Elem = F>,
     {
@@ -261,7 +264,7 @@ impl<F: Float> LogisticRegression<F> {
     fn convert_result<A, C>(
         &self,
         labels: ClassLabels<F, C>,
-        result: &ArgminResult<LogisticRegressionProblem<'_, F, A>>,
+        result: &ArgminResult<LogisticRegressionProblem1<'_, F, A>>,
     ) -> Result<FittedLogisticRegression<F, C>>
     where
         A: Data<Elem = F>,
@@ -364,17 +367,25 @@ where
 
 /// Conditionally split the feature vector `w` into parameter vector and
 /// intercept parameter.
-fn convert_params<F: Float>(n_features: usize, w: &Array1<F>) -> (Array1<F>, F) {
-    if n_features == w.len() {
-        (w.to_owned(), F::zero())
-    } else if n_features + 1 == w.len() {
-        (w.slice(s![..w.len() - 1]).to_owned(), w[w.len() - 1])
+/// Dimensions of `w` are either (f) or (f, n_classes)
+fn convert_params<F: Float, D: Dimension + RemoveAxis>(
+    n_features: usize,
+    w: &Array<F, D>,
+) -> (Array<F, D>, Array<F, D::Smaller>) {
+    let nrows = w.shape()[0];
+    if n_features == nrows {
+        (w.to_owned(), Array::zeros(w.raw_dim().remove_axis(Axis(0))))
+    } else if n_features + 1 == nrows {
+        (
+            w.slice_axis(Axis(0), Slice::from(..n_features)).to_owned(),
+            w.index_axis(Axis(0), n_features).to_owned(),
+        )
     } else {
         panic!(
             "Unexpected length of parameter vector `w`, expected {} or {}, found {}",
             n_features,
             n_features + 1,
-            w.len()
+            nrows
         );
     }
 }
@@ -399,6 +410,22 @@ fn log_logistic<F: linfa::Float>(x: F) -> F {
     }
 }
 
+/// Finds the log of the sum of exponents across a specific axis in a numerically stable way. More
+/// specifically, computes `ln(exp(x1) + exp(x2) + exp(x3) + ...)` across an axis.
+///
+/// Based off this implementation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.logsumexp.html
+fn log_sum_exp<F: linfa::Float, A: Data<Elem = F>>(
+    m: &ArrayBase<A, Ix2>,
+    axis: Axis,
+) -> Array<F, Ix1> {
+    // Find max value of the array
+    let max = m.iter().copied().reduce(F::max).unwrap();
+    // Computes `max + ln(exp(x1-max) + exp(x2-max) + exp(x3-max) + ...)`, which is equal to the
+    // log_sum_exp formula
+    let reduced = m.fold_axis(axis, F::zero(), |acc, elem| *acc + (*elem - max).exp());
+    reduced.mapv_into(|e| e.ln() + max)
+}
+
 /// Computes the logistic loss assuming the training labels $y \in {-1, 1}$
 ///
 /// Because the logistic function fulfills $\sigma(-z) = 1 - \sigma(z)$
@@ -421,6 +448,38 @@ fn logistic_loss<F: Float, A: Data<Elem = F>>(
     -yz.sum() + F::cast(0.5) * alpha * params.dot(&params)
 }
 
+/// Computes loss function of `-sum(Y * log(softmax(H))) + alpha/2 * norm(W)`, where H is `X . W + b`.
+/// Also returns the log of the probabilities, which is `log(softmax(H))`, along with `W`.
+/// `Y` is the output (n_samples * n_classes), `X` is the input (n_samples * n_features), `W` is the
+/// params (n_features * n_classes), `b` is the intercept vector (n_classes).
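+///
+/// As a small worked case (illustrative numbers, not taken from the crate):
+/// for a single sample with `h = [0, ln(3)]`, `softmax(h) = [0.25, 0.75]`; if
+/// the one-hot target row is `y = [0, 1]`, that sample's unregularized loss
+/// contribution is `-ln(0.75) ≈ 0.288`, and the `alpha/2 * norm(W)` penalty is
+/// added on top of the sum over samples.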
+fn multi_logistic_loss_prob_params>( + x: &ArrayBase, + y: &Array2, + alpha: F, + w: &Array2, // This parameter includes `W` and `b` +) -> (F, Array2, Array2) { + let n_features = x.shape()[1]; + let (params, intercept) = convert_params(n_features, w); + // Compute H + let h = x.dot(¶ms) + intercept; + // This computes `H - log(sum(exp(H)))`, which is equal to + // `log(softmax(H)) = log(exp(H) / sum(exp(H)))` + let log_prob = h - log_sum_exp(&h, Axis(1)); + // Calculate loss + // XXX Should we divide cost by n_samples??? + let loss = -elem_dot(&log_prob, y) + F::cast(0.5) * alpha * elem_dot(¶ms, ¶ms); + (loss, log_prob, params) +} + +fn multi_logistic_loss>( + x: &ArrayBase, + y: &Array2, + alpha: F, + w: &Array2, +) -> F { + multi_logistic_loss_prob_params(x, y, alpha, w).0 +} + /// Computes the gradient of the logistic loss function fn logistic_grad>( x: &ArrayBase, @@ -445,6 +504,36 @@ fn logistic_grad>( } } +/// Computes multinomial gradients for `W` and `b`, combine them, and flatten the result. +/// Gradient for `W` is `Xt . (softmax(H) - Y) + alpha * W`. +/// Gradient for `b` is `sum(softmax(H) - Y)`. +fn multi_logistic_grad>( + x: &ArrayBase, + y: &Array2, + alpha: F, + w: &Array2, +) -> Array1 { + let (loss, log_prob, params) = multi_logistic_loss_prob_params(x, y, alpha, w); + let (n_features, n_classes) = params.dim(); + let intercept = x.ncols() > n_features; + let mut grad = Array::zeros((n_features + intercept as usize, n_classes)); + + // This value is `softmax(H)` + let prob = log_prob.mapv_into(num_traits::Float::exp); + let diff = prob - y; + // Compute gradient for `W` and place it at start of the grad matrix + let mut dw = x.t().dot(&diff); + params *= alpha; + dw += ¶ms; + grad.slice_mut(s![..n_features, ..]).assign(&dw); + // Compute gradient for `b` and place it at end of grad matrix + if intercept { + grad.row_mut(n_features).assign(&diff.sum_axis(Axis(0))); + } + // XXX should we divide by n_samples?? + grad.into_shape(grad.len()).unwrap() +} + /// A fitted logistic regression which can make predictions #[derive(PartialEq, Debug)] pub struct FittedLogisticRegression { @@ -549,15 +638,15 @@ fn class_from_label(labels: &[ClassLabel] /// Internal representation of a logistic regression problem. /// This data structure exists to be handed to Argmin. -struct LogisticRegressionProblem<'a, F: Float, A: Data> { +struct LogisticRegressionProblem1<'a, F: Float, A: Data> { x: &'a ArrayBase, target: Array1, alpha: F, } -impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem<'a, F, A> { +impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem1<'a, F, A> { /// Type of the parameter vector - type Param = ArgminParam; + type Param = ArgminParam; /// Type of the return value computed by the cost function type Output = F; /// Type of the Hessian. Can be `()` if not needed. @@ -585,6 +674,42 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem<'a, } } +struct LogisticRegressionProblem2<'a, F: Float, A: Data> { + x: &'a ArrayBase, + target: Array2, + alpha: F, +} + +impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem2<'a, F, A> { + /// Type of the parameter vector + type Param = ArgminParam; + /// Type of the return value computed by the cost function + type Output = F; + /// Type of the Hessian. Can be `()` if not needed. + type Hessian = (); + /// Type of the Jacobian. Can be `()` if not needed. 
+ type Jacobian = Array1; + /// Floating point precision + type Float = F; + + /// Apply the cost function to a parameter `p` + fn apply(&self, p: &Self::Param) -> std::result::Result { + let w = p.as_array(); + Ok(multi_logistic_loss(self.x, &self.target, self.alpha, w)) + } + + /// Compute the gradient at parameter `p`. + fn gradient(&self, p: &Self::Param) -> std::result::Result { + let w = p.as_array(); + Ok(ArgminParam(multi_logistic_grad( + self.x, + &self.target, + self.alpha, + w, + ))) + } +} + #[cfg(test)] mod test { extern crate linfa; From 4a64b9a322411bf31672ab2af22c26716eb6a731 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 13:26:45 -0400 Subject: [PATCH 03/18] Partially done --- algorithms/linfa-logistic/src/lib.rs | 287 ++++++++++++++------------- 1 file changed, 152 insertions(+), 135 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index c6a6e6063..44d0a8c9a 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -65,26 +65,98 @@ use float::Float; /// let model = LogisticRegression::default().fit(&dataset).unwrap(); /// let prediction = model.predict(&dataset); /// ``` -pub struct LogisticRegression { +pub struct LogisticRegression { alpha: F, fit_intercept: bool, max_iterations: u64, gradient_tolerance: F, - initial_params: Option<(Array1, F)>, + initial_params: Option>, } -impl Default for LogisticRegression { - fn default() -> LogisticRegression { +impl Default for LogisticRegression { + fn default() -> Self { LogisticRegression::new() } } -type LBFGSType1 = LBFGS, F>, ArgminParam, F>; -type LBFGSType2 = LBFGS, F>, ArgminParam, F>; +type LBFGSType = LBFGS, F>, ArgminParam, F>; +type LBFGSType1 = LBFGSType; +type LBFGSType2 = LBFGSType; -impl LogisticRegression { +impl LogisticRegression { + /// Given a 2-dimensional feature matrix array `x` with shape + /// (n_samples, n_features) and an iterable of target classes to predict, + /// create a `FittedLinearRegression` object which allows making + /// predictions. + /// + /// The iterable of target classes `y` must have exactly two distinct + /// values, (e.g. 0.0 and 1.0, 0 and 1, "cat" and "dog", ...), which + /// represent the two different classes the model is supposed to predict. + /// + /// The iterable `y` must also produces exactly `n_samples` items, i.e. + /// exactly as many items as there are rows in the feature matrix `x`. + /// + /// This method returns an error if any of the preconditions are violated, + /// i.e. any values are `Inf` or `NaN`, `y` doesn't have as many items as + /// `x` has rows, or if other parameters (gradient_tolerance, alpha) have + /// been set to inalid values. + fn fit(&self, x: &ArrayBase, y: T) -> Result> + where + A: Data, + T: AsTargets, + C: PartialOrd + Clone, + { + let (labels, target) = label_classes(y)?; + self.validate_data(x, &target)?; + let problem = self.setup_problem(x, target); + let solver = self.setup_solver(); + let init_params = self.setup_init_params(x.ncols()); + let result = self.run_solver(problem, solver, ArgminParam(init_params))?; + self.convert_result(labels, &result) + } + + /// Create the initial parameters, either from a user supplied guess + /// or a 1-d array of `0`s. 
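+    ///
+    /// For example, with `fit_intercept == true` and three features this
+    /// produces `[0.0, 0.0, 0.0, 0.0]`: one zero per feature plus a trailing
+    /// entry reserved for the intercept, matching the layout `convert_params`
+    /// splits apart later.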
+ fn setup_init_params(&self, n_features: usize) -> Array1 { + if let Some(params) = self.initial_params.as_ref() { + params.clone() + } else { + Array::zeros(n_features + self.fit_intercept as usize) + } + } +} + +impl LogisticRegression { + fn fit(&self, x: &ArrayBase, y: T) -> Result> + where + A: Data, + T: AsTargets, + C: PartialOrd + Clone, + { + let (labels, target) = label_classes_multi(y)?; + self.validate_data(x, &target)?; + let problem = self.setup_problem(x, target); + let solver = self.setup_solver(); + let init_params = self.setup_init_params(x.ncols(), target.ncols()); + let result = self.run_solver(problem, solver, ArgminParam(init_params))?; + self.convert_result(labels, &result) + } + + /// Create the initial parameters, either from a user supplied guess + /// or a 1-d array of `0`s. + fn setup_init_params(&self, n_features: usize, n_classes: usize) -> Array2 { + if let Some(params) = self.initial_params.as_ref() { + params.clone() + } else { + n_features + self.fit_intercept as usize; + Array::zeros((n_features + self.fit_intercept as usize, n_classes)) + } + } +} + +impl LogisticRegression { /// Creates a new LogisticRegression with default configuration. - pub fn new() -> LogisticRegression { + pub fn new() -> Self { LogisticRegression { alpha: F::cast(1.0), fit_intercept: true, @@ -96,79 +168,48 @@ impl LogisticRegression { /// Set the normalization parameter `alpha` used for L2 normalization, /// defaults to `1.0`. - pub fn alpha(mut self, alpha: F) -> LogisticRegression { + pub fn alpha(mut self, alpha: F) -> Self { self.alpha = alpha; self } /// Configure if an intercept should be fitted, defaults to `true`. - pub fn with_intercept(mut self, fit_intercept: bool) -> LogisticRegression { + pub fn with_intercept(mut self, fit_intercept: bool) -> Self { self.fit_intercept = fit_intercept; self } /// Configure the maximum number of iterations that the solver should perform, /// defaults to `100`. - pub fn max_iterations(mut self, max_iterations: u64) -> LogisticRegression { + pub fn max_iterations(mut self, max_iterations: u64) -> Self { self.max_iterations = max_iterations; self } /// Configure the minimum change to the gradient to continue the solver, /// defaults to `1e-4`. - pub fn gradient_tolerance(mut self, gradient_tolerance: F) -> LogisticRegression { + pub fn gradient_tolerance(mut self, gradient_tolerance: F) -> Self { self.gradient_tolerance = gradient_tolerance; self } /// Configure the initial parameters from where the optimization starts. - /// The `params` array must have the same size as the number of columns of - /// the feature matrix `x` passed to the `fit` method - pub fn initial_params(mut self, params: Array1, intercept: F) -> LogisticRegression { - self.initial_params = Some((params, intercept)); + /// The `params` array must have at least the same number of rows as there are columns on the + /// feature matrix `x` passed to the `fit` method + pub fn initial_params(mut self, params: Array) -> Self { + self.initial_params = Some(params); self } - /// Given a 2-dimensional feature matrix array `x` with shape - /// (n_samples, n_features) and an iterable of target classes to predict, - /// create a `FittedLinearRegression` object which allows making - /// predictions. - /// - /// The iterable of target classes `y` must have exactly two distinct - /// values, (e.g. 0.0 and 1.0, 0 and 1, "cat" and "dog", ...), which - /// represent the two different classes the model is supposed to predict. 
- /// - /// The iterable `y` must also produces exactly `n_samples` items, i.e. - /// exactly as many items as there are rows in the feature matrix `x`. - /// - /// This method returns an error if any of the preconditions are violated, - /// i.e. any values are `Inf` or `NaN`, `y` doesn't have as many items as - /// `x` has rows, or if other parameters (gradient_tolerance, alpha) have - /// been set to inalid values. - fn fit(&self, x: &ArrayBase, y: T) -> Result> - where - A: Data, - T: AsTargets, - C: PartialOrd + Clone, - { - let (labels, target) = label_classes(y)?; - self.validate_data(x, &target)?; - let problem = self.setup_problem(x, target); - let solver = self.setup_solver(); - let init_params = self.setup_init_params(x); - let result = self.run_solver(problem, solver, init_params)?; - self.convert_result(labels, &result) - } - /// Ensure that `x` and `y` have the right shape and that all data and /// configuration parameters are finite. - fn validate_data(&self, x: &ArrayBase, y: &ArrayBase) -> Result<()> - where - A: Data, - B: Data, - { - if x.shape()[0] != y.len() { - return Err(Error::MismatchedShapes(x.shape()[0], y.len())); + fn validate_data, B: Data>( + &self, + x: &ArrayBase, + y: &ArrayBase, + ) -> Result<()> { + if x.shape()[0] != y.shape()[0] { + return Err(Error::MismatchedShapes(x.shape()[0], y.shape()[0])); } if x.iter().any(|x| !x.is_finite()) || y.iter().any(|y| !y.is_finite()) @@ -179,20 +220,18 @@ impl LogisticRegression { if !self.gradient_tolerance.is_finite() || self.gradient_tolerance <= F::zero() { return Err(Error::InvalidGradientTolerance); } - self.validate_init_params(x)?; + self.validate_init_params(x.shape()[1], y.shape().get(1).copied())?; Ok(()) } - fn validate_init_params(&self, x: &ArrayBase) -> Result<()> - where - A: Data, - { - if let Some((params, intercept)) = self.initial_params.as_ref() { - let (_, n_features) = x.dim(); - if n_features != params.dim() { + fn validate_init_params(&self, mut n_features: usize, n_classes: Option) -> Result<()> { + if let Some(params) = self.initial_params.as_ref() { + let shape = params.shape(); + n_features += self.fit_intercept as usize; + if n_features != shape[0] || n_classes != shape.get(1).copied() { return Err(Error::InvalidInitialParametersGuessSize); } - if params.iter().any(|p| !p.is_finite()) || !intercept.is_finite() { + if params.iter().any(|p| !p.is_finite()) { return Err(Error::InvalidInitialParametersGuess); } } @@ -203,58 +242,30 @@ impl LogisticRegression { fn setup_problem<'a, A: Data>( &self, x: &'a ArrayBase, - target: Array1, - ) -> LogisticRegressionProblem1<'a, F, A> { - LogisticRegressionProblem1 { + target: Array, + ) -> LogisticRegressionProblem<'a, F, A, D> { + LogisticRegressionProblem { x, target, alpha: self.alpha, } } - /// Create the initial parameters, either from a user supplied guess - /// or a 1-d array of `0`s. - fn setup_init_params(&self, x: &ArrayBase) -> Array1 - where - A: Data, - { - let n_features = x.shape()[1]; - let param_len = if self.fit_intercept { - n_features + 1 - } else { - n_features - }; - - let mut init_parmas = Array1::zeros(param_len); - - if let Some((params, intercept)) = self.initial_params.as_ref() { - init_parmas.slice_mut(s![..n_features]).assign(params); - if param_len == n_features + 1 { - init_parmas[n_features] = *intercept; - } - } - - init_parmas - } - /// Create the LBFGS solver using MoreThuenteLineSearch and set gradient /// tolerance. 
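    ///
    /// Schematically, the configured solver is then driven by `run_solver`
    /// below through argmin's executor (a sketch of the call shape used in
    /// this module, not additional API):
    ///
    /// ```ignore
    /// Executor::new(problem, solver, init_params)
    ///     .max_iters(max_iterations)
    ///     .run()
    /// ```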
- fn setup_solver(&self) -> LBFGSType1 { + fn setup_solver(&self) -> LBFGSType { let linesearch = MoreThuenteLineSearch::new(); LBFGS::new(linesearch, 10).with_tol_grad(self.gradient_tolerance) } /// Run the LBFGS solver until it converges or runs out of iterations. - fn run_solver<'a, A>( + fn run_solver( &self, - problem: LogisticRegressionProblem1<'a, F, A>, - solver: LBFGSType1, - init_params: Array1, - ) -> Result>> - where - A: Data, - { - Executor::new(problem, solver, ArgminParam(init_params)) + problem: P, + solver: P::Solver, + init_params: P::Param, + ) -> Result> { + Executor::new(problem, solver, init_params) .max_iters(self.max_iterations) .run() .map_err(|err| err.into()) @@ -365,6 +376,15 @@ where )) } +fn label_classes_multi(y: T) -> Result<(ClassLabels, Array2)> +where + F: Float, + T: AsTargets, + C: PartialOrd + Clone, +{ + todo!() +} + /// Conditionally split the feature vector `w` into parameter vector and /// intercept parameter. /// Dimensions of `w` are either (f) or (f, n_classes) @@ -448,36 +468,34 @@ fn logistic_loss>( -yz.sum() + F::cast(0.5) * alpha * params.dot(¶ms) } -/// Computes loss function of `-sum(Y * log(softmax(H))) + alpha/2 * norm(W)`, where H is `X . W + b`. -/// Also returns the log of the probabilities, which is `log(softmax(H))`, along with `W`. +/// Compute the log of probabilities, which is `log(softmax(H))`, along with `W`. /// `Y` is the output (n_samples * n_classes), `X` is the input (n_samples * n_features), `W` is the /// params (n_features * n_classes), `b` is the intercept vector (n_classes). -fn multi_logistic_loss_prob_params>( +fn multi_logistic_prob_params>( x: &ArrayBase, - y: &Array2, - alpha: F, w: &Array2, // This parameter includes `W` and `b` -) -> (F, Array2, Array2) { +) -> (Array2, Array2) { let n_features = x.shape()[1]; let (params, intercept) = convert_params(n_features, w); // Compute H let h = x.dot(¶ms) + intercept; // This computes `H - log(sum(exp(H)))`, which is equal to // `log(softmax(H)) = log(exp(H) / sum(exp(H)))` - let log_prob = h - log_sum_exp(&h, Axis(1)); - // Calculate loss - // XXX Should we divide cost by n_samples??? - let loss = -elem_dot(&log_prob, y) + F::cast(0.5) * alpha * elem_dot(¶ms, ¶ms); - (loss, log_prob, params) + let log_prob = &h - log_sum_exp(&h, Axis(1)); + (log_prob, params) } +/// Computes loss function of `-sum(Y * log(softmax(H))) + alpha/2 * norm(W)`, where H is `X . W + b`. fn multi_logistic_loss>( x: &ArrayBase, y: &Array2, alpha: F, w: &Array2, ) -> F { - multi_logistic_loss_prob_params(x, y, alpha, w).0 + let (log_prob, params) = multi_logistic_prob_params(x, w); + // Calculate loss + // XXX Should we divide cost by n_samples??? + -elem_dot(&log_prob, y) + F::cast(0.5) * alpha * elem_dot(¶ms, ¶ms) } /// Computes the gradient of the logistic loss function @@ -504,7 +522,7 @@ fn logistic_grad>( } } -/// Computes multinomial gradients for `W` and `b`, combine them, and flatten the result. +/// Computes multinomial gradients for `W` and `b` and combine them. /// Gradient for `W` is `Xt . (softmax(H) - Y) + alpha * W`. /// Gradient for `b` is `sum(softmax(H) - Y)`. 
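///
/// These formulas follow from the softmax/cross-entropy identity: with
/// `L = -sum(Y * log(softmax(H)))` the derivative is `dL/dH = softmax(H) - Y`,
/// so the chain rule through `H = X . W + b` gives
/// `dL/dW = Xt . (softmax(H) - Y)` and `dL/db = sum(softmax(H) - Y)` over
/// samples; the `alpha * W` term is the gradient of the `alpha/2 * norm(W)`
/// penalty.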
fn multi_logistic_grad>( @@ -512,8 +530,8 @@ fn multi_logistic_grad>( y: &Array2, alpha: F, w: &Array2, -) -> Array1 { - let (loss, log_prob, params) = multi_logistic_loss_prob_params(x, y, alpha, w); +) -> Array2 { + let (log_prob, mut params) = multi_logistic_prob_params(x, w); let (n_features, n_classes) = params.dim(); let intercept = x.ncols() > n_features; let mut grad = Array::zeros((n_features + intercept as usize, n_classes)); @@ -531,7 +549,7 @@ fn multi_logistic_grad>( grad.row_mut(n_features).assign(&diff.sum_axis(Axis(0))); } // XXX should we divide by n_samples?? - grad.into_shape(grad.len()).unwrap() + grad } /// A fitted logistic regression which can make predictions @@ -638,22 +656,20 @@ fn class_from_label(labels: &[ClassLabel] /// Internal representation of a logistic regression problem. /// This data structure exists to be handed to Argmin. -struct LogisticRegressionProblem1<'a, F: Float, A: Data> { +struct LogisticRegressionProblem<'a, F: Float, A: Data, D: Dimension> { x: &'a ArrayBase, - target: Array1, + target: Array, alpha: F, } +type LogisticRegressionProblem1<'a, F, A> = LogisticRegressionProblem<'a, F, A, Ix1>; +type LogisticRegressionProblem2<'a, F, A> = LogisticRegressionProblem<'a, F, A, Ix2>; + impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem1<'a, F, A> { - /// Type of the parameter vector type Param = ArgminParam; - /// Type of the return value computed by the cost function type Output = F; - /// Type of the Hessian. Can be `()` if not needed. type Hessian = (); - /// Type of the Jacobian. Can be `()` if not needed. type Jacobian = Array1; - /// Floating point precision type Float = F; /// Apply the cost function to a parameter `p` @@ -674,22 +690,11 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem1<'a } } -struct LogisticRegressionProblem2<'a, F: Float, A: Data> { - x: &'a ArrayBase, - target: Array2, - alpha: F, -} - impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem2<'a, F, A> { - /// Type of the parameter vector type Param = ArgminParam; - /// Type of the return value computed by the cost function type Output = F; - /// Type of the Hessian. Can be `()` if not needed. type Hessian = (); - /// Type of the Jacobian. Can be `()` if not needed. 
type Jacobian = Array1; - /// Floating point precision type Float = F; /// Apply the cost function to a parameter `p` @@ -710,6 +715,18 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem2<'a } } +trait SolvableProblem: ArgminOp + Sized { + type Solver: Solver; +} + +impl<'a, F: Float, A: Data> SolvableProblem for LogisticRegressionProblem1<'a, F, A> { + type Solver = LBFGSType1; +} + +impl<'a, F: Float, A: Data> SolvableProblem for LogisticRegressionProblem2<'a, F, A> { + type Solver = LBFGSType2; +} + #[cfg(test)] mod test { extern crate linfa; From b289afe9e4e44c2f7a4ce58dc88a9196097f80c8 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 15:59:45 -0400 Subject: [PATCH 04/18] Finished multi fitted model --- algorithms/linfa-logistic/Cargo.toml | 1 + algorithms/linfa-logistic/src/lib.rs | 146 +++++++++++++++++++++++---- 2 files changed, 127 insertions(+), 20 deletions(-) diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml index 9342faf64..c7454605e 100644 --- a/algorithms/linfa-logistic/Cargo.toml +++ b/algorithms/linfa-logistic/Cargo.toml @@ -16,6 +16,7 @@ categories = ["algorithms", "mathematics", "science"] [dependencies] ndarray = { version = "0.15", features = ["approx", "blas"] } ndarray-linalg = "0.14" +ndarray-stats = "0.5.0" num-traits = "0.2" argmin = { version = "0.4.6", features = ["ndarrayl"] } serde = "1.0" diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 44d0a8c9a..cc09e441c 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -25,8 +25,10 @@ use argmin::solver::quasinewton::lbfgs::LBFGS; use linfa::prelude::{AsTargets, DatasetBase}; use linfa::traits::{Fit, PredictInplace}; use ndarray::{ - s, Array, Array1, Array2, ArrayBase, Axis, Data, Dimension, Ix1, Ix2, RemoveAxis, Slice, + s, Array, Array1, Array2, ArrayBase, Axis, Data, DataMut, Dimension, Ix1, Ix2, RemoveAxis, + Slice, Zip, }; +use ndarray_stats::QuantileExt; use std::default::Default; mod argmin_param; @@ -272,27 +274,29 @@ impl LogisticRegression { } /// Take an ArgminResult and return a FittedLogisticRegression. - fn convert_result( + fn convert_result<'a, A, C>( &self, labels: ClassLabels, - result: &ArgminResult>, + result: &ArgminResult>, ) -> Result> where A: Data, C: PartialOrd + Clone, + LogisticRegressionProblem<'a, F, A, D>: SolvableProblem, { - let mut intercept = F::cast(0.0); - let mut params = result.state().best_param.as_array().clone(); - if self.fit_intercept { - intercept = params[params.len() - 1]; - params = params.slice(s![..params.len() - 1]).to_owned(); - } + let params = result.state().best_param.as_array(); + let n_features = if self.fit_intercept { + params.shape()[0] - 1 + } else { + params.shape()[0] + }; + let (w, intercept) = convert_params(n_features, params); Ok(FittedLogisticRegression::new(intercept, params, labels)) } } impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets> - Fit, T, Error> for LogisticRegression + Fit, T, Error> for LogisticRegression { type Object = FittedLogisticRegression; @@ -317,6 +321,16 @@ impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets, T: AsTargets> + Fit, T, Error> for LogisticRegression +{ + type Object = MultiFittedLogisticRegression; + + fn fit(&self, dataset: &DatasetBase, T>) -> Result { + self.fit(dataset.records(), dataset.targets()) + } +} + /// Identify the distinct values of the classes `y` and associate /// the target labels `-1.0` and `1.0` to it. 
-1.0 always labels the /// smaller class (by PartialOrd) and 1.0 always labels the larger @@ -446,6 +460,14 @@ fn log_sum_exp>( reduced.mapv_into(|e| e.ln() + max) } +/// Computes `exp(n - max) / sum(exp(n- max))`, which is a numerically stable version of softmax +fn softmax_inplace>(v: &mut ArrayBase) { + let max = v.iter().copied().reduce(F::max).unwrap(); + v.mapv_inplace(|n| (n - max).exp()); + let sum = v.sum(); + v.mapv_inplace(|n| n / max); +} + /// Computes the logistic loss assuming the training labels $y \in {-1, 1}$ /// /// Because the logistic function fullfills $\sigma(-z) = 1 - \sigma(z)$ @@ -601,24 +623,100 @@ impl FittedLogisticRegression { probs.mapv_inplace(logistic); probs } +} +impl> + PredictInplace, Array1> for FittedLogisticRegression +{ /// Given a feature matrix, predict the classes learned when the model was /// fitted. - fn predict>(&self, x: &ArrayBase) -> Array1 { + fn predict_inplace(&self, x: &ArrayBase, y: &mut Array1) { + assert_eq!( + x.nrows(), + y.len(), + "The number of data points must match the number of output targets." + ); + assert_eq!( + x.ncols(), + self.params.len(), + "Number of data features must match the number of features the model was trained with." + ); + let pos_class = class_from_label(&self.labels, F::POSITIVE_LABEL); let neg_class = class_from_label(&self.labels, F::NEGATIVE_LABEL); - self.predict_probabilities(x).mapv(|probability| { - if probability >= self.threshold { - pos_class.clone() - } else { - neg_class.clone() - } - }) + Zip::from(&self.predict_probabilities(x)) + .and(y) + .for_each(|prob, out| { + *out = if *prob >= self.threshold { + pos_class.clone() + } else { + neg_class.clone() + } + }); + } + + fn default_target(&self, x: &ArrayBase) -> Array1 { + Array1::default(x.nrows()) + } +} + +/// A fitted logistic regression which can make predictions +#[derive(PartialEq, Debug)] +pub struct MultiFittedLogisticRegression { + threshold: F, + intercept: F, + params: Array2, + classes: Vec, +} + +impl MultiFittedLogisticRegression { + fn new(intercept: F, params: Array2, classes: Vec) -> Self { + Self { + threshold: F::cast(0.5), + intercept, + params, + classes, + } + } + + /// Set the probability threshold for which the 'positive' class will be + /// predicted. Defaults to 0.5. + pub fn set_threshold(mut self, threshold: F) -> Self { + if threshold < F::zero() || threshold > F::one() { + panic!("FittedLogisticRegression::set_threshold: threshold needs to be between 0.0 and 1.0"); + } + self.threshold = threshold; + self + } + + pub fn intercept(&self) -> F { + self.intercept + } + + pub fn params(&self) -> &Array2 { + &self.params + } + + /// Return non-normalized probabilities (n_samples * n_classes) + fn predict_nonorm_probabilities>(&self, x: &ArrayBase) -> Array2 { + let mut probs = x.dot(&self.params) + self.intercept; + probs + } + + /// Return normalized probabilities for each output class. The output dimensions are (n_samples + /// * n_classes). + pub fn predict_probabilities>(&self, x: &ArrayBase) -> Array2 { + let mut probs = self.predict_nonorm_probabilities(x); + probs + .rows_mut() + .into_iter() + .for_each(|row| softmax_inplace(&mut row)); + probs } } impl> - PredictInplace, Array1> for FittedLogisticRegression + PredictInplace, Array1> for MultiFittedLogisticRegression { /// Given a feature matrix, predict the classes learned when the model was /// fitted. @@ -628,8 +726,16 @@ impl> y.len(), "The number of data points must match the number of output targets." 
); + assert_eq!( + x.ncols(), + self.params.nrows(), + "Number of data features must match the number of features the model was trained with." + ); - *y = self.predict(x); + let probs = self.predict_nonorm_probabilities(x); + Zip::from(probs.rows()).and(y).for_each(|prob_row, out| { + *out = prob_row.argmax_skipnan().unwrap(); + }); } fn default_target(&self, x: &ArrayBase) -> Array1 { From 7ba41c1e921604a021ea9fcee7df452a8256ea5e Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 17:04:51 -0400 Subject: [PATCH 05/18] Fixed non-test code --- algorithms/linfa-logistic/src/lib.rs | 179 ++++++++++----------------- 1 file changed, 68 insertions(+), 111 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index cc09e441c..2d32e18d2 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -25,8 +25,8 @@ use argmin::solver::quasinewton::lbfgs::LBFGS; use linfa::prelude::{AsTargets, DatasetBase}; use linfa::traits::{Fit, PredictInplace}; use ndarray::{ - s, Array, Array1, Array2, ArrayBase, Axis, Data, DataMut, Dimension, Ix1, Ix2, RemoveAxis, - Slice, Zip, + s, Array, Array1, Array2, ArrayBase, Axis, Data, DataMut, Dimension, IntoDimension, Ix1, Ix2, + RemoveAxis, Slice, Zip, }; use ndarray_stats::QuantileExt; use std::default::Default; @@ -85,77 +85,6 @@ type LBFGSType = LBFGS, F>, Argmin type LBFGSType1 = LBFGSType; type LBFGSType2 = LBFGSType; -impl LogisticRegression { - /// Given a 2-dimensional feature matrix array `x` with shape - /// (n_samples, n_features) and an iterable of target classes to predict, - /// create a `FittedLinearRegression` object which allows making - /// predictions. - /// - /// The iterable of target classes `y` must have exactly two distinct - /// values, (e.g. 0.0 and 1.0, 0 and 1, "cat" and "dog", ...), which - /// represent the two different classes the model is supposed to predict. - /// - /// The iterable `y` must also produces exactly `n_samples` items, i.e. - /// exactly as many items as there are rows in the feature matrix `x`. - /// - /// This method returns an error if any of the preconditions are violated, - /// i.e. any values are `Inf` or `NaN`, `y` doesn't have as many items as - /// `x` has rows, or if other parameters (gradient_tolerance, alpha) have - /// been set to inalid values. - fn fit(&self, x: &ArrayBase, y: T) -> Result> - where - A: Data, - T: AsTargets, - C: PartialOrd + Clone, - { - let (labels, target) = label_classes(y)?; - self.validate_data(x, &target)?; - let problem = self.setup_problem(x, target); - let solver = self.setup_solver(); - let init_params = self.setup_init_params(x.ncols()); - let result = self.run_solver(problem, solver, ArgminParam(init_params))?; - self.convert_result(labels, &result) - } - - /// Create the initial parameters, either from a user supplied guess - /// or a 1-d array of `0`s. 
- fn setup_init_params(&self, n_features: usize) -> Array1 { - if let Some(params) = self.initial_params.as_ref() { - params.clone() - } else { - Array::zeros(n_features + self.fit_intercept as usize) - } - } -} - -impl LogisticRegression { - fn fit(&self, x: &ArrayBase, y: T) -> Result> - where - A: Data, - T: AsTargets, - C: PartialOrd + Clone, - { - let (labels, target) = label_classes_multi(y)?; - self.validate_data(x, &target)?; - let problem = self.setup_problem(x, target); - let solver = self.setup_solver(); - let init_params = self.setup_init_params(x.ncols(), target.ncols()); - let result = self.run_solver(problem, solver, ArgminParam(init_params))?; - self.convert_result(labels, &result) - } - - /// Create the initial parameters, either from a user supplied guess - /// or a 1-d array of `0`s. - fn setup_init_params(&self, n_features: usize, n_classes: usize) -> Array2 { - if let Some(params) = self.initial_params.as_ref() { - params.clone() - } else { - n_features + self.fit_intercept as usize; - Array::zeros((n_features + self.fit_intercept as usize, n_classes)) - } - } -} - impl LogisticRegression { /// Creates a new LogisticRegression with default configuration. pub fn new() -> Self { @@ -203,6 +132,18 @@ impl LogisticRegression { self } + /// Create the initial parameters, either from a user supplied guess + /// or a 1-d array of `0`s. + fn setup_init_params(&self, dims: D::Pattern) -> Array { + if let Some(params) = self.initial_params.as_ref() { + params.clone() + } else { + let mut dims = dims.into_dimension(); + dims.as_array_view_mut()[0] += self.fit_intercept as usize; + Array::zeros(dims) + } + } + /// Ensure that `x` and `y` have the right shape and that all data and /// configuration parameters are finite. fn validate_data, B: Data>( @@ -272,30 +213,9 @@ impl LogisticRegression { .run() .map_err(|err| err.into()) } - - /// Take an ArgminResult and return a FittedLogisticRegression. 
- fn convert_result<'a, A, C>( - &self, - labels: ClassLabels, - result: &ArgminResult>, - ) -> Result> - where - A: Data, - C: PartialOrd + Clone, - LogisticRegressionProblem<'a, F, A, D>: SolvableProblem, - { - let params = result.state().best_param.as_array(); - let n_features = if self.fit_intercept { - params.shape()[0] - 1 - } else { - params.shape()[0] - }; - let (w, intercept) = convert_params(n_features, params); - Ok(FittedLogisticRegression::new(intercept, params, labels)) - } } -impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets> +impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets> Fit, T, Error> for LogisticRegression { type Object = FittedLogisticRegression; @@ -317,17 +237,41 @@ impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets, T>) -> Result { - self.fit(dataset.records(), dataset.targets()) + let (x, y) = (dataset.records(), dataset.targets()); + let (labels, target) = label_classes(y)?; + self.validate_data(x, &target)?; + let problem = self.setup_problem(x, target); + let solver = self.setup_solver(); + let init_params = self.setup_init_params(x.ncols()); + let result = self.run_solver(problem, solver, ArgminParam(init_params))?; + + let params = result.state().best_param.as_array(); + let (w, intercept) = convert_params(x.nrows(), params); + Ok(FittedLogisticRegression::new( + intercept.into_scalar(), + w, + labels, + )) } } -impl<'a, C: 'a + PartialOrd + Clone, F: Float, D: Data, T: AsTargets> +impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets> Fit, T, Error> for LogisticRegression { type Object = MultiFittedLogisticRegression; fn fit(&self, dataset: &DatasetBase, T>) -> Result { - self.fit(dataset.records(), dataset.targets()) + let (x, y) = (dataset.records(), dataset.targets()); + let (classes, target) = label_classes_multi(y)?; + self.validate_data(x, &target)?; + let problem = self.setup_problem(x, target); + let solver = self.setup_solver(); + let init_params = self.setup_init_params((x.ncols(), classes.len())); + let result = self.run_solver(problem, solver, ArgminParam(init_params))?; + + let params = result.state().best_param.as_array(); + let (w, intercept) = convert_params(x.nrows(), params); + Ok(MultiFittedLogisticRegression::new(intercept, w, classes)) } } @@ -341,7 +285,7 @@ fn label_classes(y: T) -> Result<(ClassLabels, Array1)> where F: Float, T: AsTargets, - C: PartialOrd + Clone, + C: Ord + Clone, { let y_single_target = y.try_single_target()?; let mut classes: Vec<&C> = vec![]; @@ -390,13 +334,26 @@ where )) } -fn label_classes_multi(y: T) -> Result<(ClassLabels, Array2)> +fn label_classes_multi(y: T) -> Result<(Vec, Array2)> where F: Float, T: AsTargets, - C: PartialOrd + Clone, + C: Ord + Clone, { - todo!() + let y_single_target = y.try_single_target()?; + let mut classes = y_single_target.to_vec(); + // Dedup the list of classes + classes.sort(); + classes.dedup(); + + let mut onehot = Array2::zeros((y_single_target.len(), classes.len())); + Zip::from(onehot.rows_mut()) + .and(&y_single_target) + .for_each(|mut oh_row, cls| { + let idx = classes.binary_search(cls).unwrap(); + oh_row[idx] = F::one(); + }); + Ok((classes, onehot)) } /// Conditionally split the feature vector `w` into parameter vector and @@ -465,7 +422,7 @@ fn softmax_inplace>(v: &mut ArrayBase> #[derive(PartialEq, Debug)] pub struct MultiFittedLogisticRegression { threshold: F, - intercept: F, + intercept: Array1, params: Array2, classes: Vec, } impl MultiFittedLogisticRegression { - fn new(intercept: F, params: Array2, 
classes: Vec) -> Self { + fn new(intercept: Array1, params: Array2, classes: Vec) -> Self { Self { threshold: F::cast(0.5), intercept, @@ -689,8 +646,8 @@ impl MultiFittedLogisticRegression { self } - pub fn intercept(&self) -> F { - self.intercept + pub fn intercept(&self) -> &Array1 { + &self.intercept } pub fn params(&self) -> &Array2 { @@ -699,8 +656,7 @@ impl MultiFittedLogisticRegression { /// Return non-normalized probabilities (n_samples * n_classes) fn predict_nonorm_probabilities>(&self, x: &ArrayBase) -> Array2 { - let mut probs = x.dot(&self.params) + self.intercept; - probs + x.dot(&self.params) + &self.intercept } /// Return normalized probabilities for each output class. The output dimensions are (n_samples @@ -710,7 +666,7 @@ impl MultiFittedLogisticRegression { probs .rows_mut() .into_iter() - .for_each(|row| softmax_inplace(&mut row)); + .for_each(|mut row| softmax_inplace(&mut row)); probs } } @@ -734,7 +690,8 @@ impl> let probs = self.predict_nonorm_probabilities(x); Zip::from(probs.rows()).and(y).for_each(|prob_row, out| { - *out = prob_row.argmax_skipnan().unwrap(); + let idx = prob_row.argmax().unwrap(); + *out = self.classes[idx].clone(); }); } From 023fa481039bed782d1f8bdc826895895cd083d1 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 17:06:13 -0400 Subject: [PATCH 06/18] Implement dot and norm on ArgminParams --- algorithms/linfa-logistic/src/argmin_param.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/algorithms/linfa-logistic/src/argmin_param.rs b/algorithms/linfa-logistic/src/argmin_param.rs index 34f9b8b7e..be6316294 100644 --- a/algorithms/linfa-logistic/src/argmin_param.rs +++ b/algorithms/linfa-logistic/src/argmin_param.rs @@ -44,15 +44,13 @@ impl ArgminAdd, ArgminParam> for impl ArgminDot, F> for ArgminParam { fn dot(&self, other: &ArgminParam) -> F { - //self.0.dot(&other.0) - todo!() + elem_dot(&self.0, &other.0) } } impl ArgminNorm for ArgminParam { fn norm(&self) -> F { - //self.0.dot(&self.0) - todo!() + elem_dot(&self.0, &self.0) } } From 2e76958d365025708675c22bba6d8af3fab7369d Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 17:36:27 -0400 Subject: [PATCH 07/18] Everything compiles --- algorithms/linfa-logistic/src/lib.rs | 112 +++++++++++++++++---------- 1 file changed, 71 insertions(+), 41 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 2d32e18d2..1e57a6517 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -67,7 +67,7 @@ use float::Float; /// let model = LogisticRegression::default().fit(&dataset).unwrap(); /// let prediction = model.predict(&dataset); /// ``` -pub struct LogisticRegression { +pub struct LogisticRegressionBase { alpha: F, fit_intercept: bool, max_iterations: u64, @@ -75,9 +75,12 @@ pub struct LogisticRegression { initial_params: Option>, } -impl Default for LogisticRegression { +pub type LogisticRegression = LogisticRegressionBase; +pub type MultiLogisticRegression = LogisticRegressionBase; + +impl Default for LogisticRegressionBase { fn default() -> Self { - LogisticRegression::new() + LogisticRegressionBase::new() } } @@ -85,10 +88,10 @@ type LBFGSType = LBFGS, F>, Argmin type LBFGSType1 = LBFGSType; type LBFGSType2 = LBFGSType; -impl LogisticRegression { +impl LogisticRegressionBase { /// Creates a new LogisticRegression with default configuration. 
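    ///
    /// A minimal usage sketch of the builder surface (shown with the two-class
    /// alias; `MultiLogisticRegression` is configured the same way, and the
    /// parameter values here are arbitrary):
    ///
    /// ```ignore
    /// let model = LogisticRegression::default()
    ///     .alpha(0.5)
    ///     .with_intercept(true)
    ///     .max_iterations(200);
    /// ```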
pub fn new() -> Self { - LogisticRegression { + LogisticRegressionBase { alpha: F::cast(1.0), fit_intercept: true, max_iterations: 100, @@ -216,7 +219,7 @@ impl LogisticRegression { } impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets> - Fit, T, Error> for LogisticRegression + Fit, T, Error> for LogisticRegression { type Object = FittedLogisticRegression; @@ -256,7 +259,7 @@ impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets, T: AsTargets> - Fit, T, Error> for LogisticRegression + Fit, T, Error> for MultiLogisticRegression { type Object = MultiFittedLogisticRegression; @@ -796,6 +799,7 @@ mod test { use super::*; use approx::{assert_abs_diff_eq, AbsDiffEq}; + use linfa::prelude::*; use ndarray::{array, Array2}; /// Test that the logistic loss function works as expected. @@ -925,11 +929,15 @@ mod test { fn simple_example_1() { let log_reg = LogisticRegression::default(); let x = array![[-1.0], [-0.01], [0.01], [1.0]]; - let y = array![0.0, 0.0, 1.0, 1.0]; - let res = log_reg.fit(&x, &y).unwrap(); + let y = array![0, 0, 1, 1]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); assert_abs_diff_eq!(res.intercept(), 0.0); assert!(res.params().abs_diff_eq(&array![0.681], 1e-3)); - assert_eq!(res.predict(&x), y); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); } #[test] @@ -937,13 +945,17 @@ mod test { let log_reg = LogisticRegression::default(); let x = array![[0.01], [1.0], [-1.0], [-0.01]]; let y = array!["dog", "dog", "cat", "cat"]; - let res = log_reg.fit(&x, &y).unwrap(); + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); assert_abs_diff_eq!(res.intercept(), 0.0); assert!(res.params().abs_diff_eq(&array![0.681], 1e-3)); assert!(res - .predict_probabilities(&x) + .predict_probabilities(dataset.records()) .abs_diff_eq(&array![0.501, 0.664, 0.335, 0.498], 1e-3)); - assert_eq!(res.predict(&x), y); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); } #[test] @@ -961,11 +973,15 @@ mod test { [8.0], [9.0] ]; - let y = array![0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; - let res = log_reg.fit(&x, &y).unwrap(); + let y = array![0, 0, 0, 0, 1, 1, 1, 1, 1, 1]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); assert!(res.intercept().abs_diff_eq(&-4.124, 1e-3)); assert!(res.params().abs_diff_eq(&array![1.181], 1e-3)); - assert_eq!(res.predict(&x), y); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); } #[test] @@ -973,7 +989,7 @@ mod test { let log_reg = LogisticRegression::default(); let x = array![[0.01], [1.0], [-1.0], [-0.01]]; let y = array![[0, 0], [0, 0], [0, 0], [0, 0]]; - let res = log_reg.fit(&x, &y); + let res = log_reg.fit(&Dataset::new(x, y)); assert_eq!( res.unwrap_err().to_string(), "multiple targets not supported".to_string() @@ -984,8 +1000,8 @@ mod test { fn rejects_mismatching_x_y() { let log_reg = LogisticRegression::default(); let x = array![[-1.0], [-0.01], [0.01]]; - let y = array![0.0, 0.0, 1.0, 1.0]; - let res = log_reg.fit(&x, &y); + let y = array![0, 0, 1, 1]; + let res = log_reg.fit(&Dataset::new(x, y)); assert_eq!( res.unwrap_err().to_string(), "Expected `x` and `y` to have same number of rows, got 3 != 4".to_string() @@ -998,15 +1014,15 @@ mod test { let inf_xs: Vec<_> = infs.iter().map(|&inf| array![[1.0], [inf]]).collect(); let log_reg = LogisticRegression::default(); let normal_x = array![[-1.0], 
[1.0]]; - let y = array![0.0, 1.0]; + let y = array![0, 1]; let expected = "Values must be finite and not `Inf`, `-Inf` or `NaN`".to_string(); for inf_x in &inf_xs { - let res = log_reg.fit(inf_x, &y); + let res = log_reg.fit(&Dataset::new(inf_x.to_owned(), y.to_owned())); assert_eq!(res.unwrap_err().to_string(), expected); } for inf in &infs { let log_reg = LogisticRegression::default().alpha(*inf); - let res = log_reg.fit(&normal_x, &y); + let res = log_reg.fit(&Dataset::new(normal_x.to_owned(), y.to_owned())); assert_eq!(res.unwrap_err().to_string(), expected); } let mut non_positives = infs; @@ -1014,7 +1030,7 @@ mod test { non_positives.push(0.0); for inf in &non_positives { let log_reg = LogisticRegression::default().gradient_tolerance(*inf); - let res = log_reg.fit(&normal_x, &y); + let res = log_reg.fit(&Dataset::new(normal_x.to_owned(), y.to_owned())); assert_eq!( res.unwrap_err().to_string(), "gradient_tolerance must be a positive, finite number" @@ -1026,30 +1042,36 @@ mod test { fn validates_initial_params() { let infs = vec![std::f64::INFINITY, std::f64::NEG_INFINITY, std::f64::NAN]; let normal_x = array![[-1.0], [1.0]]; - let normal_y = array![0.0, 1.0]; + let normal_y = array![0, 1]; + let dataset = Dataset::new(normal_x, normal_y); let expected = "Initial parameter guess must be finite".to_string(); for inf in &infs { - let log_reg = LogisticRegression::default().initial_params(array![*inf], 0.0); - let res = log_reg.fit(&normal_x, &normal_y); - assert_eq!(res.unwrap_err().to_string(), expected); - } - for inf in &infs { - let log_reg = LogisticRegression::default().initial_params(array![0.0], *inf); - let res = log_reg.fit(&normal_x, &normal_y); + let log_reg = LogisticRegression::default().initial_params(array![*inf]); + let res = log_reg.fit(&dataset); assert_eq!(res.unwrap_err().to_string(), expected); } { - let log_reg = LogisticRegression::default().initial_params(array![0.0, 0.0], 0.0); - let res = log_reg.fit(&normal_x, &normal_y); + let log_reg = LogisticRegression::default().initial_params(array![0.0, 0.0]); + let res = log_reg.fit(&dataset); assert_eq!(res.unwrap_err().to_string(), "Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`".to_string()); } + { + let log_reg = LogisticRegression::default() + .with_intercept(false) + .initial_params(array![0.0, 0.0]); + let res = log_reg.fit(&dataset); + assert_eq!( + res.unwrap_err().to_string(), + "Initial parameter should be smaller when there's no intercept".to_string() + ); + } } #[test] fn uses_initial_params() { - let (params, intercept) = (array![1.2], -4.12); + let params = array![1.2, -4.12]; let log_reg = LogisticRegression::default() - .initial_params(params, intercept) + .initial_params(params) .max_iterations(5); let x = array![ [0.0], @@ -1063,21 +1085,29 @@ mod test { [8.0], [9.0] ]; - let y = array![0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]; - let res = log_reg.fit(&x, &y).unwrap(); + let y = array![0, 0, 0, 0, 1, 1, 1, 1, 1, 1]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); assert!(res.intercept().abs_diff_eq(&-4.124, 1e-3)); assert!(res.params().abs_diff_eq(&array![1.181], 1e-3)); - assert_eq!(res.predict(&x), y); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); } #[test] fn works_with_f32() { let log_reg = LogisticRegression::default(); let x: Array2 = array![[-1.0], [-0.01], [0.01], [1.0]]; - let y: Array1 = array![0.0, 0.0, 1.0, 1.0]; - let res = 
log_reg.fit(&x, &y).unwrap(); + let y = array![0, 0, 1, 1]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); assert_abs_diff_eq!(res.intercept(), 0.0_f32); assert!(res.params().abs_diff_eq(&array![0.682_f32], 1e-3)); - assert_eq!(res.predict(&x), y); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); } } From 2624bfdb1990a8a703c896064eefa158964d4892 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 17:37:42 -0400 Subject: [PATCH 08/18] Get rid of ndarray-linalg --- algorithms/linfa-logistic/Cargo.toml | 1 - algorithms/linfa-logistic/src/float.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml index c7454605e..98f9ee528 100644 --- a/algorithms/linfa-logistic/Cargo.toml +++ b/algorithms/linfa-logistic/Cargo.toml @@ -15,7 +15,6 @@ categories = ["algorithms", "mathematics", "science"] [dependencies] ndarray = { version = "0.15", features = ["approx", "blas"] } -ndarray-linalg = "0.14" ndarray-stats = "0.5.0" num-traits = "0.2" argmin = { version = "0.4.6", features = ["ndarrayl"] } diff --git a/algorithms/linfa-logistic/src/float.rs b/algorithms/linfa-logistic/src/float.rs index 49d0f6846..1cac762b2 100644 --- a/algorithms/linfa-logistic/src/float.rs +++ b/algorithms/linfa-logistic/src/float.rs @@ -1,7 +1,6 @@ use crate::argmin_param::ArgminParam; use argmin::prelude::{ArgminFloat, ArgminMul}; use ndarray::{Dimension, Ix1, Ix2, NdFloat}; -use ndarray_linalg::Lapack; use num_traits::FromPrimitive; /// A Float trait that captures the requirements we need for the various @@ -9,7 +8,6 @@ use num_traits::FromPrimitive; pub trait Float: ArgminFloat + NdFloat - + Lapack + Default + Clone + FromPrimitive From 658acb3e181dce85ed62b1f7331e860637fd0d05 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 17:39:18 -0400 Subject: [PATCH 09/18] Revert "Get rid of ndarray-linalg" This reverts commit 2624bfdb1990a8a703c896064eefa158964d4892. 
--- algorithms/linfa-logistic/Cargo.toml | 1 + algorithms/linfa-logistic/src/float.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml index 98f9ee528..c7454605e 100644 --- a/algorithms/linfa-logistic/Cargo.toml +++ b/algorithms/linfa-logistic/Cargo.toml @@ -15,6 +15,7 @@ categories = ["algorithms", "mathematics", "science"] [dependencies] ndarray = { version = "0.15", features = ["approx", "blas"] } +ndarray-linalg = "0.14" ndarray-stats = "0.5.0" num-traits = "0.2" argmin = { version = "0.4.6", features = ["ndarrayl"] } diff --git a/algorithms/linfa-logistic/src/float.rs b/algorithms/linfa-logistic/src/float.rs index 1cac762b2..49d0f6846 100644 --- a/algorithms/linfa-logistic/src/float.rs +++ b/algorithms/linfa-logistic/src/float.rs @@ -1,6 +1,7 @@ use crate::argmin_param::ArgminParam; use argmin::prelude::{ArgminFloat, ArgminMul}; use ndarray::{Dimension, Ix1, Ix2, NdFloat}; +use ndarray_linalg::Lapack; use num_traits::FromPrimitive; /// A Float trait that captures the requirements we need for the various @@ -8,6 +9,7 @@ use num_traits::FromPrimitive; pub trait Float: ArgminFloat + NdFloat + + Lapack + Default + Clone + FromPrimitive From 596ad8b9da5ed59e48d964d0c4179820d7dbd39c Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 20:12:46 -0400 Subject: [PATCH 10/18] Fixed all tests to pass --- algorithms/linfa-logistic/src/argmin_param.rs | 2 +- algorithms/linfa-logistic/src/lib.rs | 64 +++++++++---------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/algorithms/linfa-logistic/src/argmin_param.rs b/algorithms/linfa-logistic/src/argmin_param.rs index be6316294..3905263df 100644 --- a/algorithms/linfa-logistic/src/argmin_param.rs +++ b/algorithms/linfa-logistic/src/argmin_param.rs @@ -38,7 +38,7 @@ impl ArgminSub, ArgminParam> for impl ArgminAdd, ArgminParam> for ArgminParam { fn add(&self, other: &ArgminParam) -> ArgminParam { - ArgminParam(&self.0 - &other.0) + ArgminParam(&self.0 + &other.0) } } diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 1e57a6517..7723fa0b2 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -249,7 +249,7 @@ impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets, T: AsTargets> ArgminOp for LogisticRegressionProblem1<'a /// Apply the cost function to a parameter `p` fn apply(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); - Ok(logistic_loss(self.x, &self.target, self.alpha, w)) + let cost = logistic_loss(self.x, &self.target, self.alpha, w); + println!("apply: {} {}", w, cost); + Ok(cost) } /// Compute the gradient at parameter `p`. 
fn gradient(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); - Ok(ArgminParam(logistic_grad( - self.x, - &self.target, - self.alpha, - w, - ))) + let grad = ArgminParam(logistic_grad(self.x, &self.target, self.alpha, w)); + println!("grad: {} {}", w, grad.as_array()); + Ok(grad) } } @@ -797,6 +796,7 @@ impl<'a, F: Float, A: Data> SolvableProblem for LogisticRegressionProb mod test { extern crate linfa; + use super::Error; use super::*; use approx::{assert_abs_diff_eq, AbsDiffEq}; use linfa::prelude::*; @@ -990,10 +990,10 @@ mod test { let x = array![[0.01], [1.0], [-1.0], [-0.01]]; let y = array![[0, 0], [0, 0], [0, 0], [0, 0]]; let res = log_reg.fit(&Dataset::new(x, y)); - assert_eq!( - res.unwrap_err().to_string(), - "multiple targets not supported".to_string() - ); + assert!(matches!( + res.unwrap_err(), + Error::LinfaError(linfa::Error::MultipleTargets) + )); } #[test] @@ -1002,10 +1002,7 @@ mod test { let x = array![[-1.0], [-0.01], [0.01]]; let y = array![0, 0, 1, 1]; let res = log_reg.fit(&Dataset::new(x, y)); - assert_eq!( - res.unwrap_err().to_string(), - "Expected `x` and `y` to have same number of rows, got 3 != 4".to_string() - ); + assert!(matches!(res.unwrap_err(), Error::MismatchedShapes(3, 4))); } #[test] @@ -1015,15 +1012,14 @@ mod test { let log_reg = LogisticRegression::default(); let normal_x = array![[-1.0], [1.0]]; let y = array![0, 1]; - let expected = "Values must be finite and not `Inf`, `-Inf` or `NaN`".to_string(); for inf_x in &inf_xs { let res = log_reg.fit(&Dataset::new(inf_x.to_owned(), y.to_owned())); - assert_eq!(res.unwrap_err().to_string(), expected); + assert!(matches!(res.unwrap_err(), Error::InvalidValues)); } for inf in &infs { let log_reg = LogisticRegression::default().alpha(*inf); let res = log_reg.fit(&Dataset::new(normal_x.to_owned(), y.to_owned())); - assert_eq!(res.unwrap_err().to_string(), expected); + assert!(matches!(res.unwrap_err(), Error::InvalidValues)); } let mut non_positives = infs; non_positives.push(-1.0); @@ -1031,10 +1027,7 @@ mod test { for inf in &non_positives { let log_reg = LogisticRegression::default().gradient_tolerance(*inf); let res = log_reg.fit(&Dataset::new(normal_x.to_owned(), y.to_owned())); - assert_eq!( - res.unwrap_err().to_string(), - "gradient_tolerance must be a positive, finite number" - ); + assert!(matches!(res.unwrap_err(), Error::InvalidGradientTolerance)); } } @@ -1044,26 +1037,31 @@ mod test { let normal_x = array![[-1.0], [1.0]]; let normal_y = array![0, 1]; let dataset = Dataset::new(normal_x, normal_y); - let expected = "Initial parameter guess must be finite".to_string(); for inf in &infs { - let log_reg = LogisticRegression::default().initial_params(array![*inf]); + let log_reg = LogisticRegression::default().initial_params(array![*inf, 0.0]); let res = log_reg.fit(&dataset); - assert_eq!(res.unwrap_err().to_string(), expected); + assert!(matches!( + res.unwrap_err(), + Error::InvalidInitialParametersGuess + )); } { - let log_reg = LogisticRegression::default().initial_params(array![0.0, 0.0]); + let log_reg = LogisticRegression::default().initial_params(array![0.0, 0.0, 0.0]); let res = log_reg.fit(&dataset); - assert_eq!(res.unwrap_err().to_string(), "Size of initial parameter guess must be the same as the number of columns in the feature matrix `x`".to_string()); + assert!(matches!( + res.unwrap_err(), + Error::InvalidInitialParametersGuessSize + )); } { let log_reg = LogisticRegression::default() .with_intercept(false) .initial_params(array![0.0, 0.0]); let res = 
log_reg.fit(&dataset); - assert_eq!( - res.unwrap_err().to_string(), - "Initial parameter should be smaller when there's no intercept".to_string() - ); + assert!(matches!( + res.unwrap_err(), + Error::InvalidInitialParametersGuessSize + )); } } From 054bf3df1cf997075b3bfbddf6b52271d5587d80 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Sun, 15 Aug 2021 22:17:28 -0400 Subject: [PATCH 11/18] Add test for multinomial loss and grad --- algorithms/linfa-logistic/src/lib.rs | 118 +++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 5 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 7723fa0b2..c8f306c75 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -450,7 +450,8 @@ fn logistic_loss>( -yz.sum() + F::cast(0.5) * alpha * params.dot(¶ms) } -/// Compute the log of probabilities, which is `log(softmax(H))`, along with `W`. +/// Compute the log of probabilities, which is `log(softmax(H))`, where H is `X . W + b`. Also +/// returns `W` without the intercept. /// `Y` is the output (n_samples * n_classes), `X` is the input (n_samples * n_features), `W` is the /// params (n_features * n_classes), `b` is the intercept vector (n_classes). fn multi_logistic_prob_params>( @@ -463,11 +464,11 @@ fn multi_logistic_prob_params>( let h = x.dot(¶ms) + intercept; // This computes `H - log(sum(exp(H)))`, which is equal to // `log(softmax(H)) = log(exp(H) / sum(exp(H)))` - let log_prob = &h - log_sum_exp(&h, Axis(1)); + let log_prob = &h - log_sum_exp(&h, Axis(1)).into_shape((h.nrows(), 1)).unwrap(); (log_prob, params) } -/// Computes loss function of `-sum(Y * log(softmax(H))) + alpha/2 * norm(W)`, where H is `X . W + b`. +/// Computes loss function of `-sum(Y * log(softmax(H))) + alpha/2 * norm(W)` fn multi_logistic_loss>( x: &ArrayBase, y: &Array2, @@ -515,7 +516,7 @@ fn multi_logistic_grad>( ) -> Array2 { let (log_prob, mut params) = multi_logistic_prob_params(x, w); let (n_features, n_classes) = params.dim(); - let intercept = x.ncols() > n_features; + let intercept = w.nrows() > n_features; let mut grad = Array::zeros((n_features + intercept as usize, n_classes)); // This value is `softmax(H)` @@ -798,7 +799,7 @@ mod test { use super::Error; use super::*; - use approx::{assert_abs_diff_eq, AbsDiffEq}; + use approx::{assert_abs_diff_eq, assert_relative_eq, AbsDiffEq}; use linfa::prelude::*; use ndarray::{array, Array2}; @@ -1108,4 +1109,111 @@ mod test { dataset.targets().try_single_target().unwrap() ); } + + #[test] + fn test_log_sum_exp() { + let data = array![[3.3, 0.4, -2.1], [0.4, 2.2, -0.1], [1., 0., -1.]]; + let out = log_sum_exp(&data, Axis(1)); + assert_abs_diff_eq!(out, array![3.35783, 2.43551, 1.40761], epsilon = 1e-5); + } + + #[test] + fn test_softmax() { + let mut data = array![3.3, 5.5, 0.1, -4.4, 8.0]; + softmax_inplace(&mut data); + assert_relative_eq!( + data, + array![0.0083324, 0.075200047, 0.000339647, 0.000003773, 0.91612413], + epsilon = 1e-8 + ); + assert_abs_diff_eq!(data.sum(), 1.0); + } + + #[test] + fn test_multi_logistic_loss_grad() { + let x = array![ + [0.0, 0.5], + [1.0, -1.0], + [2.0, -2.0], + [3.0, -3.0], + [4.0, -4.0], + [5.0, -5.0], + [6.0, -6.0], + [7.0, -7.0], + ]; + let y = array![ + [1.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + [0.0, 0.0, 1.0], + [0.0, 0.0, 1.0], + ]; + let params1 = array![[4.4, -1.2, 3.3], [3.4, 0.1, 0.0]]; + let params2 = array![[0.001, -3.2, 2.9], [0.1, 4.5, 5.7], [4.5, 
2.2, 1.7]]; + let alpha = 0.6; + + { + let (log_prob, w) = multi_logistic_prob_params(&x, ¶ms1); + assert_abs_diff_eq!( + log_prob, + array![ + [-3.18259845e-01, -1.96825985e+00, -2.01825985e+00], + [-2.40463987e+00, -4.70463987e+00, -1.04639868e-01], + [-4.61010168e+00, -9.21010168e+00, -1.01016809e-02], + [-6.90100829e+00, -1.38010083e+01, -1.00829256e-03], + [-9.20010104e+00, -1.84001010e+01, -1.01044506e-04], + [-1.15000101e+01, -2.30000101e+01, -1.01301449e-05], + [-1.38000010e+01, -2.76000010e+01, -1.01563199e-06], + [-1.61000001e+01, -3.22000001e+01, -1.01826043e-07], + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(w, params1); + let loss = multi_logistic_loss(&x, &y, alpha, ¶ms1); + assert_abs_diff_eq!(loss, 57.11212197835295, epsilon = 1e-6); + let grad = multi_logistic_grad(&x, &y, alpha, ¶ms1); + assert_abs_diff_eq!( + grad, + array![ + [1.7536815, -9.71074369, 11.85706219], + [2.79002537, 9.12059357, -9.81061893] + ], + epsilon = 1e-6 + ); + } + + { + let (log_prob, w) = multi_logistic_prob_params(&x, ¶ms2); + assert_abs_diff_eq!( + log_prob, + array![ + [-1.06637742e+00, -1.16637742e+00, -1.06637742e+00], + [-4.12429463e-03, -9.90512429e+00, -5.50512429e+00], + [-2.74092305e-04, -1.75022741e+01, -8.20227409e+00], + [-1.84027855e-05, -2.51030184e+01, -1.09030184e+01], + [-1.23554225e-06, -3.27040012e+01, -1.36040012e+01], + [-8.29523046e-08, -4.03050001e+01, -1.63050001e+01], + [-5.56928016e-09, -4.79060000e+01, -1.90060000e+01], + [-3.73912013e-10, -5.55070000e+01, -2.17070000e+01] + ], + epsilon = 1e-6 + ); + assert_abs_diff_eq!(w, params2.slice(s![..params2.nrows() - 1, ..])); + let loss = multi_logistic_loss(&x, &y, alpha, ¶ms2); + assert_abs_diff_eq!(loss, 154.8177958366479, epsilon = 1e-6); + let grad = multi_logistic_grad(&x, &y, alpha, ¶ms2); + assert_abs_diff_eq!( + grad, + array![ + [26.99587549, -10.91995003, -16.25532546], + [-27.26314882, 11.85569669, 21.58745213], + [5.33984376, -2.68845675, -2.65138701] + ], + epsilon = 1e-6 + ); + } + } } From 1b136284455e4061a52d041c8f5ca36ceadde395 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Mon, 16 Aug 2021 00:15:42 -0400 Subject: [PATCH 12/18] Fix argmin param norm --- algorithms/linfa-logistic/src/argmin_param.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithms/linfa-logistic/src/argmin_param.rs b/algorithms/linfa-logistic/src/argmin_param.rs index 3905263df..e4867143a 100644 --- a/algorithms/linfa-logistic/src/argmin_param.rs +++ b/algorithms/linfa-logistic/src/argmin_param.rs @@ -50,7 +50,7 @@ impl ArgminDot, F> for ArgminParam ArgminNorm for ArgminParam { fn norm(&self) -> F { - elem_dot(&self.0, &self.0) + num_traits::Float::sqrt(elem_dot(&self.0, &self.0)) } } From 0cf4b440476631efd44f41bb19cf944975f12dde Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Mon, 16 Aug 2021 00:53:19 -0400 Subject: [PATCH 13/18] Add multi logistic regression tests --- algorithms/linfa-logistic/src/lib.rs | 98 ++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 11 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index c8f306c75..14330de7e 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -743,7 +743,6 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem1<'a fn apply(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); let cost = logistic_loss(self.x, &self.target, self.alpha, w); - println!("apply: {} {}", w, cost); Ok(cost) } @@ -751,7 +750,6 @@ impl<'a, F: Float, A: 
Data> ArgminOp for LogisticRegressionProblem1<'a fn gradient(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); let grad = ArgminParam(logistic_grad(self.x, &self.target, self.alpha, w)); - println!("grad: {} {}", w, grad.as_array()); Ok(grad) } } @@ -766,18 +764,15 @@ impl<'a, F: Float, A: Data> ArgminOp for LogisticRegressionProblem2<'a /// Apply the cost function to a parameter `p` fn apply(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); - Ok(multi_logistic_loss(self.x, &self.target, self.alpha, w)) + let cost = multi_logistic_loss(self.x, &self.target, self.alpha, w); + Ok(cost) } /// Compute the gradient at parameter `p`. fn gradient(&self, p: &Self::Param) -> std::result::Result { let w = p.as_array(); - Ok(ArgminParam(multi_logistic_grad( - self.x, - &self.target, - self.alpha, - w, - ))) + let grad = ArgminParam(multi_logistic_grad(self.x, &self.target, self.alpha, w)); + Ok(grad) } } @@ -977,8 +972,6 @@ mod test { let y = array![0, 0, 0, 0, 1, 1, 1, 1, 1, 1]; let dataset = Dataset::new(x, y); let res = log_reg.fit(&dataset).unwrap(); - assert!(res.intercept().abs_diff_eq(&-4.124, 1e-3)); - assert!(res.params().abs_diff_eq(&array![1.181], 1e-3)); assert_eq!( &res.predict(dataset.records()), dataset.targets().try_single_target().unwrap() @@ -1216,4 +1209,87 @@ mod test { ); } } + + #[test] + fn simple_multi_example() { + let x = array![[-1., 0.], [0., 1.], [1., 1.]]; + let y = array![2, 1, 0]; + let log_reg = MultiLogisticRegression::default() + .alpha(0.1) + .initial_params(Array::zeros((3, 3))); + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); + assert_eq!(res.params().dim(), (2, 3)); + assert_eq!(res.intercept().dim(), 3); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); + } + + #[test] + fn simple_multi_example_text() { + let log_reg = MultiLogisticRegression::default().alpha(0.1); + let x = array![[0.1], [1.0], [-1.0], [-0.1]]; + let y = array!["dog", "ape", "rocket", "cat"]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); + assert_eq!(res.params().dim(), (1, 4)); + assert_eq!(res.intercept().dim(), 4); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); + } + + #[test] + fn multi_on_binary_problem() { + let log_reg = MultiLogisticRegression::default().alpha(1.0); + let x = array![ + [0.0], + [1.0], + [2.0], + [3.0], + [4.0], + [5.0], + [6.0], + [7.0], + [8.0], + [9.0] + ]; + let y = array![0, 0, 0, 0, 1, 1, 1, 1, 1, 1]; + let dataset = Dataset::new(x, y); + let res = log_reg.fit(&dataset).unwrap(); + assert_eq!(res.params().dim(), (1, 2)); + assert_eq!(res.intercept().dim(), 2); + assert_eq!( + &res.predict(dataset.records()), + dataset.targets().try_single_target().unwrap() + ); + } + + #[test] + fn reject_num_class_mismatch() { + let n_samples = 4; + let n_classes = 3; + let n_features = 1; + let x = Array2::::zeros((n_samples, n_features)); + let y = array![0, 1, 2, 0]; + let dataset = Dataset::new(x, y); + + let log_reg = MultiLogisticRegression::default() + .initial_params(Array::zeros((n_features, n_classes))); + assert!(matches!( + log_reg.fit(&dataset).unwrap_err(), + Error::InvalidInitialParametersGuessSize + )); + + let log_reg = MultiLogisticRegression::default() + .with_intercept(false) + .initial_params(Array::zeros((n_features + 1, n_classes))); + assert!(matches!( + log_reg.fit(&dataset).unwrap_err(), + Error::InvalidInitialParametersGuessSize + )); + } } 
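For readers tracking the multinomial math introduced in the last three patches: the loss and gradient code above leans on the identity `log(softmax(H)) = H - log_sum_exp(H)`, with `log_sum_exp` evaluated against the row maximum for numerical stability. Below is a minimal standalone sketch of those two pieces, assuming ndarray 0.15; the names `log_softmax` and `multinomial_loss` are illustrative rather than the crate's internal API, and the intercept row that the crate folds into `w` is omitted for brevity.

```rust
// Standalone sketch, not crate code: stable log-softmax and the multinomial
// loss -sum(Y * log(softmax(H))) + alpha/2 * ||W||^2, assuming ndarray 0.15.
use ndarray::{Array1, Array2, Axis};

fn log_softmax(h: &Array2<f64>) -> Array2<f64> {
    // Subtract the row-wise max before exponentiating to avoid overflow:
    // log(softmax(H)) = H - (max + log(sum(exp(H - max))))
    let max = h.fold_axis(Axis(1), f64::NEG_INFINITY, |&m, &v| m.max(v));
    let log_sum_exp: Array1<f64> = (h - &max.view().insert_axis(Axis(1)))
        .mapv(f64::exp)
        .sum_axis(Axis(1))
        .mapv(f64::ln)
        + &max;
    h - &log_sum_exp.insert_axis(Axis(1))
}

fn multinomial_loss(x: &Array2<f64>, y: &Array2<f64>, alpha: f64, w: &Array2<f64>) -> f64 {
    // `y` is one-hot (n_samples, n_classes); `w` is (n_features, n_classes)
    let log_prob = log_softmax(&x.dot(w));
    -(y * &log_prob).sum() + 0.5 * alpha * (w * w).sum()
}
```

Substituting `x.dot(w) + b` for `x.dot(w)` recovers the `H = X . W + b` used by `multi_logistic_prob_params` above.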
From 9ea787f2c918ac5744995e19569c70c7866459b9 Mon Sep 17 00:00:00 2001
From: YuhanLiin 
Date: Mon, 23 Aug 2021 16:04:02 -0400
Subject: [PATCH 14/18] Add docs

---
 algorithms/linfa-logistic/src/lib.rs | 64 +++++++++++++++++-----------
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs
index 14330de7e..58f34bacf 100644
--- a/algorithms/linfa-logistic/src/lib.rs
+++ b/algorithms/linfa-logistic/src/lib.rs
@@ -5,7 +5,7 @@
 //! `linfa-logistic` is a crate in the [`linfa`](https://crates.io/crates/linfa) ecosystem, an effort to create a toolkit for classical Machine Learning implemented in pure Rust, akin to Python's `scikit-learn`.
 //!
 //! ## Current state
-//! `linfa-logistic` provides a pure Rust implementation of a two class [logistic regression model](struct.LogisticRegression.html).
+//! `linfa-logistic` provides a pure Rust implementation of a [binomial logistic regression model](struct.LogisticRegression.html) and a [multinomial logistic regression model](struct.MultiLogisticRegression).
 //!
 //! ## Examples
 //!
@@ -37,6 +37,16 @@ mod float;
 use argmin_param::{elem_dot, ArgminParam};
 use float::Float;
 
+/// A generalized logistic regression type that specializes as either binomial logistic regression
+/// or multinomial logistic regression.
+pub struct LogisticRegressionBase {
+    alpha: F,
+    fit_intercept: bool,
+    max_iterations: u64,
+    gradient_tolerance: F,
+    initial_params: Option>,
+}
+
 /// A two-class logistic regression model.
 ///
 /// Logistic regression combines linear models with
@@ -67,15 +77,18 @@ use float::Float;
 /// let model = LogisticRegression::default().fit(&dataset).unwrap();
 /// let prediction = model.predict(&dataset);
 /// ```
-pub struct LogisticRegressionBase {
-    alpha: F,
-    fit_intercept: bool,
-    max_iterations: u64,
-    gradient_tolerance: F,
-    initial_params: Option>,
-}
-
 pub type LogisticRegression = LogisticRegressionBase;
+
+/// A multinomial logistic regression model.
+///
+/// The output labels can be drawn from any discrete set of values, since the algorithm calculates the
+/// likelihood of a feature vector corresponding to any given outcome using the softmax function
+/// `softmax(x) = exp(x) / sum(exp(xi))`.
+///
+/// L2 regularization is used by this algorithm and is weighted by parameter `alpha`. Setting `alpha`
+/// close to zero removes regularization and the problem solved minimizes only the
+/// empirical risk. On the other hand, setting `alpha` to a high value increases
+/// the weight of the L2 norm of the linear model coefficients in the cost function.
 pub type MultiLogisticRegression = LogisticRegressionBase;
 
 impl Default for LogisticRegressionBase {
@@ -228,9 +241,9 @@ impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets, T: AsTargets;
+    /// Given a 2-dimensional feature matrix array `x` with shape
+    /// (n_samples, n_features) and an array of target classes to predict,
+    /// create a `MultiFittedLogisticRegression` object which allows making
+    /// predictions. The target classes can have any number of discrete values.
+    ///
+    /// This method returns an error if any of the preconditions are violated,
+    /// i.e. any values are `Inf` or `NaN`, `y` doesn't have as many items as
+    /// `x` has rows, or if other parameters (gradient_tolerance, alpha) have
+    /// been set to invalid values. The input features are also strongly recommended to be
+    /// normalized to ensure numerical stability.
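+    ///
+    /// A short usage sketch (it mirrors the `simple_multi_example` test; the
+    /// data here is illustrative only):
+    ///
+    /// ```ignore
+    /// use linfa::prelude::*;
+    /// use ndarray::array;
+    ///
+    /// let x = array![[-1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
+    /// let y = array![2, 1, 0];
+    /// let dataset = Dataset::new(x, y);
+    /// let model = MultiLogisticRegression::default()
+    ///     .alpha(0.1)
+    ///     .fit(&dataset)
+    ///     .unwrap();
+    /// let prediction = model.predict(&dataset);
+    /// ```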
fn fit(&self, dataset: &DatasetBase, T>) -> Result { let (x, y) = (dataset.records(), dataset.targets()); let (classes, target) = label_classes_multi(y)?; @@ -337,6 +360,9 @@ where )) } +/// Identify the distinct values of the classes in `y` and map each value to an integer. Smaller +/// classes (by `PartialOrd`) map to smaller integers. Returns the mapping along with a one-hot +/// encoding of the numerical labels corresponding to `y`. fn label_classes_multi(y: T) -> Result<(Vec, Array2)> where F: Float, @@ -621,10 +647,9 @@ impl> } } -/// A fitted logistic regression which can make predictions +/// A fitted multinomial logistic regression which can make predictions #[derive(PartialEq, Debug)] pub struct MultiFittedLogisticRegression { - threshold: F, intercept: Array1, params: Array2, classes: Vec, @@ -633,23 +658,12 @@ pub struct MultiFittedLogisticRegression { impl MultiFittedLogisticRegression { fn new(intercept: Array1, params: Array2, classes: Vec) -> Self { Self { - threshold: F::cast(0.5), intercept, params, classes, } } - /// Set the probability threshold for which the 'positive' class will be - /// predicted. Defaults to 0.5. - pub fn set_threshold(mut self, threshold: F) -> Self { - if threshold < F::zero() || threshold > F::one() { - panic!("FittedLogisticRegression::set_threshold: threshold needs to be between 0.0 and 1.0"); - } - self.threshold = threshold; - self - } - pub fn intercept(&self) -> &Array1 { &self.intercept } From 84f6011981a1e201ec79935e772106f7bd7dc4b8 Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Tue, 24 Aug 2021 23:35:06 -0400 Subject: [PATCH 15/18] Get rid of some cloning in the tests --- algorithms/linfa-logistic/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 58f34bacf..2d82b4050 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -1021,12 +1021,12 @@ mod test { let normal_x = array![[-1.0], [1.0]]; let y = array![0, 1]; for inf_x in &inf_xs { - let res = log_reg.fit(&Dataset::new(inf_x.to_owned(), y.to_owned())); + let res = log_reg.fit(&DatasetBase::new(inf_x.view(), &y)); assert!(matches!(res.unwrap_err(), Error::InvalidValues)); } for inf in &infs { let log_reg = LogisticRegression::default().alpha(*inf); - let res = log_reg.fit(&Dataset::new(normal_x.to_owned(), y.to_owned())); + let res = log_reg.fit(&DatasetBase::new(normal_x.view(), &y)); assert!(matches!(res.unwrap_err(), Error::InvalidValues)); } let mut non_positives = infs; From 9f2ed3903294fe193bded36a2e51c278cc9c369e Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Thu, 26 Aug 2021 01:02:43 -0400 Subject: [PATCH 16/18] Fix comments and docs --- algorithms/linfa-logistic/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 2d82b4050..1845b9fd7 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -113,7 +113,7 @@ impl LogisticRegressionBase { } } - /// Set the normalization parameter `alpha` used for L2 normalization, + /// Set the regularization parameter `alpha` used for L2 regularization, /// defaults to `1.0`. pub fn alpha(mut self, alpha: F) -> Self { self.alpha = alpha; @@ -503,7 +503,6 @@ fn multi_logistic_loss>( ) -> F { let (log_prob, params) = multi_logistic_prob_params(x, w); // Calculate loss - // XXX Should we divide cost by n_samples??? 
-elem_dot(&log_prob, y) + F::cast(0.5) * alpha * elem_dot(¶ms, ¶ms) } @@ -557,7 +556,6 @@ fn multi_logistic_grad>( if intercept { grad.row_mut(n_features).assign(&diff.sum_axis(Axis(0))); } - // XXX should we divide by n_samples?? grad } From 8e7cf9715787a9629984f0f00d8623c1ce6a793c Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Thu, 26 Aug 2021 23:00:29 -0400 Subject: [PATCH 17/18] Use CowArray and ArrayView to prevent copying --- algorithms/linfa-logistic/src/lib.rs | 103 +++++++++++++++------------ 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 1845b9fd7..9db737c4a 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -25,8 +25,8 @@ use argmin::solver::quasinewton::lbfgs::LBFGS; use linfa::prelude::{AsTargets, DatasetBase}; use linfa::traits::{Fit, PredictInplace}; use ndarray::{ - s, Array, Array1, Array2, ArrayBase, Axis, Data, DataMut, Dimension, IntoDimension, Ix1, Ix2, - RemoveAxis, Slice, Zip, + s, Array, Array1, Array2, ArrayBase, ArrayView, ArrayView2, Axis, CowArray, Data, DataMut, + Dimension, IntoDimension, Ix1, Ix2, RemoveAxis, Slice, Zip, }; use ndarray_stats::QuantileExt; use std::default::Default; @@ -140,16 +140,18 @@ impl LogisticRegressionBase { self } - /// Configure the initial parameters from where the optimization starts. - /// The `params` array must have at least the same number of rows as there are columns on the - /// feature matrix `x` passed to the `fit` method + /// Configure the initial parameters from where the optimization starts. The `params` array + /// must have the same number of rows as there are columns on the feature matrix `x` passed to + /// the `fit` method. If `with_intercept` is set, then it needs to have one more row. For + /// multinomial regression, `params` also must have the same number of columns as the number of + /// distinct classes in `y`. pub fn initial_params(mut self, params: Array) -> Self { self.initial_params = Some(params); self } - /// Create the initial parameters, either from a user supplied guess - /// or a 1-d array of `0`s. 
+ /// Create the initial parameters, either from a user supplied array + /// or an array of 0s fn setup_init_params(&self, dims: D::Pattern) -> Array { if let Some(params) = self.initial_params.as_ref() { params.clone() @@ -264,8 +266,8 @@ impl<'a, C: 'a + Ord + Clone, F: Float, D: Data, T: AsTargets, T: AsTargets( n_features: usize, w: &Array, -) -> (Array, Array) { +) -> (ArrayView, CowArray) { let nrows = w.shape()[0]; if n_features == nrows { - (w.to_owned(), Array::zeros(w.raw_dim().remove_axis(Axis(0)))) + ( + w.view(), + Array::zeros(w.raw_dim().remove_axis(Axis(0))).into(), + ) } else if n_features + 1 == nrows { ( - w.slice_axis(Axis(0), Slice::from(..n_features)).to_owned(), - w.index_axis(Axis(0), n_features).to_owned(), + w.slice_axis(Axis(0), Slice::from(..n_features)), + w.index_axis(Axis(0), n_features).into(), ) } else { panic!( @@ -471,19 +480,47 @@ fn logistic_loss>( ) -> F { let n_features = x.shape()[1]; let (params, intercept) = convert_params(n_features, w); - let mut yz = (x.dot(¶ms) + intercept) * y; + let yz = x.dot(¶ms.into_shape((params.len(), 1)).unwrap()) + intercept; + let len = yz.len(); + let mut yz = yz.into_shape(len).unwrap() * y; yz.mapv_inplace(log_logistic); -yz.sum() + F::cast(0.5) * alpha * params.dot(¶ms) } +/// Computes the gradient of the logistic loss function +fn logistic_grad>( + x: &ArrayBase, + y: &Array1, + alpha: F, + w: &Array1, +) -> Array1 { + let n_features = x.shape()[1]; + let (params, intercept) = convert_params(n_features, w); + let yz = x.dot(¶ms.into_shape((params.len(), 1)).unwrap()) + intercept; + let len = yz.len(); + let mut yz = yz.into_shape(len).unwrap() * y; + yz.mapv_inplace(logistic); + yz -= F::one(); + yz *= y; + if w.len() == n_features + 1 { + let mut grad = Array::zeros(w.len()); + grad.slice_mut(s![..n_features]) + .assign(&(x.t().dot(&yz) + (¶ms * alpha))); + grad[n_features] = yz.sum(); + grad + } else { + x.t().dot(&yz) + (¶ms * alpha) + } +} + /// Compute the log of probabilities, which is `log(softmax(H))`, where H is `X . W + b`. Also /// returns `W` without the intercept. /// `Y` is the output (n_samples * n_classes), `X` is the input (n_samples * n_features), `W` is the /// params (n_features * n_classes), `b` is the intercept vector (n_classes). -fn multi_logistic_prob_params>( +fn multi_logistic_prob_params<'a, F: Float, A: Data>( x: &ArrayBase, - w: &Array2, // This parameter includes `W` and `b` -) -> (Array2, Array2) { + w: &'a Array2, // This parameter includes `W` and `b` +) -> (Array2, ArrayView2<'a, F>) { let n_features = x.shape()[1]; let (params, intercept) = convert_params(n_features, w); // Compute H @@ -506,30 +543,6 @@ fn multi_logistic_loss>( -elem_dot(&log_prob, y) + F::cast(0.5) * alpha * elem_dot(¶ms, ¶ms) } -/// Computes the gradient of the logistic loss function -fn logistic_grad>( - x: &ArrayBase, - y: &Array1, - alpha: F, - w: &Array1, -) -> Array1 { - let n_features = x.shape()[1]; - let (params, intercept) = convert_params(n_features, w); - let mut yz = (x.dot(¶ms) + intercept) * y; - yz.mapv_inplace(logistic); - yz -= F::one(); - yz *= y; - if w.len() == n_features + 1 { - let mut grad = Array::zeros(w.len()); - grad.slice_mut(s![..n_features]) - .assign(&(x.t().dot(&yz) + &(params * alpha))); - grad[n_features] = yz.sum(); - grad - } else { - x.t().dot(&yz) + &(params * alpha) - } -} - /// Computes multinomial gradients for `W` and `b` and combine them. /// Gradient for `W` is `Xt . (softmax(H) - Y) + alpha * W`. /// Gradient for `b` is `sum(softmax(H) - Y)`. 
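The two gradient formulas in the doc comment above combine into one short routine. A minimal standalone sketch, again assuming ndarray 0.15 and one-hot targets, with `w` excluding the intercept row (when an intercept is fitted, the crate instead stacks the `b` gradient as an extra row of the `W` gradient):

```rust
// Standalone sketch, not crate code: the multinomial gradients quoted above.
use ndarray::{Array1, Array2, Axis};

fn multinomial_grad(
    x: &Array2<f64>,   // (n_samples, n_features)
    y: &Array2<f64>,   // one-hot targets, (n_samples, n_classes)
    alpha: f64,
    w: &Array2<f64>,   // (n_features, n_classes)
) -> (Array2<f64>, Array1<f64>) {
    // softmax(H) computed row by row, stabilized by the row maximum
    let mut prob = x.dot(w);
    for mut row in prob.rows_mut() {
        let max = row.fold(f64::NEG_INFINITY, |m, &v| m.max(v));
        row.mapv_inplace(|v| (v - max).exp());
        let sum = row.sum();
        row.mapv_inplace(|v| v / sum);
    }
    let diff = prob - y;                          // softmax(H) - Y
    let dw = x.t().dot(&diff) + &(w * alpha);     // Xt . (softmax(H) - Y) + alpha * W
    let db = diff.sum_axis(Axis(0));              // sum(softmax(H) - Y) over samples
    (dw, db)
}
```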
@@ -539,7 +552,7 @@ fn multi_logistic_grad>( alpha: F, w: &Array2, ) -> Array2 { - let (log_prob, mut params) = multi_logistic_prob_params(x, w); + let (log_prob, params) = multi_logistic_prob_params(x, w); let (n_features, n_classes) = params.dim(); let intercept = w.nrows() > n_features; let mut grad = Array::zeros((n_features + intercept as usize, n_classes)); @@ -548,9 +561,7 @@ fn multi_logistic_grad>( let prob = log_prob.mapv_into(num_traits::Float::exp); let diff = prob - y; // Compute gradient for `W` and place it at start of the grad matrix - let mut dw = x.t().dot(&diff); - params *= alpha; - dw += ¶ms; + let dw = x.t().dot(&diff) + (¶ms * alpha); grad.slice_mut(s![..n_features, ..]).assign(&dw); // Compute gradient for `b` and place it at end of grad matrix if intercept { From 65dc9138329399fae1100e2cf783e48d4e0a4bfe Mon Sep 17 00:00:00 2001 From: YuhanLiin Date: Tue, 31 Aug 2021 00:13:52 -0400 Subject: [PATCH 18/18] Add getter for classes for fitted model --- algorithms/linfa-logistic/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs index 9db737c4a..00d94a338 100644 --- a/algorithms/linfa-logistic/src/lib.rs +++ b/algorithms/linfa-logistic/src/lib.rs @@ -696,6 +696,11 @@ impl MultiFittedLogisticRegression { .for_each(|mut row| softmax_inplace(&mut row)); probs } + + /// Get the list of class labels, which maps the numerical class indices to the labels + pub fn classes(&self) -> &[C] { + &self.classes + } } impl>