# JS implementation of Nick Becker's post

- https://beckernick.github.io/logistic-regression-from-scratch/
- https://github.com/beckernick/logistic_regression_from_scratch/blob/master/logistic_regression_scratch.ipynb

In [1]:
import {shape,transpose,dotProduct,full,zeros} from './src/util.module.js';
import {matrixSum1d,matrixSubtract1d,matrixMultiply1d} from './src/util.module.js';

In [2]:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval
/**
 * Evaluate `obj` as a JavaScript expression and return its value.
 *
 * Unlike JSON.parse this accepts any JS expression (unquoted keys, trailing
 * commas, arithmetic, ...), because the text is compiled into a strict-mode
 * function body of the form `return (<obj>)` and then invoked.
 *
 * NOTE(review): this executes arbitrary code contained in `obj`; only pass
 * text from trusted sources.
 *
 * @param {string} obj - source text of the expression to evaluate
 * @returns {*} the value the expression evaluates to
 */
function looseJsonParse(obj) {
    const body = '"use strict";return (' + obj + ')';
    const evaluate = new Function(body);
    return evaluate();
}

In [3]:
// Number of samples per class; the label vector below is two runs of this length.
const num_observations = 5000;
// Evaluate each data file's text into a JS value
// (presumably an array of feature rows — confirm against the files in data/).
const x1 = looseJsonParse(require('fs').readFileSync('data/x1.txt').toString('utf8'));
const x2 = looseJsonParse(require('fs').readFileSync('data/x2.txt').toString('utf8'));
// Stack both classes into a single list: every entry of x1 first, then every entry of x2.
const simulated_separableish_features = [x1, x2].flatMap((rows) => [...rows]);
// Labels in the same order as the features: the zeros(...) run for x1, then the
// full(..., 1) run for x2 (presumably filled with 1 — confirm in util.module.js).
const simulated_labels = [
    ...zeros(num_observations),
    ...full(num_observations, null, 1),
];

In [4]:
/**
 * Map every score through the logistic function 1 / (1 + e^(-score)).
 *
 * @param {number[]} scores - raw scores to squash
 * @returns {number[]} one value per input score, each in the range [0, 1]
 */
function sigmoid(scores) {
    const logistic = (value) => {
        const decay = Math.pow(Math.E, -value);
        return 1 / (1 + decay);
    };
    return scores.map((score) => logistic(score));
}

In [5]:
/**
 * Raise Euler's number to the given power.
 *
 * @param {number} a - the exponent
 * @returns {number} e^a, computed as Math.pow(Math.E, a)
 */
function exp(a) {
    const base = Math.E;
    return Math.pow(base, a);
}

In [6]:
/**
 * Log-likelihood of the labels under a logistic model:
 *
 *     sum_i ( target_i * score_i - log(1 + e^(score_i)) )
 *
 * where the scores come from dotProduct(features, weights-as-a-column).
 *
 * @param {number[][]} features - one row of feature values per observation
 * @param {number[]} target - one label per observation
 * @param {number[]} weights - one weight per feature column
 * @returns {number} the summed log-likelihood (0 when there are no terms)
 */
function log_likelihood(features, target, weights) {
    // log(1 + e^s) rewritten as max(s, 0) + log1p(e^(-|s|)). The naive form
    // overflows to Infinity once s exceeds ~709 (making the whole likelihood
    // -Infinity) and rounds to exactly 0 for very negative s.
    const softplus = (score) =>
        Math.max(score, 0) + Math.log1p(Math.exp(-Math.abs(score)));

    // The product is used as a single-column matrix; transpose it back and
    // take row 0 to get a flat vector of scores.
    let scores = dotProduct(features, transpose([weights]));
    scores = transpose(scores)[0];

    const terms = matrixSubtract1d(
        matrixMultiply1d(target, scores),
        scores.map((score) => softplus(score)));

    // Explicit initial value so an empty list of terms sums to 0 instead of throwing.
    return terms.reduce((total, term) => total + term, 0);
}

In [7]:
/**
 * Fit logistic-regression weights by repeatedly stepping along the gradient of
 * the log-likelihood, starting from all-zero weights.
 *
 * Every 100th step (including step 0) the step number, the freshly updated
 * weights and their log-likelihood are written to the console.
 *
 * @param {number[][]} features - one row of feature values per observation
 * @param {number[]} target - one label per observation
 * @param {number} num_steps - number of update steps to run
 * @param {number} learning_rate - factor applied to the gradient on each step
 * @param {boolean} add_intercept - when truthy, a constant 1 is prepended to every row
 * @returns {number[]} the weights after the last step (intercept first when added)
 */
function logistic_regression(features, target, num_steps, learning_rate, add_intercept) {
    // Vector -> single-column matrix, and back again.
    const asColumn = (vector) => transpose([vector]);
    const asVector = (column) => transpose(column)[0];

    const design = add_intercept
        ? features.map((row) => [1, ...row])
        : features;

    let weights = zeros(shape(design)[1]);

    let step = 0;
    while (step < num_steps) {
        const predictions = sigmoid(asVector(dotProduct(design, asColumn(weights))));

        // Gradient of the log-likelihood: design^T · (target - predictions)
        const residuals = matrixSubtract1d(target, predictions);
        const gradient = asVector(dotProduct(transpose(design), asColumn(residuals)));
        weights = matrixSum1d(weights, matrixMultiply1d(gradient, learning_rate));

        // Progress report every 100 steps
        if (step % 100 === 0) {
            console.log(step, weights, 'log likelihood', log_likelihood(design, target, weights));
        }
        step += 1;
    }

    return weights;
}

Just a couple of changes from logistic_regression_scratch.ipynb ... I'm way too impatient to wait for 50000 epochs, so I train for fewer epochs at a higher learning rate, which gets nearly the same loss and weights.

We're targeting:
- log likelihood of `-140.725421355` and
- weights of
    - `[-13.99400797] [[-5.02712572  8.23286799]]` sklearn LogisticRegression
    - `[-14.09225541  -5.05899648   8.28955762]` logistic_regression_scratch.ipynb

In [8]:
const weights = logistic_regression(simulated_separableish_features, simulated_labels, 2000, 3e-3, true)

0 [ 0, 7.397425245475898, 30.01541021294583 ] log likelihood -72310.42400198299
100 [ -17.82666273125494, -6.369924537519446, 10.445885059868809 ] log likelihood -146.29124937054002
200 [ -17.317295390044116, -6.190795753701512, 10.151332603893513 ] log likelihood -145.0298937263822
300 [ -16.85600559637878, -6.029144899861061, 9.884854139891312 ] log likelihood -143.99638603132615
400 [ -16.442948712538392, -5.884368861535347, 9.646286962075505 ] log likelihood -143.1676362217944
500 [ -16.077142806960314, -5.7561178277138, 9.43504911904683 ] log likelihood -142.51756242722146
600 [ -15.75673684147998, -5.643745398976429, 9.250058841631924 ] log likelihood -142.01875219214884
700 [ -15.479104810001434, -5.546337907261038, 9.08978876028366 ] log likelihood -141.64416106609727
800 [ -15.241000821268164, -5.462766473478286, 8.952355636869779 ] log likelihood -141.36858429542463
900 [ -15.03875298305558, -5.391753085164429, 8.835632322938354 ] log likelihood -141.16971380495966
1000 [ -14