# JS implementation of Nick Becker's post

- https://beckernick.github.io/logistic-regression-from-scratch/
- https://github.com/beckernick/logistic_regression_from_scratch/blob/master/logistic_regression_scratch.ipynb

In [1]:
import {shape,transpose,full,zeros} from './src/util.module.js';
import {matrixSum1d,matrixSubtract1d,matrixMultiply1d} from './src/util.module.js';

In [2]:
// Evaluates the text in `obj` as a JavaScript expression (in strict mode) and
// returns the resulting value. Pattern taken from
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/eval
// NOTE: this runs whatever text it is given, so only pass trusted input.
function looseJsonParse(obj){
    const source = `"use strict";return (${obj})`;
    const evaluate = new Function(source);
    return evaluate();
}

In [3]:
// Count passed to zeros(...) and full(...) below when building the labels.
const num_observations=5000;
// Each data file is read from disk and its text is evaluated as a JavaScript
// expression by looseJsonParse.
const x1=looseJsonParse(require('fs').readFileSync('data/x1.txt').toString());
const x2=looseJsonParse(require('fs').readFileSync('data/x2.txt').toString());
// One combined feature list: every entry of x1 first, then every entry of x2.
const simulated_separableish_features = [...x1, ...x2];
// Labels in the same order as the features: the zeros(...) run, then the full(...) run.
// NOTE(review): presumably num_observations 0s followed by num_observations 1s,
// and x1/x2 presumably hold num_observations entries each - confirm against
// util.module.js and the data files.
const simulated_labels = [...zeros(num_observations), ...full(num_observations,null,1)];

Note: we could use `dotProduct` from `util.module.js` if we used differently shaped weights, but using the `dot` function below keeps `logistic_regression` closer to the original.

In [4]:
/**
Returns the dot product of a[rows,cols] and b[cols] as a 1d array.

a: array of rows, each row an array of numbers.
b: array of numbers, indexed by column.

The result has one entry per row of a: the sum of aRow[i]*b[i] over the
row's columns. A row with no columns contributes 0.
*/
function dot(a,b) {
    // Seeding the sum with 0 keeps an empty row from making reduce throw.
    return a.map((aRow) => aRow.reduce((sum,aElem,i) => sum+aElem*b[i], 0));
}

In [5]:
/**
Applies the logistic function 1/(1+e^-score) to every element of scores
and returns the results as a new array of the same length.
*/
function sigmoid(scores) {
    const logistic = (z) => {
        const decay = Math.pow(Math.E, -z);
        return 1 / (1 + decay);
    };
    return scores.map((z) => logistic(z));
}

In [6]:
/**
Returns e raised to the power a.
*/
function exp(a) {
    const base = Math.E;
    return Math.pow(base, a);
}

In [7]:
/**
Returns the log-likelihood of weights under a logistic model: the sum, over
all scores, of target*score - log(1+e^score), where
scores = dot(features, weights).

features: rows of feature values, in the form dot accepts.
target:   labels, combined element-wise with the scores.
weights:  one weight per feature column.

An empty set of scores sums to 0.
*/
function log_likelihood(features, target, weights) {
    const scores = dot(features, weights);
    // log(1+e^score) is evaluated as max(score,0) + log1p(e^-|score|). The
    // direct form overflows to Infinity once e^score leaves the double range
    // (score above roughly 709), which would turn the whole sum into -Infinity.
    const logNormalizers = scores.map(
        score => Math.max(score, 0) + Math.log1p(Math.exp(-Math.abs(score))));
    const terms = matrixSubtract1d(
        matrixMultiply1d(target,scores),
        logNormalizers);
    // Seed with 0 so that an empty list of terms does not make reduce throw.
    return terms.reduce((a,b)=>a+b, 0);
}

In [8]:
/**
Fits logistic-regression weights by repeatedly stepping along the gradient
of the log-likelihood.

features:       rows of feature values (one row per observation).
target:         labels, subtracted element-wise from the predictions.
num_steps:      number of gradient steps to take.
learning_rate:  factor applied to the gradient at every step.
add_intercept:  when truthy, a constant 1 is prepended to every row, so the
                first weight plays the role of the intercept.

Returns the weights after the last step. The step number, the weights and
the log-likelihood are logged at step 0 and at steps 99, 199, 299, ...
*/
function logistic_regression(features, target, num_steps, learning_rate, add_intercept) {
    // Build the design matrix under its own name rather than reassigning the parameter.
    const design = add_intercept ? features.map(feature=>[1, ...feature]) : features;

    let weights = zeros(shape(design)[1]);

    // The transposed design matrix is the same at every step, so compute it once.
    const designTransposed = transpose(design);

    for (let step=0; step<num_steps; step++) {
        const scores = dot(design, weights);
        const predictions = sigmoid(scores);

        // Update weights with log likelihood gradient
        const output_error_signal = matrixSubtract1d(target, predictions);
        const gradient = dot(designTransposed, output_error_signal);
        weights = matrixSum1d(weights, matrixMultiply1d(gradient,learning_rate));

        // Print log-likelihood every so often
        if (step === 0 || step % 100 === 99) {
            console.log(step,weights,'log likelihood',log_likelihood(design, target, weights));
        }
    }
    return weights;
}

Just a couple of changes from logistic_regression_scratch.ipynb: I'm way too impatient to wait for 50000 epochs, so we'll train for fewer epochs at a higher learning rate (to hopefully get nearly the same loss and weights).

We're targeting:
- log likelihood of `-140.725421355` and
- weights of
    - `[-13.99400797] [[-5.02712572  8.23286799]]` sklearn LogisticRegression
    - `[-14.09225541  -5.05899648   8.28955762]` logistic_regression_scratch.ipynb

In [9]:
const weights = logistic_regression(simulated_separableish_features, simulated_labels, 2000, 3e-3, true)

0 [ 0, 7.397425245475898, 30.01541021294583 ] log likelihood -72310.42400198299
99 [ -17.831985432566956, -6.371830137093265, 10.448975784227374 ] log likelihood -146.3050815234575
199 [ -17.3221528845754, -6.192497940453306, 10.154139006269082 ] log likelihood -145.04134421360502
299 [ -16.860378856883734, -6.030677547538416, 9.887380225305396 ] log likelihood -144.00566607712898
399 [ -16.44684301234898, -5.885733989226151, 9.648535958305681 ] log likelihood -143.174994124599
499 [ -16.080572366316733, -5.757320425975913, 9.437029375056696 ] log likelihood -142.5232684873585
599 [ -15.759724219959509, -5.644793325446942, 9.251783504337425 ] log likelihood -142.02308143429894
699 [ -15.481679635529074, -5.547241465290732, 9.091275037986605 ] log likelihood -141.6473769821246
799 [ -15.243198003314033, -5.463537807227904, 8.953623766565578 ] log likelihood -141.37092595498117
899 [ -15.040610670967705, -5.39240547994631, 8.836704392207693 ] log likelihood -141.17138768237203
999 [ -14.