
CTC loss #1759

Open
marsiancba opened this issue Jul 19, 2019 · 14 comments

Comments

@marsiancba

Please add support for CTC loss.

My use-case is recognizing handwritten numbers (1-5 digits).

@rthadur rthadur self-assigned this Jul 19, 2019
@rthadur rthadur added the type:feature New feature or request label Jul 19, 2019
@rthadur rthadur removed their assignment Aug 1, 2019
@pyu10055 pyu10055 added the P3 label Aug 2, 2019
@fahimsun

Please add CTC support in TensorFlow.js.

CTC is crucial in all cases where the input length is not equal to the output length,
for example speech recognition using RNNs, or handwritten digits in sequence.
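As a quick illustration of why the lengths differ (my own sketch, not from this thread): CTC sums over every frame-level path that "collapses" to the target label, where collapsing first merges adjacent repeats and then drops blanks. A minimal version of that collapse rule, assuming blank is class 0:

```typescript
// CTC collapse: merge adjacent repeats, then drop blanks (class 0 here).
// A blank between two equal labels keeps them distinct, e.g. the "ll" in "hello".
function ctcCollapse(path: number[], blank = 0): number[] {
    const out: number[] = [];
    let prev = -1; // sentinel: no previous symbol yet
    for (const s of path) {
        if (s !== prev && s !== blank) out.push(s);
        prev = s;
    }
    return out;
}

// A 7-frame path collapsing to a 3-symbol label:
// ctcCollapse([0, 3, 3, 0, 3, 5, 5]) -> [3, 3, 5]
```

This is why a network emitting one class per frame can still be trained against a much shorter target sequence.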

@WenheLI
Contributor

WenheLI commented Sep 21, 2019

I'm interested in this one; can I have a try at it?

@rthadur
Contributor

rthadur commented Sep 23, 2019

@WenheLI Sure, please submit a new pull request.

@marsiancba
Author

@WenheLI I can provide you with a rough (but working) implementation of CTC loss in tfjs, if you want something to start from...

@WenheLI
Contributor

WenheLI commented Sep 24, 2019

@marsiancba Sure, that will be really helpful!

@fahimsun

@marsiancba Sure, I am interested. It would be very helpful.

@marsiancba
Author

// Assumes `import * as tf from '@tensorflow/tfjs'`. `ENGINE`, `printTensor`
// and `ASC.dieError` below come from tfjs internals / our own codebase.
export function ctcLoss(
    /**
     * batch_size / max_label_seq_length
     */
    labels: tf.Tensor2D,
    /**
     * batch_size / frames / num_labels
     */
    logits: tf.Tensor3D,
    options: {
    } = {}
): tf.Tensor1D {
    return tf.tidy(() => {
        const vec = ctcLoss_vec(labels, logits, options);

        return vec.mean().neg() as tf.Tensor1D;
    });
}

export function ctcLoss_vec(
    labels: tf.Tensor2D,
    logits: tf.Tensor3D,
    options: {
    } = {}
): tf.Tensor2D {
    // Debug printer; flip DEBUG to true to trace intermediate tensors.
    const DEBUG = false;
    function p(name: string, t?: tf.Tensor) {
        if (!DEBUG) return;
        if (t)
            printTensor(name, t);
        else
            console.log(name);
    }

    return tf.tidy(() => {

        const SUPER_SMALL = -1e9; //-1e38; acts as -Infinity in log space

        const logits_normalized = logits.sub(logits.logSumExp(2, true));

        p('labels', labels);
        p('logits', logits);
        p('logits_normalized', logits_normalized);

        const [batch_size, num_time_steps] = logits.shape;
        if (labels.shape[0] !== batch_size) ASC.dieError('1694042736');
        const max_label_seq_length = labels.shape[1];
        // Extended label: blanks interleaved between labels -> 2L+1 states;
        // one extra always-(-inf) state is appended as a "dead" gather target.
        const y0_size = max_label_seq_length * 2 + 1;
        const y_size = y0_size + 1;

        p('max_label_seq_length=' + max_label_seq_length);
        p('y0_size=' + y0_size);

        const labels_buff = labels.bufferSync();


        const y_loc_buff = tf.buffer([batch_size, y0_size, 2], 'int32');
        const res_loc_buff = tf.buffer([batch_size, 2], 'int32');

        // For each batch item, build the extended label (blank = 0 interleaved:
        // even positions are blanks, odd positions hold the labels) and record
        // where the final alpha entry for that item's true length lives.
        for (let b = 0; b < batch_size; b++) {
            for (let y = 0; y < y0_size; y++)
                y_loc_buff.set(b, b, y, 0);
            let len = max_label_seq_length;
            for (let t = 0; t < max_label_seq_length; t++) {
                const l = labels_buff.get(b, t);
                if (l === 0) { // 0 also marks end-of-label padding
                    len = t;
                    break;
                }
                const y = t * 2 + 1;
                y_loc_buff.set(l, b, y, 1);
            }

            res_loc_buff.set(b, b, 0);
            res_loc_buff.set(2 * len, b, 1);
        }

        // Allowed transitions into each extended-label state: stay (y), step
        // from y-1, or skip a blank from y-2 when the two labels differ.
        // Disallowed sources point at the extra state y0_size (always -inf).
        const incoming_loc_buff = tf.buffer([batch_size, y_size, 3, 2], 'int32');
        for (let b = 0; b < batch_size; b++) {
            for (let y = 0; y <= y0_size; y++) {
                for (let i = 0; i < 3; i++)
                    incoming_loc_buff.set(b, b, y, i, 0);
            }
            for (let y = 0; y < y0_size; y++) {
                incoming_loc_buff.set(y, b, y, 0, 1);
                incoming_loc_buff.set(y > 0 ? y - 1 : y0_size, b, y, 1, 1);
                let moze_double = false; // "moze" ~ "may": is the y-2 skip allowed?
                if (y > 2) {
                    if (y_loc_buff.get(b, y, 1) !== y_loc_buff.get(b, y - 2, 1))
                        moze_double = true;
                }
                incoming_loc_buff.set(moze_double ? y - 2 : y0_size, b, y, 2, 1);
            }
            incoming_loc_buff.set(y0_size, b, y0_size, 0, 1);
            incoming_loc_buff.set(y0_size, b, y0_size, 1, 1);
            incoming_loc_buff.set(y0_size, b, y0_size, 2, 1);
        }


        const y_loc = y_loc_buff.toTensor();
        const res_loc = res_loc_buff.toTensor();
        const incoming_loc = incoming_loc_buff.toTensor();
        p('y_loc', y_loc);
        p('res_loc', res_loc);
        p('incoming_loc', incoming_loc);

        // Per-time-step emission log-probs of each extended-label state.
        const y0 = gatherND(
            logits_normalized.transpose([0, 2, 1]),
            y_loc,
        )
            .transpose([0, 2, 1]);

        // Append the always-(-inf) dead state at index y0_size.
        const y = y0.pad([[0, 0], [0, 0], [0, 1]], SUPER_SMALL);

        p('y', y);

        // Initial alpha: all probability mass in state 0 (the leading blank);
        // every other state, including the dead one, is ~ -inf.
        let log_alpha =
            tf.scalar(0).reshape([1, 1]).tile([batch_size, 1])
                .concat(
                    tf.scalar(SUPER_SMALL).reshape([1, 1]).tile([batch_size, y0_size]),
                    1
                );

        function shift(t: tf.Tensor) {
            return t.pad([[0, 0], [1, 0]], SUPER_SMALL).slice([0, 0], t.shape);
        }
        function logSumExp(a: tf.Tensor, b: tf.Tensor) {
            return a.expandDims(2).concat(b.expandDims(2), 2).logSumExp(2);
        }

        // Forward recursion: alpha_t(y) = emit(y, t) + logsumexp over the
        // allowed incoming states of alpha_{t-1}.
        const t2y = y.unstack(1);
        for (let t = 0; t < num_time_steps; t++) {
            p("Time: " + t);

            const ty = t2y[t];
            p('log_alpha', log_alpha);
            p('ty', ty);

            const incoming = gatherND(log_alpha, incoming_loc);
            p('incoming', incoming);

            const incoming_plus_ty = incoming.add(ty.expandDims(2));
            p('incoming_plus_ty', incoming_plus_ty);

            const new_log_alpha2 = incoming_plus_ty.logSumExp(2);
            p('new_log_alpha2', new_log_alpha2);

            log_alpha = new_log_alpha2;
        }

        // Total probability: paths end in the last label or the trailing blank.
        const log_alpha_final = logSumExp(log_alpha, shift(log_alpha));
        p('log_alpha_final', log_alpha_final);

        const vec = gatherND(log_alpha_final, res_loc);
        //printTensor('vec', vec);

        return vec as tf.Tensor2D;
    })
}

// gatherND with a gradient attached (scatterND of dy back into x's shape);
// the stock tf.gatherND had no registered gradient at the time of writing.
function gatherND(x: tf.Tensor, indices: tf.Tensor): tf.Tensor {
    const grad = (dy: tf.Tensor, saved: tf.Tensor[]) => {
        return { x: () => tf.scatterND(saved[0], dy, x.shape) };
    };
    // ENGINE is tfjs-core's internal engine (tf.engine() in the public API).
    return ENGINE.runKernel(
        (backend, save) => {
            save([indices]);
            return backend.gatherND(x, indices);
        },
        { x },
        grad
    ) as tf.Tensor;
}

@fahimsun

@WenheLI are you working on this?

@WenheLI
Contributor

WenheLI commented Oct 1, 2019

@fahimsun Yep, but I am on a break right now, so it may take some time to implement. If you want to take it over, that is fine as well.

@jasonmayes
Member

Update: This is blocking new folks' work on handwriting recognition and speech recognition from being published. Is there an update on this?

Please see: https://discuss.tensorflow.org/t/ctc-loss-implementation-in-tfjs/6645/5

@harangp
Contributor

harangp commented Jan 12, 2022

Hi, I've prepared a working CTC loss / gradient calculator for TFJS. It works on batches, handles variable-length labels for training, and is built entirely from scratch based on the original papers. It is pluggable into a TFJS model to calculate losses and gradients during model.fit(). Open-sourced here: https://github.com/harangp/tfjsctcloss. Drop me a line if you are interested.

@gaikwadrahul8
Contributor

Hi, @marsiancba

Apologies for the delayed response. It seems we have not implemented this feature request yet; are you still looking for this feature?

@harangp, would you like to contribute this feature? If yes, please refer to this link. Thank you!

@marsiancba
Author

Hi, @gaikwadrahul8
The part of our project where we used tfjs is stalled at the moment, but we plan to return to it sometime in the future.
We used our own implementation (#1759 (comment)) and it seemed to work OK, but a native implementation will probably be better/faster.

@harangp
Contributor

harangp commented May 2, 2023

> @harangp, would you like to contribute this feature? If yes, please refer to this link. Thank you!

Yes, in time. I still have to make the backward and collection parts native, and I'm not confident enough about the returned gradients. I also want to utilize masks to make things more efficient. Lastly, it is not drop-in compatible with the Python version, so I'm not sure it would fit into the TFJS concept.

9 participants