Skip to content
This repository
Browse code

crypto: cast5 - simplify if-statements

I noticed that by factoring out common rounds from the
branches of the if-statements in the encryption and
decryption functions, the executable file size goes down
significantly, for crypto/cast5.ko from 26688 bytes
to 24336 bytes (amd64).

On my test system, I saw a slight speedup. This is the
first time I have run such a benchmark - I modeled it on a
similar one from the crypto mailing list, and I hope I did it right.

Before:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben: 
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s

After:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben: 
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
  • Loading branch information...
commit 895be15745d59cc7ede0e1c203e3432b0abdb71c 1 parent 90246e7
Nicolas Kaiser nikai3d authored herbertx committed

Showing 1 changed file with 24 additions and 50 deletions. Show diff stats Hide diff stats

  1. +24 -50 crypto/cast5.c
74 crypto/cast5.c
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
604 604 * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
605 605 */
606 606
  607 + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
  608 + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
  609 + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
  610 + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
  611 + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
  612 + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
  613 + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
  614 + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
  615 + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
  616 + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
  617 + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
  618 + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
607 619 if (!(c->rr)) {
608   - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
609   - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
610   - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
611   - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
612   - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
613   - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
614   - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
615   - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
616   - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
617   - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
618   - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
619   - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
620 620 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
621 621 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
622 622 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
623 623 t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
624   - } else {
625   - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
626   - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
627   - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
628   - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
629   - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
630   - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
631   - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
632   - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
633   - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
634   - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
635   - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
636   - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
637 624 }
638 625
639 626 /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
663 650 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
664 651 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
665 652 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
666   - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
667   - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
668   - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
669   - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
670   - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
671   - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
672   - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
673   - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
674   - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
675   - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
676   - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
677   - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
678   - } else {
679   - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
680   - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
681   - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
682   - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
683   - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
684   - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
685   - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
686   - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
687   - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
688   - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
689   - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
690   - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
691 653 }
  654 + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
  655 + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
  656 + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
  657 + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
  658 + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
  659 + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
  660 + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
  661 + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
  662 + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
  663 + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
  664 + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
  665 + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
692 666
693 667 dst[0] = cpu_to_be32(r);
694 668 dst[1] = cpu_to_be32(l);

0 comments on commit 895be15

Please sign in to comment.
Something went wrong with that request. Please try again.