Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Browse files

john-1.7.9-jumbo-1 (1.7.8-jumbo-8 equivalent)

  • Loading branch information...
commit de989e6e12f48090f1b3dd095ea327ed8a2df30e 1 parent 7894cc8
magnum authored
Showing with 42,194 additions and 420 deletions.
  1. +17 −0 README-jumbo
  2. +26 −0 doc/CONFIG
  3. +1 −1  doc/CONTACT
  4. +2,648 −0 doc/DYNAMIC
  5. +483 −0 doc/DYNAMIC_SCRIPTING
  6. +58 −0 doc/ENCODINGS
  7. +33 −0 doc/EPi.patch.README
  8. +38 −0 doc/HDAA_README
  9. +1 −1  doc/LICENSE
  10. +4 −0 doc/LICENSE.mpi
  11. +122 −0 doc/MARKOV
  12. +143 −0 doc/NETNTLM_README
  13. +101 −7 doc/OPTIONS
  14. +154 −0 doc/README.mpi
  15. +36 −0 doc/RULES
  16. +48 −0 doc/pdfcrack_README
  17. +26 −0 doc/pdfcrack_TODO
  18. +254 −0 run/
  19. +1,073 −0 run/dumb16.conf
  20. +1,429 −0 run/dumb32.conf
  21. +214 −0 run/dynamic.conf
  22. +78 −0 run/genincstats.rb
  23. +183 −67 run/john.conf
  24. +25 −0 run/
  25. +34 −0 run/
  26. +38 −0 run/
  27. +262 −0 run/
  28. +132 −0 run/
  29. +131 −0 run/
  30. +89 −0 run/
  31. +23 −0 run/
  32. +19 −0 run/
  33. +4,986 −0 run/stats
  34. +4 −3 src/AFS_fmt.c
  35. +127 −0 src/BFEgg_fmt_plug.c
  36. +3 −2 src/BF_fmt.c
  37. +4 −3 src/BSDI_fmt.c
  38. +5 −0 src/DES_bs.c
  39. +3 −2 src/DES_fmt.c
  40. +1 −1  src/DES_std.c
  41. +2 −1  src/DES_std.h
  42. +342 −0 src/DMD5_fmt_plug.c
  43. +461 −0 src/DOMINOSEC_fmt_plug.c
  44. +203 −0 src/EPI_fmt_plug.c
  45. +398 −0 src/HDAA_fmt_plug.c
  46. +312 −0 src/IPB2_fmt_plug.c
  47. +291 −0 src/KRB4_fmt_plug.c
  48. +16 −0 src/KRB4_std.h
  49. +134 −0 src/KRB4_std_plug.c
  50. +361 −0 src/KRB5_fmt_plug.c
  51. +56 −0 src/KRB5_std.h
  52. +287 −0 src/KRB5_std_plug.c
  53. +13 −2 src/LM_fmt.c
  54. +170 −12 src/MD5_fmt.c
  55. +10 −1 src/MD5_std.c
  56. +26 −9 src/MD5_std.h
  57. +469 −0 src/MSCHAPv2_fmt_plug.c
  58. +268 −0 src/MYSQL_fast_fmt_plug.c
  59. +244 −0 src/MYSQL_fmt_plug.c
  60. +483 −95 src/Makefile
  61. +393 −0 src/NETLM_fmt_plug.c
  62. +490 −0 src/NETLMv2_fmt_plug.c
  63. +433 −0 src/NETNTLM_fmt_plug.c
  64. +550 −0 src/NETNTLMv2_fmt_plug.c
  65. +344 −0 src/NETSPLITLM_fmt_plug.c
  66. +334 −0 src/NSLDAPS_fmt_plug.c
  67. +287 −0 src/NSLDAP_fmt_plug.c
  68. +307 −0 src/NS_fmt_plug.c
  69. +943 −0 src/NT_fmt_plug.c
  70. +330 −0 src/OPENLDAPS_fmt_plug.c
  71. +180 −0 src/PHPS_fmt_plug.c
  72. +248 −0 src/PO_fmt_plug.c
  73. +258 −0 src/SKEY_fmt.c
  74. +293 −0 src/SybaseASE_fmt.c
  75. +812 −0 src/UnicodeData.h
  76. +314 −0 src/XSHA512_fmt.c
  77. +256 −0 src/XSHA_fmt_plug.c
  78. +74 −0 src/base64.c
  79. +6 −0 src/base64.h
  80. +193 −8 src/bench.c
  81. +277 −0 src/bf_tab.h
  82. +202 −0 src/blowfish.c
  83. +55 −0 src/blowfish.h
  84. +274 −0 src/byteorder.h
  85. +55 −3 src/c3_fmt.c
  86. +173 −0 src/calc_stat.c
  87. +10 −4 src/charset.c
  88. +160 −11 src/config.c
  89. +2 −0  src/config.h
  90. +44 −7 src/cracker.c
  91. +232 −0 src/crc32_fmt_plug.c
  92. +2 −1  src/dummy.c
  93. +281 −0 src/dynamic.h
  94. +6,945 −0 src/dynamic_fmt.c
  95. +670 −0 src/dynamic_parser.c
  96. +841 −0 src/dynamic_preloads.c
  97. +128 −0 src/dynamic_utils.c
  98. +452 −0 src/encoding_data.h
  99. +46 −2 src/external.c
  100. +50 −16 src/formats.c
  101. +30 −6 src/formats.h
  102. +287 −0 src/genmkvpwd.c
  103. +9 −0 src/getopt.c
  104. +88 −0 src/gladman_fileenc.h
  105. +146 −0 src/gladman_hmac.c
  106. +79 −0 src/gladman_hmac.h
  107. +160 −0 src/gladman_pwd2key.c
  108. +58 −0 src/gladman_pwd2key.h
  109. +18 −0 src/gladman_sha1.h
  110. +329 −0 src/hmacMD5_fmt_plug.c
  111. +165 −0 src/hmacmd5.c
  112. +52 −0 src/hmacmd5.h
  113. +260 −0 src/hmailserver_fmt.c
  114. +15 −3 src/idle.c
  115. +84 −1 src/inc.c
  116. +35 −0 src/john-mpi.c
  117. +15 −0 src/john-mpi.h
  118. +239 −17 src/john.c
  119. +130 −98 src/loader.c
  120. +24 −0 src/loader.h
  121. +66 −7 src/logger.c
  122. +3 −1 src/logger.h
  123. +248 −0 src/lotus5_fmt_plug.c
  124. +262 −0 src/md4.c
  125. +30 −0 src/md4.h
  126. +253 −0 src/md4_gen_fmt_plug.c
  127. +386 −0 src/md5-mmx.S
  128. +289 −0 src/md5.c
  129. +65 −0 src/md5.h
  130. +328 −0 src/md5_eq.c
  131. +275 −0 src/md5_go.c
  132. +20 −0 src/md5_go.h
  133. +181 −0 src/mediawiki_fmt_plug.c
  134. +110 −3 src/memory.c
  135. +19 −0 src/memory.h
  136. +55 −21 src/misc.c
  137. +17 −4 src/misc.h
  138. +394 −0 src/mkv.c
  139. +17 −0 src/mkv.h
  140. +191 −0 src/mkvcalcproba.c
  141. +222 −0 src/mkvlib.c
  142. +36 −0 src/mkvlib.h
  143. +958 −0 src/mscash1_fmt_plug.c
Sorry, we could not display the entire diff because it was too big.
17 README-jumbo
@@ -0,0 +1,17 @@
+The jumbo patch, which has been applied to this source tree of John the
+Ripper, adds a lot of code, documentation, and data contributed by the
+user community. This is not "official" John the Ripper code. It is
+very easy for new code to be added to the jumbo patch: the quality
+requirements are low. This means that you get a lot of functionality
+that is not "mature" enough or is otherwise inappropriate for the
+official JtR, which in turn also means that bugs in this code are to be
+expected, etc.
+If you have any comments on this release or on JtR in general, please
+join the john-users mailing list and post in there.
+Licensing info:
+How to contribute more code:
26 doc/CONFIG
@@ -118,6 +118,32 @@ appear in the charset file. Just list all those characters here and
they will be added, but considered the least probable.
+ "include" syntax within the john.conf file.
+At version 1.7.8-jumbo6 (and later), include syntax was added to john.conf
+file format. The syntax is one of these 3 types:
+.include "file"
+.include <file>
+.include [section]
+Including a file will load that file and drop it right where
+the .include is located. This is similar to how #include "" and
+#include <> works in the C language. Some of the larger sections in
+the jumbo john, have been moved out of the 'john.conf' file, into their
+own .conf file (the dumb16/32 and the generic scripting sections)
+the .include [section], will take all of the lines found within [section]
+and place them into the current 'building' section. Thus, things like
+numerous small rules sections can be made, and then a couple of 'super'
+rules sections can be created, which will .include several of these
+smaller rules sections to make the body of this rules section. Also
+it can be used to load common information among several incremental
+sections, or a common init function (or other functions), in a pair
+of extern sections.
+See [Incremental:All7] and several others around there for examples.
Defining an external mode.
2  doc/CONTACT
@@ -22,7 +22,7 @@ Commercial support for John the Ripper is available from Openwall:
Solar Designer <solar at>
$Owl: Owl/packages/john/john/doc/CONTACT,v 1.3 2006/01/02 05:00:42 solar Exp $
2,648 doc/DYNAMIC
2,648 additions, 0 deletions not shown
@@ -0,0 +1,483 @@
+For most hashes where MD5 is used, building a proper md5 format is likely
+not the best bet overall. A format is not trivial. It requires maintenance
+and will likely require specific enhancements to get it to perform
+optimally on all hardware. Likely there will need to be 'generic' C
+code done, then it will need code to tie it into CPU specific optimizations,
+such as SSE, MMX, intrinsic SSE, GPU, ... ... ... This will also mean that
+to stay up to date, the format will require ongoing work and maintenance.
+However, there is one format which may reduce a lot of this maintenance
+work to very little. Now, that format itself will need to be kept up to
+date, but any formats that are built upon its internal workings will not. That
+format is $dynamic$. In this 'format', there is a scripting language, where
+a format developer only need to describe the actual operations properly,
+and the format is 'done', and working.
+This document will go over how to 'build' a format that uses this $dynamic$
+format, how to optimize it to work faster, and how to build a 'thin'
+quasi format which insulates the end user from the $dynamic$ format line
+**** Introduction ****
+To start off with, a little background on 'how' and 'where' to build the
+scripts that run $dynamic$, and what internal data structures are available to
+be used.
+The 'where' which a format developer can easily build into john, is to add a
+new $dynamic$ format 'script', into john.ini file (john.conf). This
+file usually is located in the current directory where john is run out
+of (but the --config=file can override the default behavior). Within the
+john.conf, a new 'section' can be added for a md5 generic format. The
+new 'section' will be set by using this section naming:
+You replace the NUM with the sub-format number (from 1001 to 9999).
+Pick a number that is not used.
+Within this 'section', there will be multiple lines added. These lines
+are primarily of the form: Type=Value
+The actual contents of these scripts will be addressed later. That will
+be the 'How', and performing this is actually outside of the intro section.
+The 'Data' and runtime information is this:
+Inside of the $dynamic$ format, there are 2 input buffers (actually ALL data
+is arrays of 128 of each buffer type). There is input1 and input2 buffers.
+The main operations on these buffers is to clear them, and to append data,
+to build string which will later be md5 hashed.
+There are also 2 output buffers. These buffers will receive the md5 hashing
+from the 2 input buffers. NOTE, when the format processing is complete, the
+results MUST be placed into output1 buffer. This is where all of the comparison
+functions will check against.
+In the format, there is a salt (if the format is salted). There may also be
+a second salt value.
+There are also 'keys' value(s). These are the passwords being tested at this
+given time.
+There are also 8 'constant' strings which can be used within a format. A
+format such as md5-po has a couple of constants within it.
+There are also numerous optimization 'flags' which do special things when
+loading keys or salts, and there are numerous special 'optimization' primitive
+functions within the format, for speedup of certain operations.
+**** Simple format building ****
+We will start out with a few simple formats, and simply 'show' how to build
+a straight forward script. The scripts may or may not be optimal. Later
+we will optimize these somewhat. When building the formats here, there will
+be comments interspersed, listing just what is being done, and why.
+we will build these formats:
+dynamic_1030 md5($p.$p)
+dynamic_1031 md5($s.md5($p).$p)
+dynamic_1032 md5(md5($s).md5($p).$p)
+Expression=dynamic_1030: md5($p.$p)
+Here is the exact same format, with some comments added, describing the
+sub-sections, and exactly what is being done.
+#first line is the section name. It MUST be of the format shown.
+#the next line, is a required line. It serves 2 purposes. It is output
+#in john, when the format 'starts'. Also, the dynamic_# part is used
+#to distinguish this exact format (so the command line of --sub=dynamic_1030
+#would specify this and only this format)
+Expression=dynamic_1030: md5($p.$p)
+#This is the set of functions. This is the ONLY section of the format
+#where order IS important. The functions ARE handled one after the
+#other, from top to bottom, to perform the string operations, and md5
+#operations which are needed to perform the hash of this format
+#The functions ARE a required part of the format.
+#first step, clean the input. All work for this format is done using
+#only input 1 and output 1 buffers.
+#Step 2, append the keys. Note, the buffer is clean, so this is simply
+#the same as Input=keys (but required 2 steps, the clean and append keys).
+#Step 3, append keys again (the format is ($p.$p) or keys appended to keys).
+#Step 4, final step performs md5 of $p.$p This will properly leave the
+#results in output1
+#This is test string. These ARE required. You can provide more than
+#one. 5 or 6 are best, to make sure the format is valid.
+# There are also TestA= and TestU= lines. The TestA= lines are ONLY loaded
+# if --encoding=utf8 is NOT set on the command line (not running in utf8 mode),
+# and the TestU= lines are only loaded IF the --encoding=utf8 command is used.
+Ok, here is the second format. The format being done is md5($s.md5($p).$p)
+Here are a few comments about this format:
+1. There is a Flag= value. This is because this is a Salted format. This
+2. We only use input 1 and output 1.
+3. There are a couple of calls to crypt (md5). The first simply gets
+ md5($p) and puts it into output1, which will later be appended in
+ base-16 format as we build our string.
+4. After the first crypt (md5), we clear our input buffer, then put
+ the salt in, append the base-16 of md5($p), and then append $p
+5. Finally, a call to crypt is done, which leaves the results in
+ output1, so the rest of the $dynamic$ format can properly compare it.
+Expression=dynamic_1031: md5($s.md5($p).$p)
+Now, here is the final format: md5(md5($s).md5($p).$p)
+Expression=dynamic_1032: md5(md5($s).md5($p).$p)
+Ok, now that these have been built, here are a few 'benchmarks' listing
+that they are WORKING, and what speed they are working:
+Here is MinGW build 'x86'
+john_x86 -test -for=$dynamic$ -sub=dynamic_1030
+Benchmarking: dynamic_1030: md5($p.$p) [128x1 (MD5_Go)]... DONE
+Raw: 3530K c/s
+john_x86 -test -for=$dynamic$ -sub=dynamic_1031
+Benchmarking: dynamic_1031: md5($s.md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 1945K c/s
+Only one salt: 1890K c/s
+john_x86 -test -for=$dynamic$ -sub=dynamic_1032
+Benchmarking: dynamic_1032: md5(md5($s).md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 1016K c/s
+Only one salt: 1031K c/s
+Here is MinGW build of SSE2
+john_sse2 -test -for=$dynamic$ -sub=dynamic_1030
+Benchmarking: dynamic_1030: md5($p.$p) SSE2 [SSE2 32x4 (.S)]... DONE
+Raw: 7250K c/s
+john_sse2 -test -for=$dynamic$ -sub=dynamic_1031
+Benchmarking: dynamic_1031: md5($s.md5($p).$p) SSE2 [SSE2 32x4 (.S)]... DONE
+Many salts: 5065K c/s
+Only one salt: 4436K c/s
+john_sse2 -test -for=$dynamic$ -sub=dynamic_1032
+Benchmarking: dynamic_1032: md5(md5($s).md5($p).$p) SSE2 [SSE2 32x4 (.S)]... FAILED (get_hash[0](0))
+Here are some timings to check against:
+john_x86 -test -for=$dynamic$ -sub=dynamic_0
+Benchmarking: dynamic_0: md5($p) (raw-md5) [128x1 (MD5_Go)]... DONE
+Raw: 4005K c/s
+john_sse2 -test -for=$dynamic$ -sub=dynamic_0
+Benchmarking: dynamic_0: md5($p) (raw-md5) SSE2 [SSE2 32x4 (.S)]... DONE
+Raw: 10740K c/s
+**** Optimizations of prior formats ****
+For format 1030, the speed should be very close to that of dynamic_0.
+In both formats, there is only 1 call to md5(). However, we are seeing that the
+(1030) is slower than (0). The explanation of this, is that the (0) format has
+an optimization used, which we can not use in the (1030). The (1030) is likely
+about as optimal as it can be made in the current $dynamic$ format. The optimization
+for format (0) is: Flag=MGF_KEYS_INPUT What that does, is to place the keys
+directly into the input field, and then later, when john gets the keys back (it
+does this if a hash is cracked), john gets them from the input. In the (1030)
+format, we load the keys, into the 'keys' arrays. We then have to call a function
+to clean input buffer 1, and to append the keys (twice). Thus, what we have is
+additional memory movement, and that slows things down. However, to use the
+MGF_KEYS_INPUT optimization, we would have had to keep the input1 buffer pristine
+and ONLY put in the keys (passwords). Since we had to append the keys twice,
+we simply 'blew' that requirement, and thus, could NOT use it. At a later
+time, we will show a format WHERE we can use this optimization.
+For format 1031, there also appears to be no optimizations available.
+For 1032, there are optimizations. In this format, we notice that we have
+this sub expression: md5($s). Well, there is an optimization, which when it
+loads the input file, it converts all salts into md5($s) and uses that value
+instead. So, at startup time, we perform md5 hashes of all salts, but at
+runtime, we simply place the salt into the building string, instead of performing
+a MD5 on the salt. So, in the 1032, we had 3 calls to crypt. By using this
+optimization, we can reduce that to 2 crypts. The starting format is:
+md5(md5($s).md5($p).$p) This optimization makes the format 'behave' at
+runtime, like it is md5($s.md5($p).$p), which was format 1031. Note, after
+we make this optimization, the timings will be almost identical to the 1031
+timings. Also note, the Test string for 1032 and 1042 are exactly the
+same. These are the same formats. It is just that 1042 performs fewer
+crypt calls per test. Also note, in the 'original' run of SSE2, the 1032
+format failed. This failure, is due to the SSE2 / MMX code only working
+for strings up to 54 bytes (optimization reason). The length of this string:
+md5($s).md5($p) is 64 bytes by itself, and we also append $p to that. Thus,
+our string is OVER 54 bytes in length, and thus, can not be used in SSE2
+mode. We do have a couple work arounds for this, to get it working properly
+on SSE2 builds. We can use a flag which simply stops SSE2 dead in its tracks
+(and performs all work using x86 code). This is flag MGF_NOTSSE2Safe
+Expression=dynamic_1042: md5(md5($s).md5($p).$p)
+Once the above changes have been done, here are the speeds:
+john_x86 -test=5 -for=$dynamic$ -sub=dynamic_1031
+Benchmarking: dynamic_1031: md5($s.md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 2007K c/s
+Only one salt: 1913K c/s
+john_x86 -test=5 -for=$dynamic$ -sub=dynamic_1032
+Benchmarking: dynamic_1032: md5(md5($s).md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 1052K c/s
+Only one salt: 1030K c/s
+john_x86 -test=5 -for=$dynamic$ -sub=dynamic_1042
+Benchmarking: dynamic_1042: md5(md5($s).md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 1420K c/s
+Only one salt: 1372K c/s
+john_sse2 -test=5 -for=$dynamic$ -sub=dynamic_1042
+Benchmarking: dynamic_1042: md5(md5($s).md5($p).$p) SSE2 [128x1 (MD5_Go)]... DONE
+Many salts: 1416K c/s
+Only one salt: 1372K c/s
+We can also perform even more optimizations in the format. What we do in this format, is we
+md5 the salt (when we first load the file). Thus the salts which john works with, are really
+md5($s) (same as we did in format 1042). Then we use a different flag, which puts the
+md5($p) into offset 32 of input1 (where we want it). Then we simply overwrite the data in
+input 1 with the salt (which is md5($s) in base-16 format), then force set length to 64, then
+append the keys, then crypt.
+Expression=dynamic_1052: md5(md5($s).md5($p).$p)
+Here are the benchmarks for the above format:
+john_x86 -test=5 -for=$dynamic$ -sub=dynamic_1052
+Benchmarking: dynamic_1052: md5(md5($s).md5($p).$p) [128x1 (MD5_Go)]... DONE
+Many salts: 2251K c/s
+Only one salt: 1369K c/s
+john_sse2 -test=5 -for=$dynamic$ -sub=dynamic_1052
+Benchmarking: dynamic_1052: md5(md5($s).md5($p).$p) SSE2 [128x1 (MD5_Go)]... DONE
+Many salts: 2251K c/s
+Only one salt: 1369K c/s
+Now, note the speed for 'many salts'. It is very close to the speed of (1031), actually faster.
+This speed is the speed john will have for a normal password cracking, where you have dozens (or
+hundreds, or 1000's) of password hashes to crack.
+To understand WHY this format is this much faster (the 'Many salts', is the normal way to
+benchmark the speed of a salted hash), is to understand what is happening under the hood within
+john's 'crypt all' loop.
+ while (!feof(password_file)) {
+ for (i = 0 to max_num_passwords)
+ SetKey(i, getnextpassword(password_file));
+ if (salted)
+ {
+ while (z<salt_count)
+ {
+ SetSalt(salt[z]);
+ crypt_all
+ for (all_binaries_for_salt[z])
+ CheckForMatched(binary)
+ }
+ }
+ }
+The above code is certainly not 'exact', but should show close enough, the algorithm used
+within john. Now, the algorithm as used within $dynamic$ will be shown (specifically for the
+flag MGF_KEYS_BASE16_IN1_Offset32).
+ - SetKey() is called numerous times. This will set a 'dirty flag' for the keys inside of $dynamic$.
+ - SetSalt() will be called. The salt handed to us is actually md5($s), since MGF_SALT_AS_HEX is set
+ The SetSalt() calls are happening within the 'while(z<salt_count)' loop in john.
+ - crypt_all is called.
+ Within crypt_all, $dynamic$ knows that we want the base-16 md5($p) to be placed at offset 32
+ within input1. So the first call to crypt_all (for the first salt), will cause the md5($p)
+ to be computed, and to be placed at offset 32.
+ Then the script will overwrite the starting bytes of input1 with the 32 bytes of the salt,
+ then the length is set to 64, then the key is appended, then a crypt, and then comparisons.
+ - NOW, we are at the next loop within the 'while(z<salt_count)'.
+ - Then john loads the next salt [ SetSalt() ].
+ - Then john calls crypt_all.
+ At this time, there have been NO additional SetKey() calls. Thus, $dynamic$ knows that the
+ base-16 text of md5($p) is STILL located at offset 32 of Input1. So, the format DOES NOT
+ perform this crypt again (until new SetKey() function calls happen).
+ - This SetSalt .. crypt_all .. compare continues until all salts are tested. However, there
+ will be no crypt calls to md5($p) again, UNTIL the working code within john calls SetKey()
+ again (when starting with new passwords, after all salts have been checked).
+Now, in the final format, we start from 1042, and do NOT turn off the sse2 code. What we do, is
+to turn off SSE2 when it is not valid. This will generate x86 code (generic) that runs exactly
+the same as in 1042 (the 2 function calls of DynamicFunc__SSEtoX86_switch_output1 and
+DynamicFunc__X86toSSE_switch_output1 are no-ops in x86 builds). However, in SSE mode,
+the first crypt will be done using SSE. Thus, as we see, the speed went from 1420k, up
+to almost 1800k. But note, this is NOT as fast as format 1052, for 'many' salts.
+Expression=dynamic_1062: md5(md5($s).md5($p).$p)
+john_sse2 -test=5 -for=$dynamic$ -sub=dynamic_1062
+Benchmarking: dynamic_1062: md5(md5($s).md5($p).$p) SSE2 [SSE2 32x4 (.S)]... DONE
+Many salts: 1792K c/s
+Only one salt: 1715K c/s
+So all in all, 1032, 1042, 1052, 1062 were all equivalent (1032 was not, since it fails in
+SSE2 builds, but that was 'fixed' in 1042). They all run using differing sets of flags, differing
+sets of Function primitives, and have different runtime speeds. However, in the end, they all
+Now, the above format 1062 is slower than 1052. This is due to the final crypt still having to be
+done in x86 mode. However, in 1062, we crypt EVERY password for each salt. Thus you can see there
+is no speed gain between many salts, and 1 salt. Yes, the md5($p) IS done using SSE2 which is much
+faster, but in version 1052, when there are multiple salts, the slower md5($p) is done only 1 time
+per password.
+Now, the flag MGF_KEYS_BASE16_IN1_Offset32 (or other flags like it), CAN be used in SSE2 to
+get much faster behavior, however, it has to be in a format that IS SSE2 friendly. Here
+is an example:
+md5(md5($p).$s) In this format, we CAN build an SSE2 friendly format, that is VERY fast.
+For this test, we will set the salt length to a fixed size of 12.
+Here is a very easy to read, but also very far from optimal format for the above type:
+Expression=dynamic_1033: md5(md5($p).$s)
+john_x86 -test -format=$dynamic$ -subf=dynamic_1033
+Benchmarking: dynamic_1033: md5(md5($p).$s) [128x1 (MD5_Go)]... DONE
+Many salts: 1918K c/s
+Only one salt: 1889K c/s
+john_sse2 -test -format=$dynamic$ -subf=dynamic_1033
+Benchmarking: dynamic_1033: md5(md5($p).$s) SSE2 [SSE2 32x4 (.S)]... DONE
+Many salts: 5479K c/s
+Only one salt: 4922K c/s
+Here is a MUCH more optimal version (1043). This version will use the flag
+MGF_KEYS_BASE16_IN1 to load the md5($p) into input 1, at the start of that string. That
+will ONLY be done, if there is a SetKey() change. Then we simply set the input length
+to 32, append the salt, and call crypt.
+Expression=dynamic_1043: md5(md5($p).$s)
+john_x86 -test -format=$dynamic$ -subf=dynamic_1043
+Benchmarking: dynamic_1043: md5(md5($p).$s) [128x1 (MD5_Go)]... DONE
+Many salts: 4128K c/s
+Only one salt: 1890K c/s
+john_sse2 -test -format=$dynamic$ -subf=dynamic_1043
+Benchmarking: dynamic_1043: md5(md5($p).$s) SSE2 [SSE2 32x4 (.S)]... DONE
+Many salts: 13096K c/s
+Only one salt: 4834K c/s
+So in this case, we see that the 'only 1 salt' speed is pretty much a wash. However, the
+'many salts' speed has gone from 1900k to 4100k for non-SSE, and from 5500k to 13100k for SSE2.
+NOTE, the above format is actually dynamic_6 (also dynamic_7) format.
@@ -0,0 +1,58 @@
+This version of John is UTF-8 and codepage aware, using a new command line
+argument. In short, this means "my wordlists and input files are encoded in
+UTF-8" (or CP1252 etc). It does NOT mean you can feed John with eg. a UTF-8
+encoded wordlist when cracking LM. It DOES make John recognise national vowels,
+lower or upper case characters, etc. when specifying a codepage.
+Currently supported encodings: utf-8, iso-8859-1 (or ansi), iso-8859-7,
+iso-8859-15, koi8-r, cp437, cp737, cp850, cp858, cp866, cp1251, cp1252 and
+cp1253. New encodings can be added with ease, using automated tools that rely
+on the Unicode Database (see Openwall wiki).
+Example usage:
+ john hashes.txt --wordlist:spanish.dic --encoding:iso-8859-1 --rules
+The behaviour is a little different depending on whether the format uses Unicode
+(UTF-16 or UCS-2) internally or not.
+ * For Unicode formats (eg. NT, mssql, etc), you can pick any of the encodings
+ and the input candidates will be properly converted to UTF-16.
+ * For non-Unicode formats (eg. DES, LM, MD5 etc), no actual conversion will
+ be made so you need to use an encoding that matches what the hashes were
+ made from. But you should still tell John about it so it can handle national
+ characters properly.
+You can convert your wordlists from/to a large number of formats, using for
+example iconv(1). It may be a good idea to include the encoding in the filename,
+eg. "greek.8859-7.dict" or "rockyou.utf8.lst".
+You can also convert on the fly, example:
+$ iconv < greek.8859-7.dict -f iso-8859-7 -t cp737 | ./john -pipe -enc:cp737 ...
+The traditional behavior, and what is still happening if you don't specify an
+encoding, is that John will assume ISO-8859-1 when converting plaintexts or
+salts to UTF-16 (this also happens to be very fast), and assume ASCII in most
+other cases (it will use 8-bit candidates as-is, but not upper/lower-case them
+or recognise letters etc).
+Some new reject rules and character classes are implemented, see doc/RULES.
+Note that UTF-8 is not handled well in rules. Some wordlist rules may cut
+UTF-8 multibyte sequences in the middle, resulting in garbage. You can reject
+such rules with -U to have them in use only when --encoding=utf8 is not used.
+Also, the recognition of non-ASCII characters for upper/lower-casing etc does
+not work with UTF-8 in Rules engine. Formats that internally uppercase
+their plaintexts, like old MSSQL, will uppercase properly though.
+Beware of UTF-8 BOM's. They will cripple the first word in your wordlist.
+These contributions to John are hereby placed in the public domain. In case
+that is not applicable, they are Copyright 2009, 2010, 2011 by magnum and
+JimF and hereby released to the general public. Redistribution and use in
+source and binary forms, with or without modification, is permitted.
33 doc/EPi.patch.README
@@ -0,0 +1,33 @@
+= Intro
+EPiServer is a popular webbased content management system from Elektropost (
+You can dump the password hashes using the SQL syntax "select name, salt, hash from tblSID". The tblSID
+table stores interesting things such as usernames, salt and password hashes, but also passwords in cleartext.
+If a password can be found in cleartext it is found in the password column of tblSID.
+= Install
+Copy the epibf_X.Y-john_1.7.2.patch (where X and Y need to be replaced with the version you downloaded)
+to your john source directory, e.g. john-1.7.2/src and then run "patch -p2 < epibf_X.Y-john_1.7.2.patch" (remember the X and Y).
+The patch will create a file called EPI_fmt.c, some files for SHA1 support as well as update some of johns
+files in order to incorporate the patch with john.
+= Usage
+This patch needs the format of the password file to be: <user>:<salt> <hash>. (Currently you need to include
+an initial 0x of both salt and hash.)
+--- Contents of an example epipasswd file ---
+webadmin:0x6631F625DEC28716FC24FA3CC1B3E2055E4281F4465226905C10D3456035 0x4F25D9BD24B81D85B1F2D106037C71CD2C828168
+epiuser:0x48F9BA13F54CE7AF669C76EEBC6BEA4564EBB77F1866CA5F2B297F7159C1 0xDA4260812C195025B4442C5C84E0F890122B285A
+-------------- End --------------------------
+You can then run "john epipasswd", the format will be autodetected.
+In case you'd like to check the performance of the patch try "john --test --format:epi".
@@ -0,0 +1,38 @@
+ HTTP Digest access authentication
+ ---------------------------------
+- How to create the password string :
+'$' is used as the separator, you can change it in HDAA_fmt.c
+Example of password string :
+Here the magic is '$response$'
+- Demonstration :
+Tested on a : AMD Athlon(tm) 64 Processor 3000+
+$ cat ./htdigest
+$ ./john ./htdigest
+Loaded 2 password hashes with 2 different salts (HTTP Digest access authentication [HDAA-MD5])
+kikou (moi)
+nocode (user)
+guesses: 2 time: 0:00:01:27 (3) c/s: 670223 trying: nocode
2  doc/LICENSE
@@ -36,7 +36,7 @@ in the public domain.
Commercial licenses (non-GPL) are available upon request.
Alexander Peslyak aka Solar Designer <solar at>
$Owl: Owl/packages/john/john/doc/LICENSE,v 1.12 2011/06/22 13:03:43 solar Exp $
4 doc/LICENSE.mpi
@@ -0,0 +1,4 @@
+This patch for John the Ripper MPI implementation is a continuation of
+Ryan Lim's original patch against 1.6.x, later maintained by John
+Anderson at, and modified by AoZ and magnum. It is
+licensed under the same terms as John the Ripper itself.
122 doc/MARKOV
@@ -0,0 +1,122 @@
+The Markov mode is based on [1], tested and applied to "classical" password
+cracking in [2]. This mode is similar to the "wordlist" mode because it will only
+crack a fixed quantity of passwords. Its parameters are:
+* LEVEL is the "Markov level". This value is the maximum strength of passwords
+that are going to be cracked. When LEVEL increases, the quantity of passwords
+that are going to be tested increases exponentially.
+* START is the index of the first password that is going to be tested, starting
+with 0.
+* END is the index of the last password that is going to be tested. When it is
+set to 0, it will represent the last possible password.
+* LENGTH is the maximum length of the tested passwords.
+using --markov:100:0:0:12 will let john check every password whose length is 12
+or less and whose "Markov strength" is 100 or less.
+The "LEVEL" parameter should be selected based on the desired maximum running
+time. In order to select the appropriate LEVEL, the following steps should be
+followed:
+1/ Run the -single and -wordlist modes of john, as they will find many passwords
+for a low price
+2/ Run john with a low markov level on the file, using the time utility. For
+example:
+time john -markov:180 test
+Loaded 156 password hashes with no different salts (NT LM DES [128/128 BS SSE2])
+Warning: MaxLen = 12 is too large for the current hash type, reduced to 7
+MKV start (lvl=180 len=7 pwd=30449568)
+guesses: 0 time: 0:00:00:10 99% c/s: 475013K trying:
+real 0m10.707s
+user 0m10.621s
+sys 0m0.012s
+This means that john can test 2.8M (30449568/10.707) passwords per second. It
+should be noted that with salted passwords the cracking speed will increase with
+every cracked password. This number should be corrected based on the experience
+of the user.
+3/ Evaluate the quantity of passwords that could be cracked during the selected
+time. Using the previous example, a cracking time of 3 hours will lead to a
+quantity of passwords of 30714M passwords (30449568/10.707*3600*3).
+4/ Use the genmkvpwd command to find the corresponding level. Using the previous
+example, with a maximum password length of 12 (stupid because LM has a maximum
+length of 7 ...):
+genmkvpwd stats 0 12
+lvl=245 (5904 Kb for nbparts) 26 G possible passwords (26528306250)
+lvl=246 (5928 Kb for nbparts) 29 G possible passwords (29373638087)
+lvl=247 (5952 Kb for nbparts) 32 G possible passwords (32524537496)
+Here, the selected level will be 246 (the highest level where the number of
+possible passwords is less than 30714M).
+5/ Run john:
+john -markov:246:0:0:12 test
+The START and END parameter could be used to distribute work among many CPUs.
+The preferred method is to evaluate the combined cracking speed of all CPUs
+(adding the step 2 result for every CPU available) and follow the previous
+procedure. At step 5, share the cracking space among all CPUs, where each share
+is proportional to the CPU's cracking speed.
+New options are available in the john.conf file:
+Statsfile - This is the path of the "stat" file.
+MkvLvl - the default level
+MkvMaxLen - the default length
+The markov mode is based on statistical data from real passwords. This data is
+stored in the "stat" file. In order to generate a custom stat file, it is
+recommended to use the new calc_stat command:
+./calc_stat "dictionnary file" stats
+This program is used to generate statistics about cracked passwords. It accepts
+as input the "stat" file and a file with a single cracked password per line.
+Here is a sample output:
+./mkvcalcproba stats /tmp/passwordlist
+test 33+16+28+20 97 4 40030907 45
+password 29+16+30+22+51+25+24+30 227 8 2698006565378672 177
+32'[[! 55+24+98+1000+23+29 1229 6 39949021871 1169
+charsetsize = 92
+Its output is tab separated and should open nicely in spreadsheets. Here is the
+meaning of the column:
+1/ Cracked password, reprinted from the file
+2/ Sum of all "markov probabilities" of every letter of the word. This is
+supposed to help identify which parts of the password make it strong. The
+number "1000" is written when no 1st/2nd letter combinations were found in the
+stat file (for example ' then [ here).
+3/ Markov strength
+4/ Password length
+5/ Rank when bruteforced "stupidly" (a, b, c, ..., aa, ab, ac ...) considering
+that letters are ordered given their appearance probability and the given
+charsetsize (92)
+6/ Markov strength of the password where the two first letters are removed
@@ -0,0 +1,143 @@
+LM/NTLM Challenge / Response Authentication
+JoMo-Kun (jmk at foofus dot net) ~ 2010
+Microsoft Windows-based systems employ a challenge-response authentication
+protocol as one of the mechanisms used to validate requests for remote file
+access. The configured/negotiated authentication type, or level, determines how
+the system will perform authentication attempts on behalf of users for either
+incoming or outbound requests. These requests may be due to a user initiating a
+logon session with a remote host or, in some cases, transparently by an
+application they are running. In many cases, these exchanges can be replayed,
+manipulated or captured for offline password cracking. The following text
+discusses the available tools within the John the Ripper "Jumbo" patch for
+performing offline password auditing of these specific captured challenge-
+response pairs.
+Why might these exchanges be of interest? A primary point of most penetration
+tests is to find avenues through which the assessor can gain unauthorized access
+to some resource. This often relies on the compromise of a system's local
+accounts or the exploitation of some service-level vulnerability. The ability to
+capture on-the-wire authentication exchanges and to crack the associated
+password adds another option to the mix. The fact that these exchanges can be
+cracked aids in demonstrating to clients why one authentication algorithm may be
+preferred to another.
+A given server is likely to use one of the following protocols for
+authentication challenge-response: LMv1, NTLMv1, LMv2 or NTLMv2. It should be
+noted that these protocols may use the LM and NTLM password hashes stored on a
+system, but they are not the same thing. For an excellent in-depth discussion of
+the protocols see the Davenport paper entitled "The NTLM Authentication Protocol
+and Security Support Provider" [1]. For the purposes of this discussion, the key
+item of note is that the LMv1 and NTLMv1 protocols consist of only a single
+server challenge. This allows an attacker to force a client into authenticating
+using a specific challenge and then attack that response using precomputed
+Rainbow Tables.
+There are a variety of methods for capturing challenge-response pairs, including
+the use of tools such as MetaSploit and Ettercap. The author's preferred method
+is to use a modified version of Samba[2]. The provided patch sets the server's
+challenge to a fixed value (i.e. 0x1122334455667788) and logs all authentication
+attempts in a format suitable for use with John. The patch also includes a
+modification to the nmbd application. Nmbd is used to respond to broadcast
+requests for NetBIOS name/IP information. The modified service simply responds
+to all requests with its own IP address, often resulting in hosts unknowingly
+authenticating to the wrong system. Another common method of forcing systems to
+authenticate to the Samba server is through the use of HTML image source tags.
+For example, simply inserting the tag "<img src=file://>"
+into a HTML message will cause some email client applications to automatically
+perform an authentication attempt. Other examples include the use of specialized
+desktop.ini files and many other mischievous tricks.
+It is also worth noting that these challenge/response protocols are not limited
+to the Microsoft File and Print Services. For example, Cisco's LEAP wireless
+security mechanism, EAP-PEAP and PPTP all utilize a MS-CHAP handshake, or
+modified variant. The NTLMv1 challenge/response set can be extracted from this
+exchange and subjected to a brute-force guessing attack. Further discussion on
+this subject is outside of the scope of this write-up, but would certainly
+reveal numerous additional uses.
+The LMv1 challenge-response mechanism suffers a number of technical limitations.
+As previously noted, only a server challenge is used. This means that if the
+challenge is set to a constant value, a given password will always result in
+the same client authentication response. This allows for the precomputation of
+password / LMv1 responses and their subsequent retrieval using tools such as
+To further exacerbate the issue, the LM hash used during the generation of the
+LMv1 response converts a password into (at most) two 7 character upper-case
+passwords. The LM hash is then split into three pieces prior to calculating the
+LMv1 response. This process greatly reduces the size of the Rainbow Tables which
+need to be calculated in order to break a given password. For example, the
+so-called "halflmchall" tables widely available on the Internet utilize only the
+first third of the LMv1 response to break the first 7 characters of the
+respective password. The script discussed in this document can be used
+to attempt to break the remaining characters of the password and its original
+case-sensitive version. The following is an example of cracking a captured
+LMv1/NTLMv1 challenge/response set.
+Example LMv1/NTLMv1 Challenge/Response (.lc Format):
+LMv1 Response: 5237496CFCBD3C0CB0B1D6E0D579FE9977C173BC9AA997EF
+NTLMv1 Response: A37C5C9316D9175589FDC21F260993DAF3644F1AAE2A3DFE
+Server Challenge: 1122334455667788
+RainbowCrack look-up of password's first 7 characters (upper-cased) using first
+third (8 bytes) of LMv1 response:
+% rcrack halflmchall/*.rt -f 5237496CFCBD3C0C
+Result: CRICKET
+First Pass (Crack Remaining Characters):
+% --file --seed CRICKET
+Result: CRICKET88!
+Second Pass (Determine Case Sensitive Password)[a]:
+% --file
+Result: Cricket88!
+[a] Note that the case-sensitive password will be shown about a third through
+the script's output following the text: "Performing NTLM case-sensitive crack
+for account".
+The following is an example of cracking a captured NTLMv1 challenge/response. If
+the LMv1 and NTLMv1 response hashes within a given client response are
+identical, it typically means one of two things: either the client machine is
+configured to send only a NTLMv1 response (e.g. LAN Manager Authentication Level
+Group Policy Object set to "Send NTLM response only"), or the user's password is
+greater than 14 characters. If the password is indeed over 14 characters in
+length, it is unlikely a suitable Rainbow Table set is available and brute-force
+guessing will be exhaustively time-consuming.
+Example NTLMv1 Challenge/Response (.lc Format):
+John Usage:
+% john -format:netntlm
+The LMv2 and NTLMv2 challenge/response protocols both employ unique client
+challenges. This additional data effectively defeats the ability to precompute
+password/response pairs via Rainbow Tables. It should also be noted that
+despite its name, the LMv2 response is computed using a NTLM hash. This results
+in a much harder-to-crack response hash, as the password was not truncated to
+seven characters or upper-cased during the process.
+The use of NTLMv2 is now the default policy within Microsoft Windows Vista and
+Windows 7. Its use can be enforced for older versions via the LAN Manager
+Authentication Level Group Policy Object ("Send NTLMv2 response only" (level 3
+or higher)).
+Example LMv2 Challenge/Response (.lc Format):
+John Usage:
+% john -format:netlmv2
+Example NTLMv2 Challenge/Response (.lc Format):
+John Usage:
+% john -format:netntlmv2
108 doc/OPTIONS
@@ -1,4 +1,5 @@
John the Ripper's command line syntax.
+ (Updated in/for the jumbo patch by JimF and magnum)
When invoked with no command line arguments, "john" prints its usage
@@ -23,19 +24,27 @@ argument (if supported for a given option).
The supported options are as follows, square brackets denote optional
---single "single crack" mode
+--single[=SECTION] "single crack" mode
Enables the "single crack" mode, using rules from the configuration
-file section [List.Rules:Single].
+file section [List.Rules:Single]. If --single=Single_2 then the rules
+from [List.Rules:Single_2] section would be used.
--wordlist=FILE wordlist mode, read words from FILE,
--stdin or from stdin
These are used to enable the wordlist mode.
---rules enable word mangling rules for wordlist mode
+Input data in a character encoding other than the default 'raw'. See also
+doc/ENCODINGS. --encoding=LIST gives a list of all handled encodings.
+--rules[=SECTION] enable word mangling rules for wordlist mode
Enables word mangling rules that are read from [List.Rules:Wordlist].
+If --rules=Wordlist_elite was used, then [List.Rules:Wordlist_elite]
+would be the section used.
--incremental[=MODE] "incremental" mode [using section MODE]
@@ -69,6 +78,8 @@ its purpose is to give the new session a name (to which John will
append the ".rec" suffix to form the session file name). This is
useful for running multiple instances of John in parallel or to be
able to later recover a session other than the last one you interrupt.
+john.log file will also be named NAME.log (whatever 'NAME' is), so
+that any logging of the session work will end up in this file.
--status[=NAME] print status of a session [called NAME]
@@ -87,18 +98,21 @@ may restrict the set of passwords used by specifying some password files
password files will be used), "--format", or/and "--external" (with an
external mode that defines a filter() function).
---show show cracked passwords
+--show[=left] show cracked passwords
Shows the cracked passwords for given password files (which you must
specify). You can use this option while another instance of John is
cracking to see what John did so far; to get the most up to date
information, first send a SIGHUP to the appropriate "john" process.
+If --show=left then all uncracked hashes are listed (in a john 'input'
+file format way). =left is just that literal string "=left".
--test[=TIME] run tests and benchmarks for TIME seconds each
Tests all of the compiled in hashing algorithms for proper operation and
benchmarks them. The "--format" option can be used to restrict this to
-a specific algorithm.
+a specific algorithm. Using --test=0 will do a very quick self-test but
+will not produce usable speed figures.
--users=[-]LOGIN|UID[,..] [do not] load this (these) user(s)
@@ -118,13 +132,21 @@ not load accounts with a bad shell. You can omit the path before a
shell name, so "--shells=csh" will match both "/bin/csh" and
"/usr/bin/csh", while "--shells=/bin/csh" will only match "/bin/csh".
---salts=[-]COUNT load salts with[out] at least COUNT passwords
+--salts=[-]COUNT[:MAX] load salts with[out] at least COUNT passwords
This is a feature which allows to achieve better performance in some
special cases. For example, you can crack only some salts using
"--salts=2" faster and then crack the rest using "--salts=-2". Total
cracking time will be about the same, but you will likely get some
-passwords cracked earlier.
+passwords cracked earlier. If MAX is listed, then no hashes are
+loaded where there are more than MAX salts. This way, if you have
+already run --salts=25, you can later run --salts=10:24 and none of
+the hashes that were already done from the --salts=25 run will be re-done.
+--pot=NAME pot filename to use
+By default, john will use john.pot. This override allows using a different
+john.pot-like file (to start from, and to store any found password into).
--format=NAME force hash type NAME
@@ -152,6 +174,11 @@ types unless you have other hash types (those supported by John
natively) in the password file(s) as well (in which case another hash
type may get detected unless you specify this option).
+When benchmarking "--format=crypt", it will default to benchmark DES.
+The "--subformat=TYPE" can be added for benchmarking other types, given
+they are supported by the system. Currently supported TYPEs are MD5, BF,
+SHA-256 and SHA-512.
"--format=crypt" is also a way to make John crack crypt(3) hashes of
different types at the same time, but doing so results in poor
performance and in unnecessarily poor results (in terms of passwords
@@ -159,6 +186,8 @@ cracked) for hashes of the "faster" types (as compared to the "slower"
ones loaded for cracking at the same time). So you are advised to use
separate invocations of John, one per hash type.
+--subformat=LIST displays all the built-in dynamic formats, and exits
--save-memory=LEVEL enable memory saving, at LEVEL 1..3
You might need this option if you don't have enough memory or don't
@@ -169,6 +198,67 @@ impact is that you won't see the login names while cracking. Higher
memory saving levels have a performance impact; you should probably
avoid using them unless John doesn't work or gets into swap otherwise.
+--mem-file-size=SIZE max size of wordlist to preload into memory
+One of the significant performance improvements for some builds of
+john, is preloading the wordlist file into memory, instead of reading
+line by line. This is especially true when running with a large list
+of --rules. The default max size file is 5 million bytes. Using this
+option allows making this larger. A special value is --mem-file-size=0.
+This will force loading to memory regardless of file size. NOTE if
+--save-memory is used, then memory file processing is turned off.
+--field-separator-char=c Use 'c' instead of the char ':'
+By design, john works with most files, as 'tokenized' files. The field
+separator used by john is the colon ':' character. However, there are
+hashes which use the colon in the salt field, and there are users which
+may have a colon for a user name (for a couple examples of problems
+with it). However, an advanced john user can change the input files,
+by using a different character than the ':' (and different than any
+other 'used' character), and avoid problems of lines not being properly
+processed. The side effects are that the pot file will get this
+'character' used in it also (and only lines in the pot file that HAVE
+that character will be loaded at startup), and there are other side
+effects. Usually, this is ONLY used in very advanced situations, where
+the user 'knows what he is doing'. If the character can not be easily
+represented by the keyboard, then the format of
+--field-separator-char=\xHH can be used. --field-separator-char=\x1F
+would represent the character right before the space (space is 0x20)
+--fix-state-delay=N only determine the wordlist offset every N times
+This is an optimization which helps on some systems. This just
+limits the number of times that the ftell() call is performed.
+The one side effect, is that if john is aborted, and restarted, it
+may redo more tests. Thus, the use of this option is only acceptable
+and desirable for fast hash types (e.g., raw MD5).
+--nolog turns off john.log file
+This will turn off creation, or updating to the john.log file (which may
+have a different name if the --session=NAME flag was used.) Often the
+logging is not wanted, and this log file can often become very large
+(such as working with many 'fast' rules on a fast format). The log file
+is often used to check what work has been done, but if this will not be
+needed, and the log file is simply going to be deleted when done, then
+running in --nolog mode may be used.
+--crack-status show status line on every crack
+This will automagically emit a status line at every cracked password. This is
+mostly for testing.
+--mkpc=N force min/max keys per crypt to N
+This option is for certain kinds of testing and is not mentioned in the usage
+blob. Many formats perform the crypts in batches of several (sometimes
+thousands or even tens of thousands) candidates. This option forces it down
+from the format's default. For most purposes, you would use 1. One good
+example is for studying which rules give most "hits": Without this option,
+you can't know for sure which rule produced a successful guess when analyzing
+the log file.
Additional utilities (compiled/installed along with John).
@@ -200,6 +290,9 @@ the order of entries. You might want to use this with John's
"--stdout" option if you've got a lot of disk space to trade for the
reduced cracking time (on possibly trying some duplicates as they
might be produced with word mangling rules).
+This program has been updated. It is faster, it now can 'cut' the
+lines (in a couple of ways), and can unique the files data, AND also
+unique it against an existing file.
@@ -241,4 +334,5 @@ with passwdqc, before you ask users to change their passwords - whether
using this script or otherwise. And you should edit the message inside
the script before possibly using it.
+Based on (and modified in the jumbo patch):
$Owl: Owl/packages/john/john/doc/OPTIONS,v 1.11 2011/11/20 00:51:35 solar Exp $
154 doc/README.mpi
@@ -0,0 +1,154 @@
+ The original implementation was ca. 2004 by Ryan Lim as an academic
+ project. It was later picked up and maintained at, adding
+ fixes for the JtR 1.7 releases and various cipher patches.
+ In 2008, it was picked up by AoZ and stripped back down to the original
+ MPI-only changes to improve its compatibility with the 'jumbo' patchsets,
+ which had better-maintained alternate cipher support. This is often
+ referred to as "the mpi10 patch"
+ In 2010, it was extended by magnum to support all cracking modes. This
+ should be referred to as "the fullmpi patch" to avoid confusion. With the
+ exception of Markov it is far from perfect but it works just fine and
+ should support correct resuming in all modes. It is well tested but you
+ have absolutely NO guarantees.
+ Unless using OMP, you should consider applying the nsk-3 patch, also known
+ as "Faster bitslice DES key setup".
+ To enable MPI in John, un-comment these two lines in Makefile:
+# Uncomment the TWO lines below for MPI (can be used together with OMP as well)
+CC = mpicc -DHAVE_MPI
+MPIOBJ = john-mpi.o
+ You must have an operational MPI environment prior to both compiling and
+ using the MPI version; configuring one is outside the scope of this
+ document but for a single, multi-core, host you don't need much
+ configuration. MPICH2 or OpenMPI seems to do the job fine, for example.
+ Most testing of fullmpi is now done under latest stable OpenMPI.
+ Debian Linux example for installing OpenMPI:
+ sudo apt-get install libopenmpi-dev openmpi-bin
+ Note that this patch works just fine together with OMP enabled as well.
+ When MPI is in use (with more than one process), OMP is (by default)
+ automatically disabled. Advanced users may want to change this setting
+ (change MPIOMPmutex to N in john.conf) and start one MPI node per
+ multi-core host, letting OMP do the rest. Warnings are printed; these
+ can be muted in john.conf too.
+ Typical invocation is as follows:
+ mpiexec -np 4 ./john --incremental passwd
+ The above will launch four parallel processes that will split the
+ Incremental keyspace in a more-or-less even fashion. If you run it to
+ completion, some nodes will however finish very early due to how this
+ mode is implemented, decreasing the overall performance. This problem
+ gets much worse with a lot of nodes.
+ In MARKOV mode, the range is automatically split evenly across the nodes,
+ just like you could do manually. This does not introduce any overhead,
+ assuming job runs to completion - and also assuming your MPI compiler
+ behaves.
+ The single and wordlist modes scale fairly well and cleartexts will not be
+ tried by more than one node (except when different word + rule combinations
+ result in the same candidate, but that problem is not MPI specific).
+ In SINGLE mode, and sometimes in Wordlist mode (see below), john will
+ distribute (actually leapfrog) the rules (after preprocessor expansion).
+ This works very well but will not likely result in a perfectly even
+ workload across nodes.
+ WORDLIST mode with rules will work the same way. Without rules, or when
+ rules can't be split across the nodes, john will distribute (again, it
+ really just leapfrogs) the words instead. This is practically the same as
+ using the External:Parallel example filter in john.conf, but much more user
+ friendly.
+ If the --mem-file-size parameter (default 5000000) will allow the file to
+ be loaded in memory, this will be preferred and each node will only load
+ its own share of words. In this case, there is no further leapfrogging and
+ no other overhead. Note that the limit is per node, so using the default
+ and four nodes, a 16 MB file WILL be loaded to memory, with 4 MB on each
+ node.
+ You can override the leapfrogging selection. This is debug code really and
+ should eventually be replaced by proper options:
+ --mem-file-size=0 (force split loading, no leapfrog)
+ --mem-file-size=1 (force leapfrogging of words)
+ --mem-file-size=2 (force leapfrogging of rules)
+ In EXTERNAL mode, john will distribute candidates in the same way as in
+ Wordlist mode without rules. That is, all candidates will be produced on
+ all nodes, and then skipped by all nodes but one. This is the mode where
+ the fullmpi patch performs worst. When attacking very fast formats, this
+ scales VERY poorly.
+ You may send a USR1 signal to the parent MPI process (or HUP to all
+ individual processes) to cause the subprocesses to print out their status.
+ Be aware that they may not appear in order, because they blindly share the
+ same terminal.
+ skill -USR1 -c mpiexec
+ Another approach would be to do a normal status print. This must be done
+ with mpiexec and using the same -np as used for starting the job:
+ mpiexec -np 4 ./john --status
+ Which will dump the status of each process as recorded in the .rec files.
+ This way you also get a line with total statistics.
+ - This implementation does not account for heterogeneous clusters or nodes
+ that come and go.
+ - In interest of cooperating with other patches, benchmarking is less
+ accurate. Specifically, it assumes all participant cores are the same
+ as the fastest.
+ - Benchmark virtual c/s will appear inflated if launching more processes
+ than cores available. It will basically indicate what the speed would be
+ with that many real cores.
+ - There is no inter-process communication of cracked hashes yet. This means
+ that if one node cracks a hash, all other nodes will continue to waste
+ time on it. The current workaround is aborting and restarting the jobs
+ regularly. This also means that you may have to manually stop some or all
+ nodes after all hashes are cracked.
+ - Aborting a job using ctrl-c will often kill all nodes without updating
+ state files and logs. I have tried to mitigate this but it is still a
+ good idea to send a -USR1 to the parent before killing them. You should
+ lower the SAVE parameter in john.conf to 60 (seconds) if running MPI,
+ this will be the maximum time of repeated work after restarting.
+Following is the verbatim original content of this file:
+This distribution of John the Ripper (1.6.36) requires MPI to compile.
+If you don't have MPI, download and install it before proceeeding.
+Any bugs, patches, comments or love letters should be sent to Hate mail, death threates should be sent to
+Ryan Lim <>
36 doc/RULES
@@ -7,6 +7,19 @@ multiple rules for a single source line. Below you will find
descriptions of the rule reject flags, the rule commands (many of them
are compatible with those of Crack 5.0a), and the preprocessor syntax.
+ Note about .include [section] syntax.
+A rule section can include another section of rules or, in more general
+terms, any section can include another section, within the john.conf
+file format (new in 1.7.8-jumbo-6). This sectional include is done
+using a 'period' directive. So within a rule set, a line like:
+.include [list.rules.someother] will be replaced by all of the rules
+from the 'someother' ruleset. One side effect of being able to include
+rules, is that any rule which would need to start with a period character,
+will have to escape that character (or the config file loader will bail
+out, listing a bogus . directive). At this time, it is not valid for a
+john rule to start with a period, so at this time, this is not a problem.
Rule reject flags.
@@ -15,6 +28,9 @@ are compatible with those of Crack 5.0a), and the preprocessor syntax.
-8 reject this rule unless current hash type uses 8-bit characters
-s reject this rule unless some password hashes were split at loading
-p reject this rule unless word pair commands are currently allowed
+-u reject this rule unless the --encoding=utf8 flag is used
+-U reject this rule if the --encoding=utf8 flag is used
+->N reject this rule unless length N or longer is supported
Numeric constants and variables.
@@ -56,11 +72,25 @@ than that of "l" (length).
?d matches digits [0-9]
?a matches letters [a-zA-Z]
?x matches letters and digits [a-zA-Z0-9]
+?o matches control characters
+?y matches valid characters
?z matches all characters
+?b matches characters with 8th bit set (mnemonic "b for binary")
+?N where N is 0...9 are user-defined character classes. They match characters
+ as defined in john.conf, section [UserClasses]
The complement of a class can be specified by uppercasing its name. For
example, "?D" matches everything but digits.
+NOTE, if running in --encoding=iso-8859-1 (or koi8-r/cp1251/cp866,etc), then the
+high bit characters are added to the respective classes. So in iso-8859-1 mode,
+lower case ?l would include ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ while in 'normal'
+runs, it is only a-z.
+NOTE 2, the rules engine currently has very limited understanding of UTF-8 so
+character classes etc. will only work with ASCII characters, even if using
+--encoding=utf8.
Simple commands.
@@ -79,6 +109,10 @@ f reflect: "Fred" -> "FredderF"
$X append character X to the word
^X prefix the word with character X
+NOTE, all of these are encoding-aware. Eg. if you do not specify an encoding,
+the l command will lowercase A-Z only. If you use --encoding=iso-8859-1 it will
+also recognise ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ and lowercase them properly.
String commands.
@@ -138,6 +172,8 @@ V lowercase vowels, uppercase consonants: "Crack96" -> "CRaCK96"
R shift each character right, by keyboard: "Crack96" -> "Vtsvl07"
L shift each character left, by keyboard: "Crack96" -> "Xeaxj85"
+NOTE, of these, only S and V are encoding-aware.
Memory access commands.
48 doc/pdfcrack_README
@@ -0,0 +1,48 @@
+Code and documentation are copyright 2006-2008 Henning Norén
+Parts of pdfcrack.c and md5.c are derived/copied/inspired from
+xpdf/poppler and are copyright 1995-2006 Glyph & Cog, LLC.
+The PDF data structures, operators, and specification are
+copyright 1985-2006 Adobe Systems Inc.
+Project page:
+pdfcrack is a simple tool for recovering passwords from pdf-documents.
+It should be able to handle all pdfs that use the standard security handler
+but the pdf-parsing routines are a bit of a quick hack so you might stumble
+across some pdfs where the parser needs to be fixed to handle.
+Type 'make' (or 'gmake' if you have BSD-make as default) to build the program.
+You will need to have GNU Make and a recent version of GCC installed but there
+are no external dependencies on libraries.
+You will have to add the -march-switch in the CFLAGS-option in Makefile
+for best optimization on your platform. Look into the GCC-manual
+( if you are unsure.
+The program is distributed under GPL version 2 (or later).
+Features available in this release (check TODO for features that might come):
+* Both owner- and user-passwords with the Standard Security Handler, rev 2 & 3.
+* Search by wordlist
+* Search by bruteforcing with specific charset
+* Optimized search for owner-password when user-password is known (or empty)
+* Extremely simple permutations of passwords (makes first letter uppercase)
+- currently only useful for bruteforcing with charsets:
+* Auto-save when interrupted (Ctrl-C or send SIGINT to the process)
+* Loading saved state
+- currently only for bruteforcing with charsets:
+* Minimum length of password to start at
+* Maximum length of password to try
+Sort your wordlist by length for best performance and consider that almost
+all passwords in PDFs are in iso latin 1 so use the correct character encoding
+in your terminal and/or wordlist when using special characters.
+This tool can not decrypt a Password Protected PDF.
+Look up the pdftk toolkit which can do that, when you know the password.
26 doc/pdfcrack_TODO
@@ -0,0 +1,26 @@
+Replace pdfparser with a more robust and complete parsing of a PDF
+Optimize the crack and crypto-routines for greater performance
+Add support for third-party security handlers
+Add real permutation-support. Maybe John the Ripper could be a
+source of inspiration?
+Add support for masked passwords, when we already know parts of the password
+Replace pdfparser with a complete representation of the structure of a
+PDF-file that can be used to find information and more importantly, can be
+written out again to a file.
+For this we need to understand/represent objects (indirect and direct), the
+file structure (linearized and standard) and xref tables with trailers.
+We need to be able to update/fix the xreftables when writing it.
+We also need support for LZW-compression that is used for many streams.
+When the above item is done there is nothing stopping us from adding complete
+RC4-keyspace search and decrypt without having to bother cracking the
+passwords. An example of this search can be viewed at:
254 run/
@@ -0,0 +1,254 @@
+#!/usr/bin/perl -w
+use strict;
+use Encode;
+use Switch;
+use Unicode::Normalize;
+use utf8; # This source file MUST be stored UTF-8 encoded
+# code page data builder, by magnum / JimF. v1.2
+# August 8, added parsing of UnicodeData.txt for building more macros
+# Coded July-Aug 2011, as a tool to build codepage encoding data needed
+# for John the Ripper code page conversions. The data output from this file
+# is made to be directly placed into the ./src/encoding_data.h file in john's
+# source tree.
+# Running without any arguments will show a list of possible code pages.
+# This should set our output to your terminal settings
+use open ':locale';
+# Set to 1 to permanently enable Unicode comments
+my $verbose = 0;
+if ($ARGV[0] eq "-v") {
+ $verbose++;
+ shift;
+my $enc;
+if (@ARGV==1) {$enc=$ARGV[0];}
+else {
+ print "Supported encodings:\n", join(", ", Encode->encodings(":all")), "\n\n";
+ exit(0);
+my %cat;
+open FILE, "unused/UnicodeData.txt" or die $!;
+while (my $line = <FILE>) {
+ next if substr($line,0,1) eq "#";
+ my @line = split(';', $line);
+ $cat{hex($line[0])} = $line[2];
+sub lookupCategory {
+ my $c = shift;
+ return $cat{$c};
+sub printdef {
+ my $param = shift;
+ if (length($param)>80) {print" \\\n\t";}
+ elsif (length($param)>0) {print" ";}
+ if (length($param)>0) {print "\"".$param."\"";}
+my $to_unicode_high128="";
+my $lower=""; my $upper=""; my $lowonly=""; my $uponly=""; my $specials = ""; my $punctuation = ""; my $alpha = ""; my $digits = ""; my $control = ""; my $invalid = ""; my $whitespace = ""; my $vowels = "\\x59\\x79"; my $consonants = "";
+my $clower=""; my $cupper=""; my $clowonly=""; my $cuponly=""; my $cspecials = ""; my $cpunctuation = ""; my $calpha = ""; my $cdigits = ""; my $cvowels = "Yy"; my $cconsonants = "";
+my $encu = uc($enc);my $hs = "";
+$encu =~ s/-/_/g;
+# first step, compute the unicode array
+foreach my $i (0x80..0xFF) {
+ my $u = chr($i);
+ $u = Encode::decode($enc, $u);
+ $hs .= $u;
+ if (ord($u) == 0xfffd) {
+ $u = chr($i);
+ }
+ $to_unicode_high128 .= "0x" . sprintf "%04X", ord($u);
+ if ($i % 16 == 15 && $i != 255) { $to_unicode_high128 .= ",\n"; }
+ elsif ($i != 255) { $to_unicode_high128 .= ","; }
+if ($verbose) {
+ print "\n// "; foreach (8..9, 'A'..'F') { print $_, " "x15 };
+ print "\n// "; foreach (8..9, 'A'..'F') { print '0'..'9','A'..'F' };
+ print "\n// ", $hs, "\n";
+print "\n// here is the $encu to Unicode conversion for $encu characters from 0x80 to 0xFF\n";
+print "static UTF16 ".$encu."_to_unicode_high128[] = {\n";
+print $to_unicode_high128 . " };\n";
+# Now build upcase/downcase data.
+foreach my $i (0x80..0xFF) {
+ my $c = chr($i);
+ # converts $c into utf8, from $enc code page, and 'sets' the 'flag' in perl that $c IS a utf8 char.
+ $c = Encode::decode($enc, $c);
+ # upcase and low case the utf8 chars
+ my $ulc = lc $c; my $uuc = uc $c;
+ # reconvert the utf8 chars back into the $enc code page.
+ my $elc = Encode::encode($enc, $ulc); my $euc = Encode::encode($enc, $uuc);
+ if ( (chr($i) eq $elc || chr($i) eq $euc) && $elc ne $euc) {
+ if (chr($i) ne $euc) {
+ if (chr($i) ne $elc && chr($i) ne $euc) {
+ no warnings;
+ printf("// *** WARNING, char at 0x%X U+%04X (%s) needs to be looked into. Neither conversion gets back to original value!\n",$i,ord($c), $c);
+ } elsif ( length($euc) > 1) {
+ $lowonly .= sprintf("\\x%02X", ord($elc));
+ $clowonly .= $c;
+ printf("// *** WARNING, char at 0x%X U+%04X (%s -> %s) needs to be looked into. Single to multi-byte conversion\n",$i,ord($c), $ulc, $uuc);
+ } elsif ( length($elc) > 1) {
+ $uponly .= sprintf("\\x%02X", ord($euc));
+ $cuponly .= $c;
+ printf("// *** WARNING, char at 0x%X U+%04X (%s -> %s) needs to be looked into. Single to multi-byte conversion\n",$i,ord($c), $ulc, $uuc);
+ } elsif ( ord($euc) < 0x80) {
+ $lowonly .= sprintf("\\x%02X", ord($elc));
+ $clowonly .= $c;
+ if (ord($euc) != 0x3f) {
+ printf("// *** WARNING, char at 0x%X -> U+%04X -> U+%04X -> 0x%X (%s -> %s) needs to be looked into. Likely one way casing conversion\n",$i,ord($ulc),ord($uuc),ord($euc), $ulc, $uuc);
+ }
+ } elsif ( ord($elc) < 0x80) {
+ $uponly .= sprintf("\\x%02X", ord($euc));
+ $cuponly .= $c;
+ if (ord($elc) != 0x3f) {
+ printf("// *** WARNING, char at 0x%X -> U+%04X -> U+%04X -> 0x%X (%s -> %s) needs to be looked into. Likely one way casing conversion\n",$i,ord($ulc),ord($uuc),ord($euc), $ulc, $uuc);
+ }
+ } else {
+ $lower .= sprintf("\\x%02X", ord($elc));
+ $clower .= lc($c);
+ $upper .= sprintf("\\x%02X", ord($euc));
+ $cupper .= uc($c);
+ }
+ }
+ } else {
+ # NOTE, we can have letters which fail above. Examples are U+00AA, U+00BA. These are letters, lower case only, and there IS no upper case.
+ # this causes the original if to not find them. Thus, we 'look them up' here.
+ my $cat = lookupCategory(ord($c));
+ switch ($cat) {
+ case /^Ll/ { $lowonly .= sprintf("\\x%02X", ord($elc)); $clowonly .= $c; }
+ case /^Lu/ { $uponly .= sprintf("\\x%02X", ord($euc)); $cuponly .= $c; }
+ else {}
+ }
+ }
+ if (ord($c) == 0xfffd) {
+ $invalid .= sprintf("\\x%02X", $i);
+ } else {
+ my $cat = lookupCategory(ord($c));
+ switch ($cat) {
+ case /^Cf/ { $specials .= sprintf("\\x%02X", $i); $cspecials .= $c }
+ case /^L[lotu]/ {
+ $alpha .= sprintf("\\x%02X", $i);
+ $calpha .= $c;
+ # best-effort vowel/consonant matching
+ # We normalize to decomposed and match known vowels in lc
+ my $nfd = substr(NFD($c), 0, 1);
+ # Done: Latin, Nordic, Greek, Russian, Ukrainian, Turkish
+ if ($nfd =~ m/[aoueiyœæøɪʏɛɔαεηιοωυаэыуояеюиєіı]/i) {
+ $vowels .= sprintf("\\x%02X", $i);
+ $cvowels .= $c;
+ # Note eg. in English, y depends on situation
+ # (yellow, happy). We set latin yY variants as both!
+ if ($nfd =~ m/y/i) {
+ $consonants .= sprintf("\\x%02X", $i);
+ $cconsonants .= $c;
+ }
+ } else {
+ $consonants .= sprintf("\\x%02X", $i);
+ $cconsonants .= $c;
+ }
+ }
+ case /^Lm/ { $specials .= sprintf("\\x%02X", $i); $cspecials .= $c }
+ #case /^Ll/ { $lower .= sprintf("\\x%02X", $i); }
+ #case /^L[tu]/ { $upper .= sprintf("\\x%02X", $i); }
+ case /^M[cen]/ { $specials .= sprintf("\\x%02X", $i); $cspecials .= $c }
+ case /^S[ckmo]/ { $specials .= sprintf("\\x%02X", $i); $cspecials .= $c }
+ case /^N[dlo]/ { $digits .= sprintf("\\x%02X", $i); $cdigits .= $c }
+ case /^P[cdefios]/ { $punctuation .= sprintf("\\x%02X", $i); $cpunctuation .= $c }
+ case /^Z[lps]/ { $whitespace .= sprintf("\\x%02X", $i); }
+ case /^C/ { $control .= sprintf("\\x%02X", $i); }
+ else { print STDERR "*** Warning, $cat not handled\n"; }
+ }
+ }
+print "\n// $clower\n" if $verbose;
+print "#define CHARS_LOWER_".$encu;
+print "\n";
+print "\n// $clowonly\n" if $verbose;
+print "#define CHARS_LOW_ONLY_".$encu;
+print "\n";
+print "\n// $cupper\n" if $verbose;
+print "#define CHARS_UPPER_".$encu;
+print "\n";
+print "\n// $cuponly\n" if $verbose;
+print "#define CHARS_UP_ONLY_".$encu;
+print "\n";
+print "\n// $cdigits\n" if $verbose;
+print "#define CHARS_DIGITS_".$encu;
+print "\n";
+print "\n// $cpunctuation\n" if $verbose;
+print "#define CHARS_PUNCTUATION_".$encu;
+print "\n";
+print "\n// $cspecials\n" if $verbose;
+print "#define CHARS_SPECIALS_".$encu;
+print "\n";
+print "\n// $calpha\n" if $verbose;
+print "#define CHARS_ALPHA_".$encu;
+print "\n";
+print "\n" if $verbose;
+print "#define CHARS_WHITESPACE_".$encu;
+print "\n";
+print "\n" if $verbose;
+print "#define CHARS_CONTROL_".$encu;
+print "\n";
+print "\n" if $verbose;
+print "#define CHARS_INVALID_".$encu." ";
+if (length($invalid)>80) {print"\\\n\t";}
+print "\"".$invalid."\"\n"; # we ALWAYS want to print the "" even if string empty.
+print "\n// $cvowels\n" if $verbose;
+print "#define CHARS_VOWELS_".$encu;
+print "\n";
+print "\n// $cconsonants\n" if $verbose;
+print "#define CHARS_CONSONANTS_".$encu;
+print "\n";
+# Ok, provide a check to see if any of the characters UNDER 0x80
+# are non-standard. At this time, there is no plan on HOW to handle
+# this within john. The information is simply listed at this time.
+foreach my $i (0x20..0x7E) {
+ my $u = chr($i);
+ Encode::from_to($u, $enc, "utf8");
+ my $str = sprintf "%04X", ord Encode::decode("UTF-8", $u);
+ if ( hex($str) != $i) { printf("WARNING, low character %X maps into Unicode 0x%s\n", $i, $str);}
1,073 run/dumb16.conf
@@ -0,0 +1,1073 @@
+# Generic implementation of "dumb" exhaustive search of Unicode/UCS-2 and
+# an arbitrary charset. Default is to try *all* allocated characters (there
+# are 54473 of them). Even if a fast format can exhaust two characters in 15
+# minutes, three characters would take 1.5 years...
+# The output is UTF-8, so for 16-bit formats you need to give --enc=utf8
+int maxlength; // Maximum password length to try
+int last; // Last character position, zero-based
+int lastid; // Character index in the last position
+int id[0x7f]; // Current character indices for other positions
+int charset[0x10000], c0; // Characters
+int ucs2[0x7F]; // Word in UCS-2
+void init()
+ int minlength;
+ int i, c;
+ minlength = 1; // Initial password length to try, must be at least 1
+ maxlength = 2; // Must be at least same as minlength
+ * This defines the character set. This is auto-generated from UnicodeData.txt
+ * and we skip control characters.
+ */
+ i = 0;
+ c = 0x20; // from SPACE
+ while (c < 0x7f) // TILDE
+ charset[i++] = c++;
+ c = 0xa0; // from NO-BREAK SPACE
+ charset[i++] = c++;
+ c = 0x37a; // from GREEK YPOGEGRAMMENI
+ while (c < 0x37f) // GREEK QUESTION MARK
+ charset[i++] = c++;
+ c = 0x384; // from GREEK TONOS
+ charset[i++] = c++;
+ while (c < 0x3a2) // GREEK CAPITAL LETTER RHO
+ charset[i++] = c++;
+ c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
+ charset[i++] = c++;
+ c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
+ while (c < 0x557) // ARMENIAN CAPITAL LETTER FEH
+ charset[i++] = c++;
+ while (c < 0x560) // ARMENIAN ABBREVIATION MARK
+ charset[i++] = c++;
+ c = 0x561; // from ARMENIAN SMALL LETTER AYB
+ charset[i++] = c++;
+ charset[i++] = 0x589; // ARMENIAN FULL STOP
+ charset[i++] = 0x58a; // ARMENIAN HYPHEN
+ c = 0x591; // from HEBREW ACCENT ETNAHTA
+ while (c < 0x5c8) // HEBREW POINT QAMATS QATAN
+ charset[i++] = c++;
+ c = 0x5d0; // from HEBREW LETTER ALEF
+ while (c < 0x5eb) // HEBREW LETTER TAV
+ charset[i++] = c++;
+ charset[i++] = c++;
+ c = 0x600; // from ARABIC NUMBER SIGN
+ while (c < 0x604) // ARABIC SIGN SAFHA
+ charset[i++] = c++;
+ c = 0x606; // from ARABIC-INDIC CUBE ROOT
+ while (c < 0x61c) // ARABIC SEMICOLON
+ charset[i++] = c++;
+ while (c < 0x70e) // SYRIAC HARKLEAN ASTERISCUS
+ charset[i++] = c++;
+ c = 0x70f; // from SYRIAC ABBREVIATION MARK
+ while (c < 0x74b) // SYRIAC BARREKH
+ charset[i++] = c++;
+ c = 0x74d; // from SYRIAC LETTER SOGDIAN ZHAIN
+ while (c < 0x7b2) // THAANA LETTER NAA
+ charset[i++] = c++;
+ c = 0x7c0; // from NKO DIGIT ZERO
+ while (c < 0x7fb) // NKO LAJANYALAN
+ charset[i++] = c++;
+ c = 0x800; // from SAMARITAN LETTER ALAF
+ while (c < 0x82e) // SAMARITAN MARK NEQUDAA
+ charset[i++] = c++;
+ charset[i++] = c++;
+ c = 0x840; // from MANDAIC LETTER HALQA
+ while (c < 0x85c) // MANDAIC GEMINATION MARK
+ charset[i++] = c++;
+ while (c < 0x978) // DEVANAGARI LETTER UUE
+ charset[i++] = c++;
+ c = 0x979; // from DEVANAGARI LETTER ZHA
+ while (c < 0x980) // DEVANAGARI LETTER BBA
+ charset[i++] = c++;
+ charset[i++] = 0x981; // BENGALI SIGN CANDRABINDU
+ charset[i++] = 0x983; // BENGALI SIGN VISARGA
+ c = 0x985; // from BENGALI LETTER A
+ while (c < 0x98d) // BENGALI LETTER VOCALIC L
+ charset[i++] = c++;
+ charset[i++] = 0x98f; // BENGALI LETTER E
+ charset[i++] = 0x990; // BENGALI LETTER AI
+ c = 0x993; // from BENGALI LETTER O
+ while (c < 0x9a9) // BENGALI LETTER NA
+ charset[i++] = c++;
+ c = 0x9aa; // from BENGALI LETTER PA
+ while (c < 0x9b1) // BENGALI LETTER RA
+ charset[i++] = c++;
+ c = 0x9b6; // from BENGALI LETTER SHA
+ while (c < 0x9ba) // BENGALI LETTER HA
+ charset[i++] = c++;
+ c = 0x9bc; // from BENGALI SIGN NUKTA
+ while (c < 0x9c5) // BENGALI VOWEL SIGN VOCALIC RR
+ charset[i++] = c++;
+ charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
+ charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
+ c = 0x9cb; // from BENGALI VOWEL SIGN O
+ while (c < 0x9cf) // BENGALI LETTER KHANDA TA
+ charset[i++] = c++;
+ charset[i++] = 0x9dc; // BENGALI LETTER RRA
+ charset[i++] = 0x9dd; // BENGALI LETTER RHA
+ c = 0x9df; // from BENGALI LETTER YYA
+ while (c < 0x9e4) // BENGALI VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0x9e6; // from BENGALI DIGIT ZERO
+ while (c < 0x9fc) // BENGALI GANDA MARK
+ charset[i++] = c++;
+ charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
+ charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
+ c = 0xa05; // from GURMUKHI LETTER A
+ while (c < 0xa0b) // GURMUKHI LETTER UU
+ charset[i++] = c++;
+ charset[i++] = 0xa0f; // GURMUKHI LETTER EE
+ charset[i++] = 0xa10; // GURMUKHI LETTER AI
+ c = 0xa13; // from GURMUKHI LETTER OO
+ while (c < 0xa29) // GURMUKHI LETTER NA
+ charset[i++] = c++;
+ c = 0xa2a; // from GURMUKHI LETTER PA
+ while (c < 0xa31) // GURMUKHI LETTER RA
+ charset[i++] = c++;
+ charset[i++] = 0xa32; // GURMUKHI LETTER LA
+ charset[i++] = 0xa33; // GURMUKHI LETTER LLA
+ charset[i++] = 0xa35; // GURMUKHI LETTER VA
+ charset[i++] = 0xa36; // GURMUKHI LETTER SHA
+ charset[i++] = 0xa38; // GURMUKHI LETTER SA
+ charset[i++] = 0xa39; // GURMUKHI LETTER HA
+ c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
+ while (c < 0xa43) // GURMUKHI VOWEL SIGN UU
+ charset[i++] = c++;
+ charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
+ charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
+ charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
+ charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
+ c = 0xa59; // from GURMUKHI LETTER KHHA
+ while (c < 0xa5d) // GURMUKHI LETTER RRA
+ charset[i++] = c++;
+ c = 0xa66; // from GURMUKHI DIGIT ZERO
+ while (c < 0xa76) // GURMUKHI SIGN YAKASH
+ charset[i++] = c++;
+ charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
+ charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
+ c = 0xa85; // from GUJARATI LETTER A
+ while (c < 0xa8e) // GUJARATI VOWEL CANDRA E
+ charset[i++] = c++;
+ charset[i++] = 0xa8f; // GUJARATI LETTER E
+ charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
+ c = 0xa93; // from GUJARATI LETTER O
+ while (c < 0xaa9) // GUJARATI LETTER NA
+ charset[i++] = c++;
+ c = 0xaaa; // from GUJARATI LETTER PA
+ while (c < 0xab1) // GUJARATI LETTER RA
+ charset[i++] = c++;
+ charset[i++] = 0xab2; // GUJARATI LETTER LA
+ charset[i++] = 0xab3; // GUJARATI LETTER LLA
+ c = 0xab5; // from GUJARATI LETTER VA
+ while (c < 0xaba) // GUJARATI LETTER HA
+ charset[i++] = c++;
+ c = 0xabc; // from GUJARATI SIGN NUKTA
+ while (c < 0xac6) // GUJARATI VOWEL SIGN CANDRA E
+ charset[i++] = c++;
+ charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
+ charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
+ charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
+ charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
+ c = 0xae0; // from GUJARATI LETTER VOCALIC RR
+ while (c < 0xae4) // GUJARATI VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0xae6; // from GUJARATI DIGIT ZERO
+ while (c < 0xaf0) // GUJARATI DIGIT NINE
+ charset[i++] = c++;
+ charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
+ charset[i++] = 0xb03; // ORIYA SIGN VISARGA
+ c = 0xb05; // from ORIYA LETTER A
+ while (c < 0xb0d) // ORIYA LETTER VOCALIC L
+ charset[i++] = c++;
+ charset[i++] = 0xb0f; // ORIYA LETTER E
+ charset[i++] = 0xb10; // ORIYA LETTER AI
+ c = 0xb13; // from ORIYA LETTER O
+ while (c < 0xb29) // ORIYA LETTER NA
+ charset[i++] = c++;
+ c = 0xb2a; // from ORIYA LETTER PA
+ while (c < 0xb31) // ORIYA LETTER RA
+ charset[i++] = c++;
+ charset[i++] = 0xb32; // ORIYA LETTER LA
+ charset[i++] = 0xb33; // ORIYA LETTER LLA
+ c = 0xb35; // from ORIYA LETTER VA
+ while (c < 0xb3a) // ORIYA LETTER HA
+ charset[i++] = c++;
+ c = 0xb3c; // from ORIYA SIGN NUKTA
+ while (c < 0xb45) // ORIYA VOWEL SIGN VOCALIC RR
+ charset[i++] = c++;
+ charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
+ charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
+ charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
+ charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
+ charset[i++] = 0xb56; // ORIYA AI LENGTH MARK
+ charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
+ charset[i++] = 0xb5c; // ORIYA LETTER RRA
+ charset[i++] = 0xb5d; // ORIYA LETTER RHA
+ c = 0xb5f; // from ORIYA LETTER YYA
+ while (c < 0xb64) // ORIYA VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0xb66; // from ORIYA DIGIT ZERO
+ charset[i++] = c++;
+ charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
+ charset[i++] = 0xb83; // TAMIL SIGN VISARGA
+ c = 0xb85; // from TAMIL LETTER A
+ while (c < 0xb8b) // TAMIL LETTER UU
+ charset[i++] = c++;
+ charset[i++] = 0xb8e; // TAMIL LETTER E
+ charset[i++] = 0xb90; // TAMIL LETTER AI
+ c = 0xb92; // from TAMIL LETTER O
+ while (c < 0xb96) // TAMIL LETTER KA
+ charset[i++] = c++;
+ charset[i++] = 0xb99; // TAMIL LETTER NGA
+ charset[i++] = 0xb9a; // TAMIL LETTER CA
+ charset[i++] = 0xb9e; // TAMIL LETTER NYA
+ charset[i++] = 0xb9f; // TAMIL LETTER TTA
+ charset[i++] = 0xba3; // TAMIL LETTER NNA
+ charset[i++] = 0xba4; // TAMIL LETTER TA
+ charset[i++] = 0xba8; // TAMIL LETTER NA
+ charset[i++] = 0xbaa; // TAMIL LETTER PA
+ c = 0xbae; // from TAMIL LETTER MA
+ while (c < 0xbba) // TAMIL LETTER HA
+ charset[i++] = c++;
+ c = 0xbbe; // from TAMIL VOWEL SIGN AA
+ while (c < 0xbc3) // TAMIL VOWEL SIGN UU
+ charset[i++] = c++;
+ charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
+ charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
+ c = 0xbca; // from TAMIL VOWEL SIGN O
+ while (c < 0xbce) // TAMIL SIGN VIRAMA
+ charset[i++] = c++;
+ c = 0xbe6; // from TAMIL DIGIT ZERO
+ while (c < 0xbfb) // TAMIL NUMBER SIGN
+ charset[i++] = c++;
+ charset[i++] = 0xc01; // TELUGU SIGN CANDRABINDU
+ charset[i++] = 0xc03; // TELUGU SIGN VISARGA
+ c = 0xc05; // from TELUGU LETTER A
+ while (c < 0xc0d) // TELUGU LETTER VOCALIC L
+ charset[i++] = c++;
+ charset[i++] = 0xc0e; // TELUGU LETTER E
+ charset[i++] = 0xc10; // TELUGU LETTER AI
+ c = 0xc12; // from TELUGU LETTER O
+ while (c < 0xc29) // TELUGU LETTER NA
+ charset[i++] = c++;
+ c = 0xc2a; // from TELUGU LETTER PA
+ while (c < 0xc34) // TELUGU LETTER LLA
+ charset[i++] = c++;
+ c = 0xc35; // from TELUGU LETTER VA
+ while (c < 0xc3a) // TELUGU LETTER HA
+ charset[i++] = c++;
+ c = 0xc3d; // from TELUGU SIGN AVAGRAHA
+ while (c < 0xc45) // TELUGU VOWEL SIGN VOCALIC RR
+ charset[i++] = c++;
+ charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
+ charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
+ c = 0xc4a; // from TELUGU VOWEL SIGN O
+ while (c < 0xc4e) // TELUGU SIGN VIRAMA
+ charset[i++] = c++;
+ charset[i++] = 0xc55; // TELUGU LENGTH MARK
+ charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
+ charset[i++] = 0xc58; // TELUGU LETTER TSA
+ charset[i++] = 0xc59; // TELUGU LETTER DZA
+ c = 0xc60; // from TELUGU LETTER VOCALIC RR
+ while (c < 0xc64) // TELUGU VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0xc66; // from TELUGU DIGIT ZERO
+ while (c < 0xc70) // TELUGU DIGIT NINE
+ charset[i++] = c++;
+ while (c < 0xc80) // TELUGU SIGN TUUMU
+ charset[i++] = c++;
+ charset[i++] = 0xc82; // KANNADA SIGN ANUSVARA
+ charset[i++] = 0xc83; // KANNADA SIGN VISARGA
+ c = 0xc85; // from KANNADA LETTER A
+ while (c < 0xc8d) // KANNADA LETTER VOCALIC L
+ charset[i++] = c++;
+ charset[i++] = 0xc8e; // KANNADA LETTER E
+ charset[i++] = 0xc90; // KANNADA LETTER AI
+ c = 0xc92; // from KANNADA LETTER O
+ while (c < 0xca9) // KANNADA LETTER NA
+ charset[i++] = c++;
+ c = 0xcaa; // from KANNADA LETTER PA
+ while (c < 0xcb4) // KANNADA LETTER LLA
+ charset[i++] = c++;
+ c = 0xcb5; // from KANNADA LETTER VA
+ while (c < 0xcba) // KANNADA LETTER HA
+ charset[i++] = c++;
+ c = 0xcbc; // from KANNADA SIGN NUKTA
+ while (c < 0xcc5) // KANNADA VOWEL SIGN VOCALIC RR
+ charset[i++] = c++;
+ charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
+ charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
+ c = 0xcca; // from KANNADA VOWEL SIGN O
+ while (c < 0xcce) // KANNADA SIGN VIRAMA
+ charset[i++] = c++;
+ charset[i++] = 0xcd5; // KANNADA LENGTH MARK
+ charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
+ c = 0xce0; // from KANNADA LETTER VOCALIC RR
+ while (c < 0xce4) // KANNADA VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0xce6; // from KANNADA DIGIT ZERO
+ while (c < 0xcf0) // KANNADA DIGIT NINE
+ charset[i++] = c++;
+ charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
+ charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
+ charset[i++] = 0xd02; // MALAYALAM SIGN ANUSVARA
+ charset[i++] = 0xd03; // MALAYALAM SIGN VISARGA
+ c = 0xd05; // from MALAYALAM LETTER A
+ while (c < 0xd0d) // MALAYALAM LETTER VOCALIC L
+ charset[i++] = c++;
+ charset[i++] = 0xd0e; // MALAYALAM LETTER E
+ charset[i++] = 0xd10; // MALAYALAM LETTER AI
+ c = 0xd12; // from MALAYALAM LETTER O
+ while (c < 0xd3b) // MALAYALAM LETTER TTTA
+ charset[i++] = c++;
+ c = 0xd3d; // from MALAYALAM SIGN AVAGRAHA
+ while (c < 0xd45) // MALAYALAM VOWEL SIGN VOCALIC RR
+ charset[i++] = c++;
+ charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
+ charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
+ c = 0xd4a; // from MALAYALAM VOWEL SIGN O
+ while (c < 0xd4f) // MALAYALAM LETTER DOT REPH
+ charset[i++] = c++;
+ c = 0xd60; // from MALAYALAM LETTER VOCALIC RR
+ while (c < 0xd64) // MALAYALAM VOWEL SIGN VOCALIC LL
+ charset[i++] = c++;
+ c = 0xd66; // from MALAYALAM DIGIT ZERO
+ charset[i++] = c++;
+ c = 0xd79; // from MALAYALAM DATE MARK
+ while (c < 0xd80) // MALAYALAM LETTER CHILLU K
+ charset[i++] = c++;
+ charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA
+ charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
+ c = 0xd85; // from SINHALA LETTER AYANNA
+ while (c < 0xd97) // SINHALA LETTER AUYANNA
+ charset[i++] = c++;
+ charset[i++] = c++;
+ while (c < 0xdbc) // SINHALA LETTER RAYANNA
+ charset[i++] = c++;
+ c = 0xdc0; // from SINHALA LETTER VAYANNA
+ while (c < 0xdc7) // SINHALA LETTER FAYANNA
+ charset[i++] = c++;
+ c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
+ charset[i++] = c++;
+ charset[i++] = c++;
+ charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
+ charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
+ c = 0xe01; // from THAI CHARACTER KO KAI
+ while (c < 0xe3b) // THAI CHARACTER PHINTHU
+ charset[i++] = c++;
+ c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
+ while (c < 0xe5c) // THAI CHARACTER KHOMUT
+ charset[i++] = c++;
+ charset[i++] = 0xe81; // LAO LETTER KO
+ charset[i++] = 0xe82; // LAO LETTER KHO SUNG
+ charset[i++] = 0xe87; // LAO LETTER NGO
+ charset[i++] = 0xe88; // LAO LETTER CO
+ c = 0xe94; // from LAO LETTER DO
+ while (c < 0xe98) // LAO LETTER THO TAM
+ charset[i++] = c++;
+ c = 0xe99; // from LAO LETTER NO
+ while (c < 0xea0) // LAO LETTER FO SUNG
+ charset[i++] = c++;
+ charset[i++] = 0xea1; // LAO LETTER MO
+ charset[i++] = 0xea3; // LAO LETTER LO LING
+ charset[i++] = 0xeaa; // LAO LETTER SO SUNG
+ charset[i++] = 0xeab; // LAO LETTER HO SUNG
+ c = 0xead; // from LAO LETTER O
+ while (c < 0xeba) // LAO VOWEL SIGN UU
+ charset[i++] = c++;
+ charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON
+ charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO
+ c = 0xec0; // from LAO VOWEL SIGN E
+ while (c < 0xec5) // LAO VOWEL SIGN AI
+ charset[i++] = c++;
+ c = 0xec8; // from LAO TONE MAI EK
+ while (c < 0xece) // LAO NIGGAHITA
+ charset[i++] = c++;
+ c = 0xed0; // from LAO DIGIT ZERO
+ while (c < 0xeda) // LAO DIGIT NINE
+ charset[i++] = c++;
+ charset[i++] = 0xedc; // LAO HO NO
+ charset[i++] = 0xedd; // LAO HO MO
+ c = 0xf00; // from TIBETAN SYLLABLE OM
+ while (c < 0xf48) // TIBETAN LETTER JA
+ charset[i++] = c++;
+ c = 0xf49; // from TIBETAN LETTER NYA
+ while (c < 0xf6d) // TIBETAN LETTER RRA
+ charset[i++] = c++;
+ c = 0xf71; // from TIBETAN VOWEL SIGN AA
+ while (c < 0xf98) // TIBETAN SUBJOINED LETTER JA
+ charset[i++] = c++;
+ charset[i++] = c++;
+ c = 0xfbe; // from TIBETAN KU RU KHA
+ while (c < 0xfcd) // TIBETAN SYMBOL NOR BU BZHI -KHYIL
+ charset[i++] = c++;
+ c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
+ charset[i++] = c++;
+ c = 0x1000; // from MYANMAR LETTER KA
+ while (c < 0x10c6) // GEORGIAN CAPITAL LETTER HOE
+ charset[i++] = c++;
+ c = 0x10d0; // from GEORGIAN LETTER AN
+ while (c < 0x10fd) // MODIFIER LETTER GEORGIAN NAR
+ charset[i++] = c++;
+ c = 0x1100; // from HANGUL CHOSEONG KIYEOK
+ while (c < 0x1249) // ETHIOPIC SYLLABLE QWA
+ charset[i++] = c++;
+ c = 0x124a; // from ETHIOPIC SYLLABLE QWI
+ while (c < 0x124e) // ETHIOPIC SYLLABLE QWE
+ charset[i++] = c++;
+ c = 0x1250; // from ETHIOPIC SYLLABLE QHA
+ while (c < 0x1257) // ETHIOPIC SYLLABLE QHO
+ charset[i++] = c++;
+ c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
+ while (c < 0x125e) // ETHIOPIC SYLLABLE QHWE
+ charset[i++] = c++;
+ c = 0x1260; // from ETHIOPIC SYLLABLE BA
+ while (c < 0x1289) // ETHIOPIC SYLLABLE XWA
+ charset[i++] = c++;
+ c = 0x128a; // from ETHIOPIC SYLLABLE XWI
+ while (c < 0x128e) // ETHIOPIC SYLLABLE XWE
+ charset[i++] = c++;
+ c = 0x1290; // from ETHIOPIC SYLLABLE NA
+ while (c < 0x12b1) // ETHIOPIC SYLLABLE KWA
+ charset[i++] = c++;
+ c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
+ while (c < 0x12b6) // ETHIOPIC SYLLABLE KWE
+ charset[i++] = c++;
+ c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
+ while (c < 0x12bf) // ETHIOPIC SYLLABLE KXO
+ charset[i++] = c++;
+ c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
+ while (c < 0x12c6) // ETHIOPIC SYLLABLE KXWE
+ charset[i++] = c++;
+ c = 0x12c8; // from ETHIOPIC SYLLABLE WA
+ while (c < 0x12d7) // ETHIOPIC SYLLABLE PHARYNGEAL O
+ charset[i++] = c++;
+ c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
+ while (c < 0x1311) // ETHIOPIC SYLLABLE GWA
+ charset[i++] = c++;
+ c = 0x1312; // from ETHIOPIC SYLLABLE GWI
+ while (c < 0x1316) // ETHIOPIC SYLLABLE GWE
+ charset[i++] = c++;
+ c = 0x1318; // from ETHIOPIC SYLLABLE GGA
+ while (c < 0x135b) // ETHIOPIC SYLLABLE FYA
+ charset[i++] = c++;
+ while (c < 0x137d) // ETHIOPIC NUMBER TEN THOUSAND
+ charset[i++] = c++;
+ while (c < 0x139a) // ETHIOPIC TONAL MARK KURT
+ charset[i++] = c++;
+ c = 0x13a0; // from CHEROKEE LETTER A
+ while (c < 0x13f5) // CHEROKEE LETTER YV
+ charset[i++] = c++;
+ c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
+ while (c < 0x169d) // OGHAM REVERSED FEATHER MARK
+ charset[i++] = c++;
+ c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
+ while (c < 0x16f1) // RUNIC BELGTHOR SYMBOL
+ charset[i++] = c++;
+ c = 0x1700; // from TAGALOG LETTER A
+ while (c < 0x170d) // TAGALOG LETTER YA
+ charset[i++] = c++;
+ c = 0x170e; // from TAGALOG LETTER LA
+ while (c < 0x1715) // TAGALOG SIGN VIRAMA
+ charset[i++] = c++;
+ c = 0x1720; // from HANUNOO LETTER A
+ charset[i++] = c++;
+ c = 0x1740; // from BUHID LETTER A
+ while (c < 0x1754) // BUHID VOWEL SIGN U
+ charset[i++] = c++;
+ c = 0x1760; // from TAGBANWA LETTER A
+ while (c < 0x176d) // TAGBANWA LETTER YA
+ charset[i++] = c++;
+ charset[i++] = 0x176e; // TAGBANWA LETTER LA
+ charset[i++] = 0x1770; // TAGBANWA LETTER SA
+ charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
+ charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
+ c = 0x1780; // from KHMER LETTER KA
+ while (c < 0x17de) // KHMER SIGN ATTHACAN
+ charset[i++] = c++;
+ c = 0x17e0; // from KHMER DIGIT ZERO
+ while (c < 0x17ea) // KHMER DIGIT NINE
+ charset[i++] = c++;
+ c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON