New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New feature request: support incremental search by Chinese PinYin #253
Comments
Sorry, I don't know anything about it nor such dll usage. So how would this work? You type: I can imagine a translation table, but not much else although I'm not sure how to do that. Wouldn't this be a solution: Part1: 扽 so when you type wo3 you'll find the snippet because wo3 is in part2? |
Working AutoHotkey v1.1+ code #Requires AutoHotkey v1.1.31+
; Demo: ask TCMatch whether the sample text matches the query "w j" and show
; the result together with the path of the DLL that was loaded.
string := "教师节快乐 我 sfla lfalksdf lkads flk a"
; TCMatch() calls TCMatch64 under a 64-bit interpreter and TCMatch otherwise,
; so load the DLL whose bitness matches the running interpreter.
TCMatchPath := A_ScriptDir . (A_PtrSize == 8 ? "\tcmatch64.dll" : "\tcmatch.dll")
g_TCMatchModule := DllCall("LoadLibrary", "Str", TCMatchPath, "Ptr")
if (!g_TCMatchModule)
{
    ; LoadLibrary returns 0 on failure; report it instead of showing a misleading result.
    MsgBox % "Could not load " TCMatchPath
    return
}
a := TCMatch(string, "w j")
MsgBox % a "`n" TCMatchPath
DllCall("FreeLibrary", "Ptr", g_TCMatchModule) ; release the module once the demo is done
g_TCMatchModule := ""
return
; Returns whatever MatchFileW reports for aNeedle (the query) against aHaystack
; (the text), calling into TCMatch64 under a 64-bit interpreter and TCMatch otherwise.
TCMatch(aHaystack, aNeedle)
{
    dllName := (A_PtrSize == 8) ? "TCMatch64" : "TCMatch"
    return DllCall(dllName . "\MatchFileW", "WStr", aNeedle, "WStr", aHaystack)
} With working ini with [general]
simple_search_activate_char=
; 简单搜索前导符号 regex_search_activate_char=?
; 正则搜索前导符号 leven_search_activate_char=<
srch_activate_char=*
preset_activate_char=>
; 加载搜索模版前导符号 simple_search_match_beginning_activate_char=^
and_separator_char=" "
; 与 关系符号 or_separator_char=|
; 或 关系符号 wdx_separator_char=/
negate_char=!
case_sensitive=0
; 大小写敏感 allow_empty_result=0
filter_files_and_folders=3
match_beginning=0
; 从第几个字符开始匹配 use_pinyin=1
; 使用中文 use_korean=0
; 使用韩文 [gui]
override_search=1
invert_result=0
one_line_gui=1
show_presets=0
regex_search_activate_char=?
leven_search_activate_char=<
simple_search_match_beginning_activate_char=^
or_separator_char=|
wdx_separator_char=/
allow_empty_result=1
use_pinyin=1
use_korean=0
[presets]
e=.exe|.bat|.com|.scr|.lnk
; 搜索模版,输入 >e 即可搜索对应字符串 [replace]
chars1=》|>
; 搜索前先替换字符,可用于中文输入法没切换的情况 chars2=?|?
[wdx]
debug_output=1
wdx_cache=1000
[gui]
override_search=1
invert_result=0 |
Not sure what this AutoHotkey solution is https://www-autohotkey-com.translate.goog/boards/viewtopic.php?f=27&t=4255&p=24114&hilit=pinyin&_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=wapp#p24114 |
Does this do the same as tcmatch? The forum thread above searches for words as far as I can understand how it works. #NoEnv
#SingleInstance, force
; Load the code-point -> PinYin table that PinyinSearch() reads as a global.
; Source of Uni2Pinyin: https://raw.githubusercontent.com/yangyangwithgnu/hanz2piny/master/refer/Uni2Pinyin
; Save file as Uni2Pinyin.db
FileRead, Uni2Pinyin, Uni2Pinyin.db
if ErrorLevel
{
    ; With an empty table no lookup can succeed, so the search would just report 0.
    MsgBox, Could not read Uni2Pinyin.db
    ExitApp
}
Text:="教师节快乐 我 sfla lfalksdf lkads flk a"
find:="w j"
MsgBox, % PinyinSearch(Text,find)
ExitApp
; Returns 1 when every space-separated term of needle is a prefix of the table
; entry of at least one character of haystack, otherwise 0.
;   haystack - text to search
;   needle   - space-separated PinYin prefixes, e.g. "w j"
; Reads the global Uni2Pinyin table; a character is looked up as a line that
; starts with its hexadecimal code point followed by a tab.
; A needle without any non-empty term returns 0.
PinyinSearch(haystack,needle)
{
    Global Uni2Pinyin
    termsChecked := 0
    for k, term in StrSplit(needle, " ")
    {
        ; Consecutive or trailing spaces produce empty terms; they carry no constraint.
        if (term = "")
            continue
        termsChecked++
        termFound := false
        Loop, Parse, haystack
        {
            ; Temporarily switch to HEX so ChrCode becomes "0x...." for the table lookup.
            SetFormat, Integer, HEX
            ChrCode := Asc(A_LoopField)
            SetFormat, Integer, D
            If ChrCode between 0x3007 and 0x9FA5
            {
                ; SubStr(..., 3) drops the "0x" prefix; \Q...\E keeps the term literal.
                If RegexMatch(Uni2Pinyin, "im`n)^" SubStr(ChrCode, 3) "\t\Q" term "\E")
                {
                    ; One hit per term is enough. Counting every hit would let a
                    ; single term that matches twice satisfy a two-term query.
                    termFound := true
                    break
                }
            }
        }
        if (!termFound)
            Return 0
    }
    Return (termsChecked > 0) ? 1 : 0
} |
I've converted the hex values in the http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/CJK.html Uni2Pinyin table to Decimal values so the SetFormat Conversion + SubStr used above are no longer needed. 📄 #NoEnv
#SingleInstance, force
; Load the decimal code-point -> PinYin table that PinYinSearch() reads as a global.
FileRead, PinYinTable, DecimalPinYinData.txt
if ErrorLevel
{
    ; With an empty table no lookup can succeed, so the search would just report 0.
    MsgBox, Could not read DecimalPinYinData.txt
    ExitApp
}
Text:="教师节快乐, 我, 丌, 1234, abcde"
find:="ji wo3"
MsgBox, % PinYinSearch(Text,find)
ExitApp
; Returns 1 when every space-separated term of needle is a prefix of the table
; entry of at least one character of haystack, otherwise 0.
;   haystack - text to search
;   needle   - space-separated PinYin prefixes, e.g. "ji wo3"
; Reads the global PinYinTable; a character is looked up as a line that starts
; with its decimal code point followed by a tab.
; A needle without any non-empty term returns 0.
PinYinSearch(haystack,needle)
{
    Global PinYinTable
    termsChecked := 0
    for k, term in StrSplit(needle, " ")
    {
        ; Consecutive or trailing spaces produce empty terms; they carry no constraint.
        if (term = "")
            continue
        termsChecked++
        termFound := false
        Loop, Parse, haystack
        {
            ChrCode := Ord(A_LoopField)
            If ChrCode between 12295 and 40869
            {
                ; \Q...\E keeps the term literal so regex metacharacters cannot break the lookup.
                If RegexMatch(PinYinTable, "im`n)^" ChrCode "\t\Q" term "\E")
                {
                    ; One hit per term is enough. Counting every hit would let a
                    ; single term that matches twice satisfy a two-term query.
                    termFound := true
                    break
                }
            }
        }
        if (!termFound)
            Return 0
    }
    Return (termsChecked > 0) ? 1 : 0
} |
Thanks for listening and providing a solution. ;https://www.autohotkey.com/boards/viewtopic.php?f=27&t=1629
MsgBox, % zh2py("二级汉字 -> 廿") ; outputs "EJHZ -> N"
Return
; Ported from PHP (http://www.sjyhome.com/php/201311170606.html)
; Replaces every Chinese character of str that the tables below cover with the
; upper-case first letter of its PinYin; all other characters are copied through.
;   str - input text
; Returns the converted string; str is returned as is when it contains no
; character outside the 0x00-0xFF range.
zh2py(str)
{
    ; Based on the Chinese character row-cell (qu-wei) code table
    ; (http://www.mytju.com/classcode/tools/QuWeiMa_FullList.asp).
    ; Rows 16-55 are sorted by PinYin, so the code of a character in that range
    ; is enough to tell its PinYin initial.
    ; First part of the table, sorted by PinYin.
    ; Rows 16-55
    /*
    'A'=>0xB0A1, 'B'=>0xB0C5, 'C'=>0xB2C1, 'D'=>0xB4EE, 'E'=>0xB6EA, 'F'=>0xB7A2, 'G'=>0xB8C1,'H'=>0xB9FE,
    'J'=>0xBBF7, 'K'=>0xBFA6, 'L'=>0xC0AC, 'M'=>0xC2E8, 'N'=>0xC4C3, 'O'=>0xC5B6, 'P'=>0xC5BE,'Q'=>0xC6DA,
    'R'=>0xC8BB, 'S'=>0xC8F6, 'T'=>0xCBFA, 'W'=>0xCDDA, 'X'=>0xCEF4, 'Y'=>0xD1B9, 'Z'=>0xD4D1
    */
    ; FirstTable[i] is the exclusive upper bound of the code range whose initial is FirstLetter[i].
    static FirstTable := [ 0xB0C5, 0xB2C1, 0xB4EE, 0xB6EA, 0xB7A2, 0xB8C1, 0xB9FE, 0xBBF7, 0xBFA6, 0xC0AC, 0xC2E8
    , 0xC4C3, 0xC5B6, 0xC5BE, 0xC6DA, 0xC8BB, 0xC8F6, 0xCBFA, 0xCDDA, 0xCEF4, 0xD1B9, 0xD4D1, 0xD7FA ]
    static FirstLetter := StrSplit("ABCDEFGHJKLMNOPQRSTWXYZ")
    ; Second part of the table, not sorted: each string holds the PinYin initials of the
    ; characters of one row, in cell order. Compiled from online lookups; may contain errors.
    ; Rows 56-87
    static SecondTable := [ StrSplit("CJWGNSPGCGNEGYPBTYYZDXYKYGTZJNMJQMBSGZSCYJSYYFPGKBZGYDYWJKGKLJSWKPJQHYJWRDZLSYMRYPYWWCCKZNKYYG")
    , StrSplit("TTNGJEYKKZYTCJNMCYLQLYPYSFQRPZSLWBTGKJFYXJWZLTBNCXJJJJTXDTTSQZYCDXXHGCKBPHFFSSTYBGMXLPBYLLBHLX")
    , StrSplit("SMZMYJHSOJNGHDZQYKLGJHSGQZHXQGKXZZWYSCSCJXYEYXADZPMDSSMZJZQJYZCJJFWQJBDZBXGZNZCPWHWXHQKMWFBPBY")
    , StrSplit("DTJZZKXHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJYQDCSBBQBEFSJYHWWGZKPYLQBGLDLCDTNMAYDDKSSNGYCSGXLYZAYPN")
    , StrSplit("PTSDKDYLHGYMYLCXPYCJNDQJWXQXFYYFJLEJPZRXCCQWQQSBZKYMGPLBMJRQCFLNYMYQMSQYRBCJTHZTQFRXQHXMQJCJLY")
    , StrSplit("QGJMSHZKBSWYEMYLTXFSYDXWLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCL")
    , StrSplit("QKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNMGYKLDYXZPYLGGSMTCFBAJJZYLJTYANJGBJPLQGSZYQYAXBKYSECJSZNSLYZH")
    , StrSplit("ZXLZCGHPXZHZNYTDSBCJKDLZAYFFYDLEBBGQYZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMMCLGWZSZXZJFZNMLZZTHCS")
    , StrSplit("YDBDLLSCDDNLKJYKJSYCJLKWHQASDKNHCSGAGHDAASHTCPLCPQYBSZMPJLPCJOQLCDHJJYSPRCHNWJNLHLYYQYYWZPTCZG")
    , StrSplit("WWMZFFJQQQQYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYDCFCXYHLXCHYZJQSQQAGMNYXPFRKSSBJLYXY")
    , StrSplit("SYGLNSCMHCWWMNZJJLXXHCHSYZSTTXRYCYXBYHCSMXJSZNPWGPXXTAYBGAJCXLYXDCCWZOCWKCCSBNHCPDYZNFCYYTYCKX")
    , StrSplit("KYBSQKKYTQQXFCMCHCYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQKZPQSQSCFYMMDMGBWHWLGSLLYSDLMLXPTHMJ")
    , StrSplit("HWLJZYHZJXKTXJLHXRSWLWZJCBXMHZQXSDZPSGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCL")
    , StrSplit("SLDCLRPBHZHXYYFHBMGDMYCNQQWLQHJJCYWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSLJHTZKZJECXJCJNMFBYCSFYWYB")
    , StrSplit("JZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPAPCLYQPCLZXSBNMSGGFNZJJBZSFZYNTXHPLQKZCZWALSBCZJXSYZGWK")
    , StrSplit("YPSGXFZFCDKHJGXTLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJCZYDJWFMJKLDDPMJEGXYHYLXHLQYQHKYCW")
    , StrSplit("CJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJCCCXFPZNZZLJPLXXYZTZLGDLTCKLYRZZGQTTJHHHJLJAXFGFJZSLCFDQZ")
    , StrSplit("LCLGJDJZSNZLLJPJQDCCLCJXMYZFTSXGCGSBRZXJQQCTZHGYQTJQQLZXJYLYLBCYAMCSTYLPDJBYREGKLZYZHLYSZQLZNW")
    , StrSplit("CZCLLWJQJJJKDGJZOLBBZPPGLGHTGZXYGHZMYCNQSYCYHBHGXKAMTXYXNBSKYZZGJZLQJTFCJXDYGJQJJPMGWGJJJPKQSB")
    , StrSplit("GBMMCJSSCLPQPDXCDYYKYPCJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJYFYZDJCNMWESCYGLBTZZGMSS")
    , StrSplit("LLYXYSXXBSJSBBSGGHFJLYPMZJNLYYWDQSHZXTYYWHMCYHYWDBXBTLMSYYYFSXJCBDXXLHJHFSSXZQHFZMZCZTQCXZXRTT")
    , StrSplit("DJHNRYZQQMTQDMMGNYDXMJGDXCDYZBFFALLZTDLTFXMXQZDNGWQDBDCZJDXBZGSQQDDJCMBKZFFXMKDMDSYYSZCMLJDSYN")
    , StrSplit("SPRSKMKMPCKLGTBQTFZSWTFGGLYPLLJZHGJJGYPZLTCSMCNBTJBQFKDHBYZGKPBBYMTDSSXTBNPDKLEYCJNYCDYKZTDHQH")
    , StrSplit("SYZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDXJPLDLPCQDHZYCBZSCZBZMSLJFLKR")
    , StrSplit("ZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLGNDJLSLYGKDZPZXJYYZLWCXSZFGWYYDLYHCLJS")
    , StrSplit("CMBJHBLYZLYCBLYDPDQYSXQZBYTDKYXJYYCNRJMPDJGKLCLJBCTBJDDBBLBLCZQRPYXJCJLZCSHLTOLJNMDDDLNGKATHQH")
    , StrSplit("JHYKHEZNMSHRPHQQJCHGMFPRXHJGDYCHGHLYRZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMM")
    , StrSplit("MYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYJDJJZZHQPDSZGLSTJBCKBXYQZJSGPSXQZQZRQTBDKYXZKHHGFLBCSMDLDG")
    , StrSplit("DZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYG")
    , StrSplit("CTXMZYSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZLCLZXZDMXMPHJSGZ")
    , StrSplit("GSZZQLYLWTJPFSYASMCJBTZYYCWMYTZSJJLJCQLWZMALBXYFBPNLSFHTGJWEJJXXGLLJSTGSHJQLZFKCGNNNSZFDEQFHBS")
    , StrSplit("AQTGYLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHMJJZMQASLDCYXYQDLQCAFYWYXQHZ") ]
    ; Nothing to convert when every character is within 0x00-0xFF: return the input unchanged.
    if !RegExMatch(str, "[^\x{00}-\x{ff}]")
        Return str
    ; Scratch buffer for one CP936-encoded character plus the terminating null StrPut writes.
    ; Sized on every call instead of relying on a static-initializer side effect.
    VarSetCapacity(var, 4, 0)
    newStr := ""
    Loop, Parse, str
    {
        StrPut(A_LoopField, &var, "CP936")
        H := NumGet(var, 0, "UChar") ; first encoded byte
        L := NumGet(var, 1, "UChar") ; second encoded byte
        ; Outside the byte ranges the tables cover: keep the character as is.
        if (H < 0xB0 || L < 0xA1 || H > 0xF7 || L = 0xFF)
        {
            newStr .= A_LoopField
            Continue
        }
        initial := ""
        if (H < 0xD8) ; (H >= 0xB0 && H <= 0xD7): character is in the level-1 area (rows 16-55)
        {
            W := (H << 8) | L
            For key, value in FirstTable
            {
                if (W < value)
                {
                    initial := FirstLetter[key]
                    Break
                }
            }
        }
        else ; (H >= 0xD8 && H <= 0xF7): character is in the level-2 area (rows 56-87)
            initial := SecondTable[ H - 0xD8 + 1 ][ L - 0xA1 + 1 ]
        ; A code with no table entry keeps its original character instead of vanishing.
        newStr .= (initial = "") ? A_LoopField : initial
    }
    Return newStr
} |
Ah, ok, so you never search for
edit: probably a copy error, copying again and it seems to work |
So my idea for now would be:
|
Hi, thanks for developing this wonderful app.
Would it be possible for Lintalist to support incremental search by the first letter of Chinese PinYin?
E.g.: when the user inputs `w`, the Chinese character 我 (whose PinYin is `wo3`) can be found.
I think `tcmatch.dll` is well suited to this kind of indexing: https://valuex.github.io/2023/07/23/Autohotkey%E8%B0%83%E7%94%A8TCMatch.dll/
The text was updated successfully, but these errors were encountered: