/
pseudoYKT.xml
109 lines (98 loc) · 5 KB
/
pseudoYKT.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
<?xml version="1.0" encoding="UTF-8" ?>
<!-- $Id$ -->
<Module>
<ModulePrefs title="PseudoYKT to Unicode converter" height="400"
author="Mikhail Kyraha"
author_email="scriptdesigner+pseudoykt.gadget@gmail.com"
description="Converting tool from various pseudo encodings to Unicode"
screenshot=""
thumbnail="" />
<Content type="html"><![CDATA[
<form name="uniconvert" method="get" action="">
<input type="button" id="buttonConvert" value="Convert" onclick="convertText(this.form)" />
<label for="enc0"><input type="radio" name="encodingNum" id="enc0" value="0" checked="1"/> Auto</label>
<label for="enc1"><input type="radio" name="encodingNum" id="enc1" value="1" /> Times</label>
<label for="enc2"><input type="radio" name="encodingNum" id="enc2" value="2" /> Yak</label>
<label for="enc3"><input type="radio" name="encodingNum" id="enc3" value="3" /> Saxa</label>
<label for="enc4"><input type="radio" name="encodingNum" id="enc4" value="4" /> Dabyl</label>
<label for="enc5"><input type="radio" name="encodingNum" id="enc5" value="5" /> Lazurski</label>
<br />
<textarea id="bigText" style="width:100%;height:26em;"></textarea>
</form>
<script type="text/javascript">
// Reference frequency vector: one weight per special letter, keyed by the
// correct Unicode character. Per the original author it was obtained
// statistically on correctly encoded texts. analyzeEncoding() compares the
// letter counts of the input against this vector using cosineVec().
const frequencies = { "Ҕ":0, "ҕ":50, "Ҥ":0, "ҥ":39, "Ү":15, "ү":249, "Һ":0, "һ":127, "Ө":2, "ө":87 };
// Various mappings of correct (Unicode) encodings onto incorrect (private) ones.
// Each entry maps a correct Unicode letter (key) to the character that stands
// for it in one pseudo encoding (value); doConvert() replaces value -> key.
// The array index equals the value of the matching "encodingNum" radio button
// (0 = Auto, 1 = Times, 2 = Yak, 3 = Saxa, 4 = Dabyl, 5 = Lazurski), so the
// order of the entries must not change.
// NOTE(review): the "?...?" markers in the trailing comments appear to flag
// letters whose mapping the author was unsure about - confirm before relying on them.
const mappings = [
{ "ҕ":"ҕ", "ҥ":"ҥ", "ө":"ө", "һ":"һ", "ү":"ү", "Ҕ":"Ҕ", "Ҥ":"Ҥ", "Ө":"Ө", "Һ":"Һ", "Ү":"Ү" }, // No convert
{ "ҕ":"±", "ҥ":"І", "ө":"і", "һ":"ґ", "ү":"µ", "Ҕ":"Ў", "Ҥ":"ў", "Ө":"Ј", "Һ":"¤", "Ү":"Ґ" }, // Times_uni ?ҔҤ?
{ "ҕ":"і", "ҥ":"µ", "ө":"є", "һ":"Ї", "ү":"ў", "Ҕ":"І", "Ҥ":"Μ", "Ө":"Є", "Һ":"ї", "Ү":"Ў" }, // Yak_uni ?ҔҤҺ?
{ "ҕ":"є", "ҥ":"Ѕ", "ө":"№", "һ":"»", "ү":"ј", "Ҕ":"Є", "Ҥ":String.fromCharCode(173), "Ө":"©", "Һ":"«", "Ү":"¬" }, // Saxa_uni ?ҔҤ?
{ "ҕ":"±", "ҥ":"²", "ө":"³", "һ":"´", "ү":"µ", "Ҕ":"¡", "Ҥ":"¢", "Ө":"£", "Һ":"¤", "Ү":"¥" }, // Dabyl
{ "ҕ":"±", "ҥ":"²", "ө":"³", "һ":"´", "ү":"°", "Ҕ":"¡", "Ҥ":"¢", "Ө":"£", "Һ":"¤", "Ү":"¥" }, // Lazurski
];
/**
 * Click handler of the "Convert" button.
 *
 * Reads the text of the first <textarea> inside the form and the selected
 * "encodingNum" radio button. When no button is selected, or the selected
 * value is 0 (Auto), the encoding is guessed with analyzeEncoding() and the
 * radio button whose value equals the guess becomes the checked one. If the
 * resulting index is greater than zero the textarea content is replaced by
 * the output of doConvert().
 *
 * @param {HTMLFormElement} formObj - form holding the textarea and the radios
 */
function convertText(formObj) {
  const textArea = formObj.getElementsByTagName("textarea")[0];
  const radios = formObj.elements['encodingNum'];

  // The last checked radio wins, exactly as a plain forward scan would do.
  let mapIndex;
  for( let n = 0; n < radios.length; n++ ) {
    const radio = radios[n];
    if( radio.checked ) {
      mapIndex = radio.value;
    }
  }

  // Radio values are strings, so the loose comparisons are intentional:
  // "0" has to be treated like the number 0.
  const wantsAuto = mapIndex == null || mapIndex == 0;
  if( wantsAuto ) {
    mapIndex = analyzeEncoding(textArea.value);
    // Reflect the guess in the UI by moving the selection to that encoding.
    for( let n = 0; n < radios.length; n++ ) {
      radios[n].checked = (radios[n].value == mapIndex);
    }
  }

  // Index 0 is the identity mapping; anything not above zero needs no work.
  if( !(mapIndex > 0) ) return;
  textArea.value = doConvert( textArea.value, mapIndex );
}
/**
 * Guess which entry of `mappings` the given text was written with.
 *
 * For every mapping, counts how often each of its private characters occurs
 * in the text, keys those counts by the corresponding Unicode letter and
 * compares the resulting vector with `frequencies` using cosineVec().
 *
 * @param {string} current - text to analyse
 * @returns {number} index into `mappings` with the highest similarity;
 *   0 when no mapping scores above zero (ties keep the lowest index)
 */
function analyzeEncoding(current) {
  const cosines = [];
  for( let i = 0; i < mappings.length; i++ ) {
    const mapping = mappings[i];
    const occurrence = {};
    for( const key in mapping ) {
      if( !Object.prototype.hasOwnProperty.call(mapping, key) ) continue;
      // Mapping values are literal characters, not patterns: escape regex
      // metacharacters so that values such as "." or "(" are counted verbatim.
      const literal = String(mapping[key]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      const matches = current.match(new RegExp(literal, "g"));
      occurrence[key] = matches ? matches.length : 0;
    }
    // One similarity per mapping, computed once all letters are counted.
    cosines[i] = cosineVec(occurrence, frequencies);
  }

  let maxcos = 0;
  let candidate = 0;
  for( let i = 0; i < cosines.length; i++ ) {
    // Strict ">" keeps the first of several equally good candidates.
    if( cosines[i] > maxcos ) {
      candidate = i;
      maxcos = cosines[i];
    }
  }
  return candidate;
}
/**
 * Convert text from one pseudo encoding to proper Unicode.
 *
 * Every occurrence of a private character (a value of the selected mapping)
 * is replaced by the Unicode letter it stands for (the key of that entry).
 *
 * @param {string} current - text to convert
 * @param {number|string} mapIndex - index into `mappings`
 * @returns {string} converted text; the input is returned unchanged when
 *   `mapIndex` does not refer to an existing mapping
 */
function doConvert( current, mapIndex ) {
  const mapping = mappings[mapIndex];
  let result = current;
  for( const key in mapping ) {
    if( !Object.prototype.hasOwnProperty.call(mapping, key) ) continue;
    // Treat the private character as literal text, not as a regex pattern.
    const literal = String(mapping[key]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // A replacer function inserts the key verbatim, so "$" sequences in a
    // key are never interpreted as replacement patterns.
    result = result.replace( new RegExp(literal, "g"), function() { return key; } );
  }
  return result;
}
/**
 * Cosine of the angle between two sparse vectors stored as plain objects
 * (property name = dimension, property value = coordinate).
 *
 * A dimension missing from one of the vectors counts as 0.
 *
 * @param {Object} vec1 - first vector
 * @param {Object} vec2 - second vector
 * @returns {number} dot(vec1, vec2) / (|vec1| * |vec2|), or 0 when either
 *   vector has zero length
 */
function cosineVec( vec1, vec2 ) {
  // Union of the dimensions seen in either vector.
  const dimensions = {};

  // Registers the dimensions of `vec` and returns its squared length.
  function squaredLength( vec ) {
    let total = 0;
    for( const name in vec ) {
      dimensions[name] = 1;
      total += vec[name] * vec[name];
    }
    return total;
  }

  const sq1 = squaredLength( vec1 );
  const sq2 = squaredLength( vec2 );
  // A zero-length vector has no direction; report "no similarity".
  if( sq1 === 0 || sq2 === 0 ) {
    return 0;
  }

  let dot = 0;
  for( const name in dimensions ) {
    const a = vec1[name] || 0;
    const b = vec2[name] || 0;
    dot += a * b;
  }
  return dot / Math.sqrt( sq1 * sq2 );
}
</script>
]]>
</Content>
</Module>