Permalink
Browse files

Gerard new file and suggestions

  • Loading branch information...
1 parent fac4546 commit 76ecb3f29c7235ab51d1b11276f0bbfc895bfe79 @arademaker arademaker committed Dec 7, 2011
Showing with 1,429,538 additions and 1,427,955 deletions.
  1. +15 −7 convert.sh
  2. +1 −0 header
  3. +0 −182,861 uwn-pt-sorted-a.xml
  4. +6,085 −0 uwn-pt-sorted-aa.xml
  5. +6,145 −0 uwn-pt-sorted-ab.xml
  6. +6,106 −0 uwn-pt-sorted-ac.xml
  7. +6,121 −0 uwn-pt-sorted-ad.xml
  8. +6,072 −0 uwn-pt-sorted-ae.xml
  9. +6,144 −0 uwn-pt-sorted-af.xml
  10. +6,106 −0 uwn-pt-sorted-ag.xml
  11. +6,108 −0 uwn-pt-sorted-ah.xml
  12. +6,116 −0 uwn-pt-sorted-ai.xml
  13. +6,134 −0 uwn-pt-sorted-aj.xml
  14. +6,121 −0 uwn-pt-sorted-ak.xml
  15. +6,087 −0 uwn-pt-sorted-al.xml
  16. +6,121 −0 uwn-pt-sorted-am.xml
  17. +6,111 −0 uwn-pt-sorted-an.xml
  18. +6,069 −0 uwn-pt-sorted-ao.xml
  19. +6,074 −0 uwn-pt-sorted-ap.xml
  20. +6,129 −0 uwn-pt-sorted-aq.xml
  21. +6,102 −0 uwn-pt-sorted-ar.xml
  22. +6,082 −0 uwn-pt-sorted-as.xml
  23. +6,112 −0 uwn-pt-sorted-at.xml
  24. +6,130 −0 uwn-pt-sorted-au.xml
  25. +6,073 −0 uwn-pt-sorted-av.xml
  26. +6,098 −0 uwn-pt-sorted-aw.xml
  27. +6,057 −0 uwn-pt-sorted-ax.xml
  28. +6,064 −0 uwn-pt-sorted-ay.xml
  29. +6,090 −0 uwn-pt-sorted-az.xml
  30. +0 −182,006 uwn-pt-sorted-b.xml
  31. +6,090 −0 uwn-pt-sorted-ba.xml
  32. +6,082 −0 uwn-pt-sorted-bb.xml
  33. +6,084 −0 uwn-pt-sorted-bc.xml
  34. +6,022 −0 uwn-pt-sorted-bd.xml
  35. +6,035 −0 uwn-pt-sorted-be.xml
  36. +6,040 −0 uwn-pt-sorted-bf.xml
  37. +6,053 −0 uwn-pt-sorted-bg.xml
  38. +6,042 −0 uwn-pt-sorted-bh.xml
  39. +6,042 −0 uwn-pt-sorted-bi.xml
  40. +6,037 −0 uwn-pt-sorted-bj.xml
  41. +6,047 −0 uwn-pt-sorted-bk.xml
  42. +6,070 −0 uwn-pt-sorted-bl.xml
  43. +6,078 −0 uwn-pt-sorted-bm.xml
  44. +6,117 −0 uwn-pt-sorted-bn.xml
  45. +6,132 −0 uwn-pt-sorted-bo.xml
  46. +6,108 −0 uwn-pt-sorted-bp.xml
  47. +6,148 −0 uwn-pt-sorted-bq.xml
  48. +6,138 −0 uwn-pt-sorted-br.xml
  49. +6,104 −0 uwn-pt-sorted-bs.xml
  50. +6,083 −0 uwn-pt-sorted-bt.xml
  51. +6,033 −0 uwn-pt-sorted-bu.xml
  52. +6,029 −0 uwn-pt-sorted-bv.xml
  53. +6,032 −0 uwn-pt-sorted-bw.xml
  54. +6,021 −0 uwn-pt-sorted-bx.xml
  55. +6,030 −0 uwn-pt-sorted-by.xml
  56. +6,040 −0 uwn-pt-sorted-bz.xml
  57. +0 −182,585 uwn-pt-sorted-c.xml
  58. +6,038 −0 uwn-pt-sorted-ca.xml
  59. +6,037 −0 uwn-pt-sorted-cb.xml
  60. +6,027 −0 uwn-pt-sorted-cc.xml
  61. +6,050 −0 uwn-pt-sorted-cd.xml
  62. +6,106 −0 uwn-pt-sorted-ce.xml
  63. +6,139 −0 uwn-pt-sorted-cf.xml
  64. +6,120 −0 uwn-pt-sorted-cg.xml
  65. +6,108 −0 uwn-pt-sorted-ch.xml
  66. +6,091 −0 uwn-pt-sorted-ci.xml
  67. +6,101 −0 uwn-pt-sorted-cj.xml
  68. +6,087 −0 uwn-pt-sorted-ck.xml
  69. +6,103 −0 uwn-pt-sorted-cl.xml
  70. +6,113 −0 uwn-pt-sorted-cm.xml
  71. +6,072 −0 uwn-pt-sorted-cn.xml
  72. +6,094 −0 uwn-pt-sorted-co.xml
  73. +6,108 −0 uwn-pt-sorted-cp.xml
  74. +6,098 −0 uwn-pt-sorted-cq.xml
  75. +6,152 −0 uwn-pt-sorted-cr.xml
  76. +6,065 −0 uwn-pt-sorted-cs.xml
  77. +6,110 −0 uwn-pt-sorted-ct.xml
  78. +6,071 −0 uwn-pt-sorted-cu.xml
  79. +6,097 −0 uwn-pt-sorted-cv.xml
  80. +6,110 −0 uwn-pt-sorted-cw.xml
  81. +6,063 −0 uwn-pt-sorted-cx.xml
  82. +6,093 −0 uwn-pt-sorted-cy.xml
  83. +6,101 −0 uwn-pt-sorted-cz.xml
  84. +0 −181,664 uwn-pt-sorted-d.xml
  85. +6,103 −0 uwn-pt-sorted-da.xml
  86. +6,087 −0 uwn-pt-sorted-db.xml
  87. +6,104 −0 uwn-pt-sorted-dc.xml
  88. +6,046 −0 uwn-pt-sorted-dd.xml
  89. +6,074 −0 uwn-pt-sorted-de.xml
  90. +6,078 −0 uwn-pt-sorted-df.xml
  91. +6,089 −0 uwn-pt-sorted-dg.xml
  92. +6,114 −0 uwn-pt-sorted-dh.xml
  93. +6,081 −0 uwn-pt-sorted-di.xml
  94. +6,024 −0 uwn-pt-sorted-dj.xml
  95. +6,063 −0 uwn-pt-sorted-dk.xml
  96. +6,067 −0 uwn-pt-sorted-dl.xml
  97. +6,077 −0 uwn-pt-sorted-dm.xml
  98. +6,105 −0 uwn-pt-sorted-dn.xml
  99. +6,159 −0 uwn-pt-sorted-do.xml
  100. +6,075 −0 uwn-pt-sorted-dp.xml
  101. +6,080 −0 uwn-pt-sorted-dq.xml
  102. +6,083 −0 uwn-pt-sorted-dr.xml
  103. +6,080 −0 uwn-pt-sorted-ds.xml
  104. +6,070 −0 uwn-pt-sorted-dt.xml
  105. +6,102 −0 uwn-pt-sorted-du.xml
  106. +6,066 −0 uwn-pt-sorted-dv.xml
  107. +6,070 −0 uwn-pt-sorted-dw.xml
  108. +6,044 −0 uwn-pt-sorted-dx.xml
  109. +6,067 −0 uwn-pt-sorted-dy.xml
  110. +6,070 −0 uwn-pt-sorted-dz.xml
  111. +0 −181,473 uwn-pt-sorted-e.xml
  112. +6,078 −0 uwn-pt-sorted-ea.xml
  113. +6,055 −0 uwn-pt-sorted-eb.xml
  114. +6,027 −0 uwn-pt-sorted-ec.xml
  115. +6,039 −0 uwn-pt-sorted-ed.xml
  116. +6,045 −0 uwn-pt-sorted-ee.xml
  117. +6,022 −0 uwn-pt-sorted-ef.xml
  118. +6,042 −0 uwn-pt-sorted-eg.xml
  119. +6,042 −0 uwn-pt-sorted-eh.xml
  120. +6,027 −0 uwn-pt-sorted-ei.xml
  121. +6,037 −0 uwn-pt-sorted-ej.xml
  122. +6,026 −0 uwn-pt-sorted-ek.xml
  123. +6,033 −0 uwn-pt-sorted-el.xml
  124. +6,033 −0 uwn-pt-sorted-em.xml
  125. +6,026 −0 uwn-pt-sorted-en.xml
  126. +6,033 −0 uwn-pt-sorted-eo.xml
  127. +6,031 −0 uwn-pt-sorted-ep.xml
  128. +6,065 −0 uwn-pt-sorted-eq.xml
  129. +6,041 −0 uwn-pt-sorted-er.xml
  130. +6,027 −0 uwn-pt-sorted-es.xml
  131. +6,045 −0 uwn-pt-sorted-et.xml
  132. +6,051 −0 uwn-pt-sorted-eu.xml
  133. +6,041 −0 uwn-pt-sorted-ev.xml
  134. +6,053 −0 uwn-pt-sorted-ew.xml
  135. +6,053 −0 uwn-pt-sorted-ex.xml
  136. +6,048 −0 uwn-pt-sorted-ey.xml
  137. +6,036 −0 uwn-pt-sorted-ez.xml
  138. +0 −182,201 uwn-pt-sorted-f.xml
  139. +6,058 −0 uwn-pt-sorted-fa.xml
  140. +6,050 −0 uwn-pt-sorted-fb.xml
  141. +6,055 −0 uwn-pt-sorted-fc.xml
  142. +6,064 −0 uwn-pt-sorted-fd.xml
  143. +6,052 −0 uwn-pt-sorted-fe.xml
  144. +6,059 −0 uwn-pt-sorted-ff.xml
  145. +6,054 −0 uwn-pt-sorted-fg.xml
  146. +6,048 −0 uwn-pt-sorted-fh.xml
  147. +6,047 −0 uwn-pt-sorted-fi.xml
  148. +6,052 −0 uwn-pt-sorted-fj.xml
  149. +6,059 −0 uwn-pt-sorted-fk.xml
  150. +6,050 −0 uwn-pt-sorted-fl.xml
  151. +6,043 −0 uwn-pt-sorted-fm.xml
  152. +6,042 −0 uwn-pt-sorted-fn.xml
  153. +6,061 −0 uwn-pt-sorted-fo.xml
  154. +6,056 −0 uwn-pt-sorted-fp.xml
  155. +6,055 −0 uwn-pt-sorted-fq.xml
  156. +6,061 −0 uwn-pt-sorted-fr.xml
  157. +6,047 −0 uwn-pt-sorted-fs.xml
  158. +6,075 −0 uwn-pt-sorted-ft.xml
  159. +6,075 −0 uwn-pt-sorted-fu.xml
  160. +6,059 −0 uwn-pt-sorted-fv.xml
  161. +6,077 −0 uwn-pt-sorted-fw.xml
  162. +6,090 −0 uwn-pt-sorted-fx.xml
  163. +6,080 −0 uwn-pt-sorted-fy.xml
  164. +6,080 −0 uwn-pt-sorted-fz.xml
  165. +0 −182,220 uwn-pt-sorted-g.xml
  166. +6,098 −0 uwn-pt-sorted-ga.xml
  167. +6,146 −0 uwn-pt-sorted-gb.xml
  168. +6,102 −0 uwn-pt-sorted-gc.xml
  169. +6,115 −0 uwn-pt-sorted-gd.xml
  170. +6,118 −0 uwn-pt-sorted-ge.xml
  171. +6,107 −0 uwn-pt-sorted-gf.xml
  172. +6,071 −0 uwn-pt-sorted-gg.xml
  173. +6,118 −0 uwn-pt-sorted-gh.xml
  174. +6,108 −0 uwn-pt-sorted-gi.xml
  175. +6,089 −0 uwn-pt-sorted-gj.xml
  176. +6,090 −0 uwn-pt-sorted-gk.xml
  177. +6,061 −0 uwn-pt-sorted-gl.xml
  178. +6,145 −0 uwn-pt-sorted-gm.xml
  179. +6,446 −0 uwn-pt-sorted-gn.xml
  180. +6,059 −0 uwn-pt-sorted-go.xml
  181. +6,073 −0 uwn-pt-sorted-gp.xml
  182. +6,073 −0 uwn-pt-sorted-gq.xml
  183. +6,081 −0 uwn-pt-sorted-gr.xml
  184. +6,070 −0 uwn-pt-sorted-gs.xml
  185. +6,038 −0 uwn-pt-sorted-gt.xml
  186. +6,027 −0 uwn-pt-sorted-gu.xml
  187. +6,018 −0 uwn-pt-sorted-gv.xml
  188. +6,023 −0 uwn-pt-sorted-gw.xml
  189. +6,028 −0 uwn-pt-sorted-gx.xml
  190. +6,023 −0 uwn-pt-sorted-gy.xml
  191. +6,060 −0 uwn-pt-sorted-gz.xml
  192. +0 −152,938 uwn-pt-sorted-h.xml
  193. +6,183 −0 uwn-pt-sorted-ha.xml
  194. +6,136 −0 uwn-pt-sorted-hb.xml
  195. +6,105 −0 uwn-pt-sorted-hc.xml
  196. +6,138 −0 uwn-pt-sorted-hd.xml
  197. +6,120 −0 uwn-pt-sorted-he.xml
  198. +6,073 −0 uwn-pt-sorted-hf.xml
  199. +6,132 −0 uwn-pt-sorted-hg.xml
  200. +6,229 −0 uwn-pt-sorted-hh.xml
  201. +6,191 −0 uwn-pt-sorted-hi.xml
  202. +6,256 −0 uwn-pt-sorted-hj.xml
  203. +6,127 −0 uwn-pt-sorted-hk.xml
  204. +6,090 −0 uwn-pt-sorted-hl.xml
  205. +6,093 −0 uwn-pt-sorted-hm.xml
  206. +6,065 −0 uwn-pt-sorted-hn.xml
  207. +6,061 −0 uwn-pt-sorted-ho.xml
  208. +6,065 −0 uwn-pt-sorted-hp.xml
  209. +6,089 −0 uwn-pt-sorted-hq.xml
  210. +6,038 −0 uwn-pt-sorted-hr.xml
  211. +6,050 −0 uwn-pt-sorted-hs.xml
  212. +6,025 −0 uwn-pt-sorted-ht.xml
  213. +6,042 −0 uwn-pt-sorted-hu.xml
  214. +6,045 −0 uwn-pt-sorted-hv.xml
  215. +6,052 −0 uwn-pt-sorted-hw.xml
  216. +6,047 −0 uwn-pt-sorted-hx.xml
  217. +6,083 −0 uwn-pt-sorted-hy.xml
  218. +6,067 −0 uwn-pt-sorted-hz.xml
  219. +6,057 −0 uwn-pt-sorted-ia.xml
  220. +6,035 −0 uwn-pt-sorted-ib.xml
  221. +6,088 −0 uwn-pt-sorted-ic.xml
  222. +6,048 −0 uwn-pt-sorted-id.xml
  223. +6,032 −0 uwn-pt-sorted-ie.xml
  224. +6,021 −0 uwn-pt-sorted-if.xml
  225. +6,026 −0 uwn-pt-sorted-ig.xml
  226. +6,032 −0 uwn-pt-sorted-ih.xml
  227. +6,026 −0 uwn-pt-sorted-ii.xml
  228. +6,034 −0 uwn-pt-sorted-ij.xml
  229. +6,047 −0 uwn-pt-sorted-ik.xml
  230. +6,024 −0 uwn-pt-sorted-il.xml
  231. +6,031 −0 uwn-pt-sorted-im.xml
  232. +6,039 −0 uwn-pt-sorted-in.xml
  233. +6,042 −0 uwn-pt-sorted-io.xml
  234. +6,053 −0 uwn-pt-sorted-ip.xml
  235. +6,059 −0 uwn-pt-sorted-iq.xml
  236. +6,056 −0 uwn-pt-sorted-ir.xml
  237. +6,069 −0 uwn-pt-sorted-is.xml
  238. +6,046 −0 uwn-pt-sorted-it.xml
  239. +6,029 −0 uwn-pt-sorted-iu.xml
  240. +6,030 −0 uwn-pt-sorted-iv.xml
  241. +6,033 −0 uwn-pt-sorted-iw.xml
  242. +6,058 −0 uwn-pt-sorted-ix.xml
  243. +6,049 −0 uwn-pt-sorted-iy.xml
  244. +6,063 −0 uwn-pt-sorted-iz.xml
  245. +6,041 −0 uwn-pt-sorted-ja.xml
  246. +1,934 −0 uwn-pt-sorted-jb.xml
  247. BIN uwn-pt-sorted.tsv.gz
View
@@ -1,26 +1,34 @@
-split -a 1 -l 15000 uwn-pt-sorted.tsv uwn-pt-sorted-
-head -1 uwn-pt-sorted-a > header
+# before the script
+# head -1 uwn-pt-sorted-a > header
+# edit header
-# add header
-for f in uwn-pt-sorted-[b-h]; do
+split -a 2 -l 500 uwn-pt-sorted.tsv uwn-pt-sorted-
+
+# add header to all files without header
+tail +2 uwn-pt-sorted-aa > aa.tmp
+mv aa.tmp uwn-pt-sorted-aa
+for f in uwn-pt-sorted-??; do
cat header $f >> $f.new;
mv $f.new $f
done
+wc -l uwn-pt-sorted-??
+wc -l uwn-pt-sorted.tsv
+
# convert to XML
-for f in uwn-pt-sorted-?; do
+for f in uwn-pt-sorted-??; do
python convert.py $f $f.xml
done
# formatting the XML
-for f in uwn-pt-sorted-?.xml; do
+for f in uwn-pt-sorted-??.xml; do
tidy -utf8 -wrap 150 -xml -i $f 2> `basename $f .xml`.error > $f.new
mv $f.new $f
done
# remove temp files
-rm uwn-pt-sorted-? header uwn-pt-sorted-?.error
+rm uwn-pt-sorted-?? uwn-pt-sorted-??.error
View
@@ -0,0 +1 @@
+BC WN-3.0-Synset PT-Words-Man PT-Word-Cand EN-Gloss EN-Words PT-Gloss PT-Gloss-Sug SPA-Words-Sug Comments
Oops, something went wrong.

0 comments on commit 76ecb3f

Please sign in to comment.