Skip to content

Commit

Permalink
Fixed a bug in the grep pattern: make sure words starting with "-" are not treated as valid.
Browse files (browse the repository at this point in the history)
  • Loading branch information
nifgraup committed Mar 11, 2012
1 parent f96e7f1 commit 0dfad19
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion makedict.sh
Expand Up @@ -91,7 +91,7 @@ elif [ "$1" = "packages" ]; then
elif [ "$1" != "" ]; then
echo "Downloading files..."
test -e ${TMP}/${1}wiktionary-latest-pages-articles.xml || ( wget http://dumps.wikimedia.org/${1}wiktionary/latest/${1}wiktionary-latest-pages-articles.xml.bz2 -O ${TMP}/${1}wiktionary-latest-pages-articles.xml.bz2 && bunzip2 ${TMP}/${1}wiktionary-latest-pages-articles.xml.bz2 )
test -e ${TMP}/${1}wiktionary-latest-pages-articles.xml.texts || grep -o "{{[^.]*|[^-.][^}]*" ${TMP}/iswiktionary-latest-pages-articles.xml | grep -v "{{.*|.*[ =]" | sort | uniq > ${TMP}/iswiktionary-latest-pages-articles.xml.texts
test -e ${TMP}/${1}wiktionary-latest-pages-articles.xml.texts || grep -o "{{[^.|]*|[^-.][^}]*" ${TMP}/iswiktionary-latest-pages-articles.xml | grep -v "{{.*|.*[ =]" | sort | uniq > ${TMP}/iswiktionary-latest-pages-articles.xml.texts

echo "Extracting valid words from the wiktionary dump..."
rm -f ${TMP}/wiktionary.extracted
Expand Down

0 comments on commit 0dfad19

Please sign in to comment.