Skip to content

Commit

Permalink
Add script to download Hunspell dictionaries
Browse files Browse the repository at this point in the history
  • Loading branch information
ljacqu committed Jun 18, 2024
1 parent 38e530f commit 360f92b
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 18 deletions.
23 changes: 23 additions & 0 deletions dict/download_dictionaries.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/sh
# Downloads some Hunspell dictionaries from GitHub into the current directory.
# If on Windows, this can be run with Git Bash.
#
# Each file is saved under the last path segment of its URL exactly as written
# here (curl -O does not URL-decode it), so names containing "%20" keep that
# sequence on disk, e.g. "English%20(American).dic".
#
# Exit status: 0 if every download succeeded, 1 if at least one failed.

BASE_URL="https://raw.githubusercontent.com/titoBouzout/Dictionaries/master"

# Set to 1 as soon as any download fails; used as the script's exit status.
failed=0

# Downloads a single dictionary file.
#   $1 - file name relative to BASE_URL, already URL-encoded
download() {
  # --fail makes curl return an error on HTTP responses >= 400 instead of
  # saving the server's error page as if it were the dictionary.
  if ! curl --fail -O "$BASE_URL/$1"; then
    echo "Failed to download '$1'" >&2
    failed=1
  fi
}

# The trailing comment on each line is the language code of the dictionary.
download "Bulgarian.dic" # bg
download "Czech.dic" # cs
download "Danish.dic" # da
download "German_de_DE.dic" # de
download "English%20(American).dic" # en
download "Basque.dic" # eu
download "Spanish.dic" # es
download "Finnish.dic" # fi
download "French.dic" # fr
download "Hungarian.dic" # hu
download "Italian.dic" # it
download "Norwegian%20(Bokmal).dic" # nb
download "Dutch.dic" # nl
download "Norwegian%20(Nynorsk).dic" # nn
download "Polish.dic" # pl
download "Portuguese%20(European).dic" # pt
download "Russian.dic" # ru
download "Serbian%20(Cyrillic).dic" # sr-cyrl
download "Serbian%20(Latin).dic" # sr-lat
download "Turkish.dic" # tr

# Keep going after individual failures, but surface them to the caller.
exit "$failed"
27 changes: 9 additions & 18 deletions src/main/java/ch/jalu/wordeval/DictionaryRenamer.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
package ch.jalu.wordeval;

import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.ArrayUtils;

import java.io.File;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;

/**
* Utility class to rename the dictionaries from the Hunspell
Expand All @@ -17,7 +18,7 @@
public class DictionaryRenamer {

private static final File DICT_DIRECTORY = new File("./dict");
private static final String[] USE_EXTENSIONS = { ".aff", ".dic", ".txt" };
private static final Set<String> USE_EXTENSIONS = Set.of("aff", "dic", "txt");
private static final Map<String, String> REPLACEMENTS = initReplacements();

private DictionaryRenamer() {
Expand All @@ -37,33 +38,22 @@ public static void main(String[] args) {
throw new IllegalStateException("Could not read files from dictionary " + DICT_DIRECTORY);
}
Arrays.stream(files)
.filter(file -> ArrayUtils.contains(USE_EXTENSIONS, getExtension(file)))
.filter(file -> USE_EXTENSIONS.contains(Files.getFileExtension(file.getName())))
.forEach(file -> applyReplacement(file));

log.info("End renaming files");
}

private static String getExtension(File f) {
int lastIndex = f.getName().lastIndexOf('.');
return lastIndex > -1 ? f.getName().substring(lastIndex) : null;
}

private static String getFileName(File f) {
int lastIndex = f.getName().lastIndexOf('.');
return lastIndex > -1
? f.getName().substring(0, lastIndex)
: f.getName();
}

private static void applyReplacement(File f) {
String fileName = getFileName(f);
String fileName = Files.getNameWithoutExtension(f.getName()).replace("%20", " ");

if (!REPLACEMENTS.containsKey(fileName)) {
log.info("No replacement for '{}'", fileName);
return;
}

String newName = fileName.replace(fileName, REPLACEMENTS.get(fileName))
+ getExtension(f);
+ "." + Files.getFileExtension(f.getName());
File newFile = new File(DICT_DIRECTORY + File.separator + newName);
if (newFile.exists() && !newFile.isDirectory()) {
log.warn("Not renaming '{}' to '{}': file with such name already exists", fileName, newName);
Expand All @@ -74,7 +64,7 @@ private static void applyReplacement(File f) {
} else {
log.warn("Could not rename '{}' to '{}'", fileName, newName);
}
}
}
}

private static Map<String, String> initReplacements() {
Expand All @@ -91,6 +81,7 @@ private static Map<String, String> initReplacements() {
.put("English (British)", "en-uk")
.put("English (Canadian)", "en-ca")
.put("Estonian", "et")
.put("Finnish", "fi")
.put("French", "fr")
.put("Galego", "gl")
.put("German", "de")
Expand Down

0 comments on commit 360f92b

Please sign in to comment.