Skip to content

Commit

Permalink
fix #144 and update Sweble version
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Jun 21, 2022
1 parent 4f80308 commit d5c02cb
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 15 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ dependencies {
implementation group: 'de.ruedigermoeller', name: 'fst', version: '2.50'

//Wikipedia
implementation group: 'org.sweble.wikitext', name: 'swc-parser-lazy', version: '3.1.5'
implementation group: 'org.sweble.wikitext', name: 'swc-engine', version: '3.1.5'
implementation group: 'org.sweble.wikitext', name: 'swc-parser-lazy', version: '3.1.9'
implementation group: 'org.sweble.wikitext', name: 'swc-engine', version: '3.1.9'

//XML
implementation group: 'com.thoughtworks.xstream', name: 'xstream', version: '1.4.19'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ protected void addNamespaces(WikiConfigImpl c)
"Gadget definition talk",
false,
false,
new ArrayList<String>()));
new ArrayList<String>()));

c.setDefaultNamespace(c.getNamespace(0));
c.setTemplateNamespace(c.getNamespace(10));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4035,12 +4035,12 @@ protected void addI18nAliases(WikiConfigImpl c) {
"time",
false,
Arrays.asList("#time:")));
/*

c.addI18nAlias(new I18nAliasImpl(
"timel",
false,
Arrays.asList("timel")));
c.addI18nAlias(new I18nAliasImpl(
/*c.addI18nAlias(new I18nAliasImpl(
"rel2abs",
false,
Arrays.asList("rel2abs")));
Expand All @@ -4049,12 +4049,11 @@ protected void addI18nAliases(WikiConfigImpl c) {
"titleparts",
false,
Arrays.asList("#titleparts:")));
/*
c.addI18nAlias(new I18nAliasImpl(
"convert",
false,
Arrays.asList("convert")));
c.addI18nAlias(new I18nAliasImpl(
/*c.addI18nAlias(new I18nAliasImpl(
"sourceunit",
false,
Arrays.asList("#sourceunit")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.sweble.wikitext.parser.nodes.WtItalics;
import org.sweble.wikitext.parser.nodes.WtListItem;
import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtName;
import org.sweble.wikitext.parser.nodes.WtNodeList;
import org.sweble.wikitext.parser.nodes.WtOrderedList;
import org.sweble.wikitext.parser.nodes.WtPageSwitch;
Expand Down Expand Up @@ -361,24 +362,36 @@ public void visit(WtXmlElement e) {
}
}

// Stuff we want to hide

public void visit(WtImageLink n) {
/**
 * Handles a template node. Templates are very ad hoc, so their content is dropped by
 * default; only for the templates accepted by {@code templateToKeep} are the arguments
 * visited, so that their values end up in the serialized text.
 *
 * @param n the template node to process
 */
public void visit(WtTemplate n) {
    WtName templateName = n.getName();
    // Only a resolved (plain-text) name can be turned into a string; per the Sweble API
    // getAsString() is not usable on an unresolved name, so such templates are skipped.
    if (templateName == null || !templateName.isResolved()) {
        return;
    }

    String templateNameString = templateName.getAsString();
    if (templateNameString != null && templateToKeep(templateNameString) && n.getArgs() != null) {
        iterate(n.getArgs());
    }
}

public void visit(WtIllegalCodePoint n) {
// Passes the value of a template argument to iterate(...); an argument without a value is ignored.
public void visit(WtTemplateArgument n) {
    if (n.getValue() == null) {
        return;
    }
    iterate(n.getValue());
}

public void visit(WtXmlComment n) {
// Stuff we want to hide

// Template parameters are hidden: the empty body emits nothing for this node type.
public void visit(WtTemplateParameter n) {
}

public void visit(WtTemplate n) {
// Image links are hidden: the empty body emits nothing for this node type.
public void visit(WtImageLink n) {
}

public void visit(WtTemplateArgument n) {
// Illegal code points are hidden: the empty body emits nothing for this node type.
public void visit(WtIllegalCodePoint n) {
}

public void visit(WtTemplateParameter n) {
// XML comments are hidden: the empty body emits nothing for this node type.
public void visit(WtXmlComment n) {
}

public void visit(WtTagExtension n) {
Expand Down Expand Up @@ -464,4 +477,11 @@ private void write(char ch) {
// Converts the number to its decimal string form and hands it to writeWord.
private void write(int num) {
    final String digits = Integer.toString(num);
    writeWord(digits);
}

/**
 * Decides whether the argument values of a template should be kept in the serialized text.
 * Currently only date-like templates are kept, i.e. those whose name contains "date".
 * The comparison ignores letter case so that "Date de naissance" is treated the same way
 * as "date de naissance".
 *
 * @param templateNameString the template name; may be null
 * @return true if the name contains "date" in any letter case; false otherwise,
 *         including for a null name
 */
private boolean templateToKeep(String templateNameString) {
    if (templateNameString == null) {
        return false;
    }
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
    return templateNameString.toLowerCase(java.util.Locale.ROOT).contains("date");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -209,4 +209,19 @@ public void testWikiMediaTextWithInternalLinksArticlesOnlyPt() throws Exception
assertThat(result.trim(), startsWith("[[Imagem:Scriptorium.jpg"));
}

/**
 * Converts a French wikitext fixture containing date templates to text with internal
 * article links, and checks that links are kept, bold markup is removed, the text starts
 * with the article subject, and the years carried by the date templates are present.
 */
@Test
public void testWikiMediaTextWithDatesFr() throws Exception {
    String input;
    // try-with-resources closes the fixture stream even if reading fails
    try (InputStream is = this.getClass().getResourceAsStream("date_fr.txt")) {
        input = IOUtils.toString(
            java.util.Objects.requireNonNull(is, "date_fr.txt not found on the test classpath"),
            UTF_8);
    }
    String result = mediaWikiParser.toTextWithInternalLinksArticlesOnly(input, "fr");

    assertThat(result, containsString("[["));
    assertThat(result, containsString("]]"));
    assertThat(result, not(containsString("'''")));

    assertThat(result.trim(), startsWith("Emmanuel Macron"));

    // The fixture contains {{date de naissance|21 décembre 1977}} and {{date-|14 mai 2017}};
    // single tokens are checked so the assertions do not depend on spacing or line wrapping.
    assertThat(result, containsString("1977"));
    assertThat(result, containsString("2017"));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
'''Emmanuel Macron''' ({{MSAPI|/ɛmanɥɛl makʁɔ̃/}}<ref group="alpha">[[Prononciation du français|Prononciation]] en [[français de France]] [[Transcription phonétique|retranscrite]] selon la [[alphabet phonétique international|norme API]].</ref> {{prononciation|LL-Q150 (fra)-Fabricio Cardenas (Culex)-Emmanuel Macron.wav}}), né le {{date de naissance|21 décembre 1977}} à [[Amiens]] ([[France]]), est un [[Haute fonction publique française|haut fonctionnaire]] et [[homme d'État]] [[France|français]]. Il est [[président de la République française]] depuis le {{date-|14 mai 2017}}.

0 comments on commit d5c02cb

Please sign in to comment.