diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/ArticleApi.java b/app/src/main/java/com/github/mzule/androidweekly/api/ArticleApi.java index 617a851..786f82a 100644 --- a/app/src/main/java/com/github/mzule/androidweekly/api/ArticleApi.java +++ b/app/src/main/java/com/github/mzule/androidweekly/api/ArticleApi.java @@ -4,6 +4,7 @@ import android.os.Handler; import android.text.TextUtils; +import com.github.mzule.androidweekly.api.parser.ArticleParsers; import com.github.mzule.androidweekly.dao.ArticleDao; import com.github.mzule.androidweekly.dao.IssueListKeeper; import com.github.mzule.androidweekly.entity.Article; @@ -100,106 +101,13 @@ private Response> doGetArchive() throws Exception { } private Response> doGetPage(String issue) throws Exception { - String url = "http://androidweekly.net"; - if (issue != null) { - url += issue; - } - final List articles = new ArrayList<>(); - Document doc = Jsoup.parse(new URL(url), 30000); - if (issue == null || isBiggerThan100(issue)) { - parse(doc, articles, issue); - } else { - Element root = doc.getElementsByClass("issue").get(0); - while (root.children().size() == 1) { - root = root.child(0); - } - String currentSection = null; - for (Element e : root.children()) { - if (e.tagName().equals("h2")) { - currentSection = e.text(); - articles.add(currentSection); - continue; - } - if (e.tagName().equals("div")) { - Elements img = e.getElementsByTag("img"); - if (!img.isEmpty()) { - Article article = new Article(); - article.setImageUrl(img.get(0).attr("src")); - article.setTitle(e.getElementsByTag("a").get(1).text()); - article.setLink(e.getElementsByTag("a").get(1).attr("href")); - article.setBrief(e.getElementsByTag("p").get(0).text()); - Elements span = e.getElementsByTag("span"); - if (!span.isEmpty()) { - article.setDomain(span.get(0).text().replace("(", "").replace(")", "")); - } - article.setIssue(issue); - article.setSection(currentSection); - articles.add(article); - //articleDao.save(article); - } - } else { - Article article = new Article(); - Elements title = e.getElementsByTag("a"); - if (title.isEmpty()) { - continue; - } - article.setTitle(title.get(0).text()); - Elements span = e.getElementsByTag("span"); - if (!span.isEmpty()) { - article.setDomain(span.get(0).text().replace("(", "").replace(")", "")); - } - article.setLink(e.getElementsByTag("a").get(0).attr("href")); - article.setBrief(e.text()); - article.setIssue(issue); - article.setSection(currentSection); - articles.add(article); - //articleDao.save(article); - } - } - } - return new Response<>(articles, false); - } - - private boolean isBiggerThan100(String issue) { - String s = issue.split("-")[1]; - return Integer.parseInt(s) >= 103; - } - - private void parse(Document doc, List articles, String issue) { - Elements tables = doc.getElementsByTag("table"); - String currentSection = null; - for (Element e : tables) { - Elements h2 = e.getElementsByTag("h2"); - if (!h2.isEmpty()) { - currentSection = h2.get(0).text(); - articles.add(currentSection); - } else { - Elements tds = e.getElementsByTag("td"); - Element td = tds.get(tds.size() - 2); - String imageUrl = null; - if (tds.size() == 4) { - imageUrl = tds.get(0).getElementsByTag("img").get(0).attr("src"); - } - String title = td.getElementsByClass("article-headline").get(0).text(); - String brief = td.getElementsByTag("p").get(0).text(); - String link = td.getElementsByClass("article-headline").get(0).attr("href"); - String domain = td.getElementsByTag("span").get(0).text().replace("(", "").replace(")", ""); - if (issue == null) { - String number = doc.getElementsByClass("issue-header").get(0).getElementsByTag("span").get(0).text(); - issue = "/issues/issue-" + number.replace("#", ""); - } - Article article = new Article(); - article.setTitle(title); - article.setBrief(brief); - article.setLink(link); - article.setDomain(domain); - article.setIssue(issue); - article.setImageUrl(imageUrl); - article.setSection(currentSection); - articles.add(article); - articleDao.save(article); + List result = ArticleParsers.get(issue).parse(issue); + for (Object obj : result) { + if (obj instanceof Article) { + articleDao.save((Article) obj); } } + return new Response<>(result, false); } private void postSuccess(final Response result, final ApiCallback callback) { diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParser.java b/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParser.java new file mode 100644 index 0000000..a8f0be1 --- /dev/null +++ b/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParser.java @@ -0,0 +1,11 @@ +package com.github.mzule.androidweekly.api.parser; + +import java.io.IOException; +import java.util.List; + +/** + * Created by CaoDongping on 4/15/16. + */ +public interface ArticleParser { + List parse(String issue) throws IOException; +} diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParsers.java b/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParsers.java new file mode 100644 index 0000000..124d203 --- /dev/null +++ b/app/src/main/java/com/github/mzule/androidweekly/api/parser/ArticleParsers.java @@ -0,0 +1,18 @@ +package com.github.mzule.androidweekly.api.parser; + +import android.support.annotation.WorkerThread; + +/** + * Created by CaoDongping on 4/15/16. + */ +public class ArticleParsers { + @WorkerThread + public static ArticleParser get(String issue) { + if (issue == null || Integer.parseInt(issue.split("-")[1]) > 102) { + return new FresherArticlesParser(); + } else { + return new OlderArticlesParser(); + } + } + +} diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/parser/DocumentProvider.java b/app/src/main/java/com/github/mzule/androidweekly/api/parser/DocumentProvider.java new file mode 100644 index 0000000..a3f0dec --- /dev/null +++ b/app/src/main/java/com/github/mzule/androidweekly/api/parser/DocumentProvider.java @@ -0,0 +1,20 @@ +package com.github.mzule.androidweekly.api.parser; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.io.IOException; +import java.net.URL; + +/** + * Created by CaoDongping on 4/15/16. + */ +public class DocumentProvider { + public static Document get(String issue) throws IOException { + String url = "http://androidweekly.net/"; + if (issue != null) { + url += issue; + } + return Jsoup.parse(new URL(url), 30000); + } +} diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/parser/FresherArticlesParser.java b/app/src/main/java/com/github/mzule/androidweekly/api/parser/FresherArticlesParser.java new file mode 100644 index 0000000..941e9d3 --- /dev/null +++ b/app/src/main/java/com/github/mzule/androidweekly/api/parser/FresherArticlesParser.java @@ -0,0 +1,57 @@ +package com.github.mzule.androidweekly.api.parser; + +import com.github.mzule.androidweekly.entity.Article; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by CaoDongping on 4/15/16. + */ +public class FresherArticlesParser implements ArticleParser { + + @Override + public List parse(String issue) throws IOException { + Document doc = DocumentProvider.get(issue); + List articles = new ArrayList<>(); + Elements tables = doc.getElementsByTag("table"); + String currentSection = null; + for (Element e : tables) { + Elements h2 = e.getElementsByTag("h2"); + if (!h2.isEmpty()) { + currentSection = h2.get(0).text(); + articles.add(currentSection); + } else { + Elements tds = e.getElementsByTag("td"); + Element td = tds.get(tds.size() - 2); + String imageUrl = null; + if (tds.size() == 4) { + imageUrl = tds.get(0).getElementsByTag("img").get(0).attr("src"); + } + String title = td.getElementsByClass("article-headline").get(0).text(); + String brief = td.getElementsByTag("p").get(0).text(); + String link = td.getElementsByClass("article-headline").get(0).attr("href"); + String domain = td.getElementsByTag("span").get(0).text().replace("(", "").replace(")", ""); + if (issue == null) { + String number = doc.getElementsByClass("issue-header").get(0).getElementsByTag("span").get(0).text(); + issue = "/issues/issue-" + number.replace("#", ""); + } + Article article = new Article(); + article.setTitle(title); + article.setBrief(brief); + article.setLink(link); + article.setDomain(domain); + article.setIssue(issue); + article.setImageUrl(imageUrl); + article.setSection(currentSection); + articles.add(article); + } + } + return articles; + } +} diff --git a/app/src/main/java/com/github/mzule/androidweekly/api/parser/OlderArticlesParser.java b/app/src/main/java/com/github/mzule/androidweekly/api/parser/OlderArticlesParser.java new file mode 100644 index 0000000..36fbd3b --- /dev/null +++ b/app/src/main/java/com/github/mzule/androidweekly/api/parser/OlderArticlesParser.java @@ -0,0 +1,69 @@ +package com.github.mzule.androidweekly.api.parser; + +import com.github.mzule.androidweekly.entity.Article; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by CaoDongping on 4/15/16. + */ +public class OlderArticlesParser implements ArticleParser { + + @Override + public List parse(String issue) throws IOException { + Document doc = DocumentProvider.get(issue); + List articles = new ArrayList<>(); + Element root = doc.getElementsByClass("issue").get(0); + while (root.children().size() == 1) { + root = root.child(0); + } + String currentSection = null; + for (Element e : root.children()) { + if (e.tagName().equals("h2")) { + currentSection = e.text(); + articles.add(currentSection); + continue; + } + if (e.tagName().equals("div")) { + Elements img = e.getElementsByTag("img"); + if (!img.isEmpty()) { + Article article = new Article(); + article.setImageUrl(img.get(0).attr("src")); + article.setTitle(e.getElementsByTag("a").get(1).text()); + article.setLink(e.getElementsByTag("a").get(1).attr("href")); + article.setBrief(e.getElementsByTag("p").get(0).text()); + Elements span = e.getElementsByTag("span"); + if (!span.isEmpty()) { + article.setDomain(span.get(0).text().replace("(", "").replace(")", "")); + } + article.setIssue(issue); + article.setSection(currentSection); + articles.add(article); + } + } else { + Article article = new Article(); + Elements title = e.getElementsByTag("a"); + if (title.isEmpty()) { + continue; + } + article.setTitle(title.get(0).text()); + Elements span = e.getElementsByTag("span"); + if (!span.isEmpty()) { + article.setDomain(span.get(0).text().replace("(", "").replace(")", "")); + } + article.setLink(e.getElementsByTag("a").get(0).attr("href")); + article.setBrief(e.text()); + article.setIssue(issue); + article.setSection(currentSection); + articles.add(article); + } + } + return articles; + } +}