@@ -13,111 +13,136 @@
public class XMLParser {

private ArrayList<String> links;
private int throwOutCounter = 0;

/*
 * Single-feed constructor: fetches the feed, re-parses it as XML and stores the text of the
 * first "link" child of every "item" element in links.
 *
 * NOTE(review): two signature lines follow (one declaring IOException, one not). This looks like
 * both revisions of the method header were kept; only one can remain for the file to compile, and
 * the variant without IOException would need the fetch below wrapped in a try/catch.
 */
public XMLParser(String feed) throws IOException, InterruptedException {
public XMLParser(String feed) throws InterruptedException {
links = new ArrayList<String>();

// Fetch the feed, then re-parse the fetched markup with Jsoup's XML parser.
Document doc = Jsoup.connect(feed).get();
doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
Elements elements = doc.getElementsByTag("item");

// NOTE(review): size() / 2 is 0 for zero or one item, and Executors.newFixedThreadPool rejects
// a non-positive thread count with IllegalArgumentException.
ExecutorService es = Executors.newFixedThreadPool(elements.size() / 2);

// One task per item; each task appends that item's first "link" text to links.
for (final Element element : elements) {
es.submit(new Runnable() {

@Override
public void run() {
// NOTE(review): links is a plain ArrayList shared by all pool threads, and first() returns
// null when the item has no "link" child.
links.add(element.getElementsByTag("link").first().text());
}
});
}
// Stop accepting tasks, then wait up to StaticVariables.TIMEOUT seconds for the submitted ones.
es.shutdown();
es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
}

/**
 * Builds the link list from one feed, keeping only items whose title or description contains
 * {@code searchTerm} (case-sensitive match).
 *
 * <p>The feed is fetched with Jsoup, re-parsed with the XML parser, and every {@code item}
 * element is examined on a worker thread. Items without a {@code link} child are skipped; a
 * missing {@code title} or {@code description} is treated as empty text.
 *
 * @param feed URL of the feed to download
 * @param searchTerm text to look for in each item's title or description
 * @throws IOException if the feed cannot be fetched
 * @throws InterruptedException if the calling thread is interrupted while waiting for the workers
 */
public XMLParser(String feed, final String searchTerm) throws IOException, InterruptedException {
    links = new ArrayList<String>();

    Document doc = Jsoup.connect(feed).get();
    doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
    Elements elements = doc.getElementsByTag("item");
    if (elements.isEmpty()) {
        // Nothing to scan; also avoids creating a pool for no work.
        return;
    }

    // size() / 2 is 0 for a single item, and newFixedThreadPool rejects a non-positive size.
    ExecutorService es = Executors.newFixedThreadPool(Math.max(1, elements.size() / 2));
    try {
        for (final Element element : elements) {
            es.submit(new Runnable() {

                @Override
                public void run() {
                    Element titleTag = element.getElementsByTag("title").first();
                    Element descriptionTag = element.getElementsByTag("description").first();
                    Element linkTag = element.getElementsByTag("link").first();
                    if (linkTag == null) {
                        // No link to record for this item.
                        return;
                    }

                    String title = titleTag == null ? "" : titleTag.text();
                    String description = descriptionTag == null ? "" : descriptionTag.text();

                    if (title.contains(searchTerm) || description.contains(searchTerm)) {
                        // ArrayList is not thread-safe; serialize appends from the workers.
                        synchronized (links) {
                            links.add(linkTag.text());
                        }
                    }
                }
            });
        }
        es.shutdown();
        es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
    } finally {
        // No-op when the pool already terminated; otherwise stops stragglers after a timeout
        // or interrupt so they do not keep appending once the constructor has returned.
        es.shutdownNow();
    }
}

/*
 * Multi-feed variant: for each feed URL, fetches the feed, re-parses it as XML and submits one
 * task per "item" element that appends the item's first "link" text to links.
 *
 * NOTE(review): this span declares doc twice and es twice, and its braces do not balance (the
 * constructor is never closed). It looks like two revisions interleaved; reconcile them before
 * relying on this code.
 */
public XMLParser(List<String> feeds) throws IOException, InterruptedException {
links = new ArrayList<String>();

for (String feed : feeds) {
Document doc = Jsoup.connect(feed).get();
try {
// NOTE(review): jsoup's timeout() takes milliseconds, whereas awaitTermination below passes
// StaticVariables.TIMEOUT as seconds; confirm the * 200 scaling is intended.
Document doc = Jsoup.connect(feed).timeout(StaticVariables.TIMEOUT * 200).get();
doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
Elements elements = doc.getElementsByTag("item");

// NOTE(review): both sizing expressions evaluate to 0 when the feed has no items, which
// Executors.newFixedThreadPool rejects with IllegalArgumentException.
ExecutorService es = Executors.newFixedThreadPool(elements.size() / 2);
ExecutorService es = Executors
.newFixedThreadPool(Math.min(StaticVariables.MAX_THREADS, (elements.size() + 1) / 2));

for (final Element element : elements) {
es.submit(new Runnable() {

@Override
public void run() {
// title and description are read here but not used afterwards in this task.
String title = element.getElementsByTag("title").first().text();
String description = element.getElementsByTag("description").first().text();
// NOTE(review): links is a plain ArrayList appended to from several pool threads.
links.add(element.getElementsByTag("link").first().text());
}
});
}
// Stop accepting tasks, then wait up to StaticVariables.TIMEOUT seconds for this feed's tasks.
es.shutdown();
es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
} catch (IOException e) {
// A feed that fails with IOException is reported on stderr and skipped. throwOutCounter is
// only read here, never incremented in this span.
System.err.println(e.getMessage() + ": Excluding link. (#" + throwOutCounter + ") Feed: " + feed);
}
}

/*
 * Search-term variant: keeps the first "link" text of each "item" whose title or description
 * contains the search term.
 *
 * NOTE(review): two signature lines follow (List<String> feeds vs. String feed), and the body
 * repeats the doc, es, title and description declarations and the if header. It looks like two
 * revisions interleaved; the span does not compile as written and must be reconciled.
 */
public XMLParser(List<String> feeds, final String searchTerm) throws IOException, InterruptedException {
public XMLParser(String feed, final String searchTerm) throws InterruptedException {
links = new ArrayList<String>();
// Upper-cased copy of the search term, used by the case-insensitive comparison below.
final String term = searchTerm.toUpperCase();

for (String feed : feeds) {
Document doc = Jsoup.connect(feed).get();
try {
// NOTE(review): jsoup's timeout() takes milliseconds, whereas awaitTermination below passes
// StaticVariables.TIMEOUT as seconds; confirm the * 200 scaling is intended.
Document doc = Jsoup.connect(feed).timeout(StaticVariables.TIMEOUT * 200).get();
doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
Elements elements = doc.getElementsByTag("item");

// NOTE(review): both sizing expressions evaluate to 0 when there are no items, which
// Executors.newFixedThreadPool rejects with IllegalArgumentException.
ExecutorService es = Executors.newFixedThreadPool(elements.size() / 2);
ExecutorService es = Executors
.newFixedThreadPool(Math.min(StaticVariables.MAX_THREADS, (elements.size() + 1) / 2));

for (final Element element : elements) {
es.submit(new Runnable() {

@Override
public void run() {
// NOTE(review): first() returns null when the tag is absent, so an item lacking a title,
// description or link makes this task fail with a NullPointerException.
String title = element.getElementsByTag("title").first().text();
String description = element.getElementsByTag("description").first().text();
String title = element.getElementsByTag("title").first().text().toUpperCase();
String description = element.getElementsByTag("description").first().text().toUpperCase();

// The first condition compares against the raw searchTerm, the second against the
// upper-cased term.
if (title.contains(searchTerm) || description.contains(searchTerm)) {
if (title.contains(term) || description.contains(term)) {
// NOTE(review): links is a plain ArrayList appended to from several pool threads.
links.add(element.getElementsByTag("link").first().text());
}

}
});
}
// Stop accepting tasks, then wait up to StaticVariables.TIMEOUT seconds for the submitted ones.
es.shutdown();
es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
} catch (

IOException e)

{
// A fetch failure is reported on stderr and the feed is skipped. throwOutCounter is only read
// here, never incremented in this span.
System.err.println(e.getMessage() + ": Excluding link. (#" + throwOutCounter + ") Feed: " + feed);
}

}

/**
 * Builds the link list from several feeds, downloading them in parallel.
 *
 * <p>Each feed is fetched and parsed on a worker thread; the text of the first {@code link}
 * child of every {@code item} element is appended to the shared list. Items without a
 * {@code link} are skipped. A feed that fails with an {@link IOException} is reported on
 * {@code System.err} and excluded. The order of the collected links is not defined.
 *
 * <p>The constructor waits at most {@code StaticVariables.TIMEOUT} seconds for the workers and
 * then cancels whatever is still pending, so slow feeds may be missing from the result.
 *
 * @param feeds feed URLs to download; an empty list yields an empty result
 * @throws InterruptedException if the calling thread is interrupted while waiting for the workers
 */
public XMLParser(List<String> feeds) throws InterruptedException {
    links = new ArrayList<String>();
    if (feeds.isEmpty()) {
        // (0 + 1) / 2 == 0, and newFixedThreadPool rejects a non-positive size.
        return;
    }

    ExecutorService es = Executors
            .newFixedThreadPool(Math.min(StaticVariables.MAX_THREADS, (feeds.size() + 1) / 2));
    try {
        for (final String feed : feeds) {
            es.submit(new Runnable() {

                @Override
                public void run() {
                    try {
                        Document doc = Jsoup.connect(feed).timeout(StaticVariables.TIMEOUT * 200).get();
                        doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
                        Elements elements = doc.getElementsByTag("item");

                        // Gather locally first so the shared list is locked once per feed.
                        List<String> found = new ArrayList<String>();
                        for (final org.jsoup.nodes.Element element : elements) {
                            org.jsoup.nodes.Element linkTag = element.getElementsByTag("link").first();
                            if (linkTag != null) {
                                found.add(linkTag.text());
                            }
                        }

                        // ArrayList is not thread-safe; serialize appends from the workers.
                        synchronized (links) {
                            links.addAll(found);
                        }
                    } catch (IOException e) {
                        System.err
                                .println(e.getMessage() + ": Excluding link. (#" + throwOutCounter + ") Feed: " + feed);
                    }
                }
            });
        }
        es.shutdown();
        es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
    } finally {
        // No-op when the pool already terminated; otherwise cancels queued feeds and interrupts
        // running ones after a timeout or interrupt.
        es.shutdownNow();
    }
}

/**
 * Builds the link list from several feeds, keeping only items whose title or description
 * contains {@code searchTerm}, compared case-insensitively.
 *
 * <p>Each feed is fetched and parsed on a worker thread. Items without a {@code link} child are
 * skipped; a missing {@code title} or {@code description} is treated as empty text. A feed that
 * fails with an {@link IOException} is reported on {@code System.err} and excluded. The order of
 * the collected links is not defined.
 *
 * <p>The constructor waits at most {@code StaticVariables.TIMEOUT} seconds for the workers and
 * then cancels whatever is still pending, so slow feeds may be missing from the result.
 *
 * @param feeds feed URLs to download; an empty list yields an empty result
 * @param searchTerm text to look for in each item's title or description
 * @throws InterruptedException if the calling thread is interrupted while waiting for the workers
 */
public XMLParser(List<String> feeds, String searchTerm) throws InterruptedException {
    links = new ArrayList<String>();
    if (feeds.isEmpty()) {
        // (0 + 1) / 2 == 0, and newFixedThreadPool rejects a non-positive size.
        return;
    }

    // Locale.ROOT keeps the case folding independent of the JVM's default locale.
    final String term = searchTerm.toUpperCase(java.util.Locale.ROOT);
    ExecutorService es = Executors
            .newFixedThreadPool(Math.min(StaticVariables.MAX_THREADS, (feeds.size() + 1) / 2));
    try {
        for (final String feed : feeds) {
            es.submit(new Runnable() {

                @Override
                public void run() {
                    try {
                        Document doc = Jsoup.connect(feed).timeout(StaticVariables.TIMEOUT * 200).get();
                        doc = Jsoup.parse(doc.html(), "", Parser.xmlParser());
                        Elements elements = doc.getElementsByTag("item");

                        // Gather locally first so the shared list is locked once per feed.
                        List<String> found = new ArrayList<String>();
                        for (final org.jsoup.nodes.Element element : elements) {
                            org.jsoup.nodes.Element titleTag = element.getElementsByTag("title").first();
                            org.jsoup.nodes.Element descriptionTag = element.getElementsByTag("description").first();
                            org.jsoup.nodes.Element linkTag = element.getElementsByTag("link").first();
                            if (linkTag == null) {
                                // No link to record for this item.
                                continue;
                            }

                            String title = titleTag == null ? ""
                                    : titleTag.text().toUpperCase(java.util.Locale.ROOT);
                            String description = descriptionTag == null ? ""
                                    : descriptionTag.text().toUpperCase(java.util.Locale.ROOT);

                            if (title.contains(term) || description.contains(term)) {
                                found.add(linkTag.text());
                            }
                        }

                        // ArrayList is not thread-safe; serialize appends from the workers.
                        synchronized (links) {
                            links.addAll(found);
                        }
                    } catch (IOException e) {
                        System.err
                                .println(e.getMessage() + ": Excluding link. (#" + throwOutCounter + ") Feed: " + feed);
                    }
                }
            });
        }
        es.shutdown();
        es.awaitTermination(StaticVariables.TIMEOUT, TimeUnit.SECONDS);
    } finally {
        // No-op when the pool already terminated; otherwise cancels queued feeds and interrupts
        // running ones after a timeout or interrupt.
        es.shutdownNow();
    }
}

public ArrayList<String> retrieveLinks() {
@@ -1,7 +1,6 @@
package tests.daemondash.newsvisualizer.com;

import java.io.IOException;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

@@ -16,15 +15,20 @@
public class ArticleParserTest {
public static void main(String[] args)
throws IOException, ParserConfigurationException, SAXException, InterruptedException {
ArrayList<String> articles = new ArrayList<String>();
XMLParser xmlParser = new XMLParser(StaticVariables.LIST_OF_ALL_SITES);
ArrayList<String> list = new ArrayList<String>();
list.add("http://www.cnn.com/2015/09/25/politics/china-state-dinner/index.html");
list.addAll(xmlParser.retrieveLinks());
System.out.println(list.size());
WebReader reader = new WebReader(list);
articles.addAll(reader.getOutputs());
ArticleParser parser = new ArticleParser(articles);
long initTime = System.currentTimeMillis();
XMLParser xmlParser = new XMLParser(StaticVariables.LIST_OF_ALL_SITES,"boehner");
long newTime = System.currentTimeMillis();
System.out.println("XMLParser Time Lapsed: " + (newTime - initTime) / 1000.0);
initTime = newTime;
System.out.println("Num RSS Feeds Used:" + xmlParser.retrieveLinks().size());
WebReader reader = new WebReader(xmlParser.retrieveLinks());
newTime = System.currentTimeMillis();
System.out.println("WebReader Time Lapsed: " + (newTime - initTime) / 1000.0);
initTime = newTime;
ArticleParser parser = new ArticleParser(reader.getOutputs());
newTime = System.currentTimeMillis();
System.out.println("ArticleParser Time Lapsed: " + (newTime - initTime) / 1000.0);
initTime = newTime;
for (Tuple<String> s : parser.getMostPopTuples()) {
System.out.println(s);
}