Skip to content

Commit

Permalink
Merge branch 'NAS-2559'
Browse files Browse the repository at this point in the history
  • Loading branch information
csrster committed Oct 7, 2016
2 parents 1766b46 + 1d50ad2 commit 0ef16a1
Showing 1 changed file with 51 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -49,6 +50,7 @@
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.DBUtils;
import dk.netarkivet.common.utils.DomainUtils;
import dk.netarkivet.common.utils.FilterIterator;
import dk.netarkivet.common.utils.StringUtils;
import dk.netarkivet.harvester.datamodel.eav.EAV;
Expand Down Expand Up @@ -102,6 +104,7 @@ protected DomainDBDAO() {
protected void create(Connection connection, Domain d) {
ArgumentNotValid.checkNotNull(d, "d");
ArgumentNotValid.checkNotNullOrEmpty(d.getName(), "d.getName()");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(d.getName()),"Not creating domain wth invalid name " + d.getName());

if (exists(connection, d.getName())) {
String msg = "Cannot create already existing domain " + d;
Expand Down Expand Up @@ -735,6 +738,7 @@ private void createConfigSeedlistsEntries(Connection c, Domain d, DomainConfigur
@Override
protected synchronized Domain read(Connection c, String domainName) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Invalid domain name " + domainName);
if (!exists(c, domainName)) {
throw new UnknownID("No domain by the name '" + domainName + "'");
}
Expand All @@ -744,6 +748,7 @@ protected synchronized Domain read(Connection c, String domainName) {
@Override
protected synchronized Domain readKnown(Connection c, String domainName) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Invalid domain name " + domainName);
Domain result;
PreparedStatement s = null;
try {
Expand Down Expand Up @@ -1030,7 +1035,9 @@ private SeedList getSeedListFromResultset(ResultSet res) throws SQLException {
@Override
public synchronized boolean exists(String domainName) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");

if (!DomainUtils.isValidDomainName(domainName)) {
return false;
}
Connection c = HarvestDBConnection.get();
try {
return exists(c, domainName);
Expand All @@ -1047,6 +1054,9 @@ public synchronized boolean exists(String domainName) {
* @return true if a domain with the given name exists, otherwise false.
*/
private synchronized boolean exists(Connection c, String domainName) {
    // A name rejected by DomainUtils.isValidDomainName is reported as not existing.
    // The short-circuit && means the COUNT(*) lookup only runs for names that pass that check.
    return DomainUtils.isValidDomainName(domainName)
            && 1 == DBUtils.selectIntValue(c, "SELECT COUNT(*) FROM domains WHERE name = ?", domainName);
}

Expand All @@ -1067,7 +1077,9 @@ public synchronized Iterator<Domain> getAllDomains() {
List<String> domainNames = DBUtils.selectStringList(c, "SELECT name FROM domains ORDER BY name");
List<Domain> orderedDomains = new LinkedList<Domain>();
for (String name : domainNames) {
orderedDomains.add(read(c, name));
if (DomainUtils.isValidDomainName(name)) {
orderedDomains.add(read(c, name));
}
}
return orderedDomains.iterator();
} finally {
Expand All @@ -1091,16 +1103,17 @@ public Iterator<Domain> getAllDomainsInSnapshotHarvestOrder() {
List<String> domainNamesWithAttributes = DBUtils.selectStringList(c, // Don't order this - it will be ordered later
"SELECT DISTINCT domains.name"
+ " FROM domains, configurations, eav_attribute"
+ " WHERE domains.defaultconfig=configurations.config_id"
+ " WHERE domains.defaultconfig=configurations.config_id"
+ " AND configurations.config_id=eav_attribute.entity_id");
log.info("Retrieved all {} domains used for Snapshot harvesting that has attributes for their default configs", domainNamesWithAttributes.size());
// Remove the content of domainNamesWithAttributes from domainNames
domainNames = domainNames.stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList());
// Remove the content of domainNamesWithAttributes from domainNames
domainNames.removeAll(domainNamesWithAttributes);
log.info("Removed all {} domains with attributes from the total list, reducing total-list to {}", domainNamesWithAttributes.size(), domainNames.size());
// Add the remainder of domainNames to domainNamesWithAttributes, so the domain configs with attributes will be handled first.
domainNamesWithAttributes.addAll(domainNames);
log.info("Remainder of total list merged with list of domains w/ attributes");

return new FilterIterator<String, Domain>(domainNamesWithAttributes.iterator()) {
public Domain filter(String s) {
return readKnown(s);
Expand All @@ -1116,10 +1129,10 @@ public List<String> getDomains(String glob) {
ArgumentNotValid.checkNotNullOrEmpty(glob, "glob");
// SQL uses % and _ instead of * and ?
String sqlGlob = DBUtils.makeSQLGlob(glob);

Connection c = HarvestDBConnection.get();
try {
return DBUtils.selectStringList(c, "SELECT name FROM domains WHERE name LIKE ? ORDER BY name", sqlGlob);
List<String> names = DBUtils.selectStringList(c, "SELECT name FROM domains WHERE name LIKE ? ORDER BY name", sqlGlob);
return names.stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList());
} finally {
HarvestDBConnection.release(c);
}
Expand All @@ -1142,6 +1155,7 @@ public boolean mayDelete(DomainConfiguration config) {

@Override
public String getDefaultDomainConfigurationName(String domainName) {
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
Connection c = HarvestDBConnection.get();
try {
return DBUtils.selectStringValue(c, "SELECT configurations.name " + "FROM domains, configurations "
Expand All @@ -1154,7 +1168,7 @@ public String getDefaultDomainConfigurationName(String domainName) {
@Override
public synchronized SparseDomain readSparse(String domainName) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");

ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
Connection c = HarvestDBConnection.get();
try {
List<String> domainConfigurationNames = DBUtils.selectStringList(c, "SELECT configurations.name "
Expand All @@ -1172,6 +1186,7 @@ public synchronized SparseDomain readSparse(String domainName) {
@Override
public List<AliasInfo> getAliases(String domain) {
ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domain), "Cannot read invalid domain name " + domain);
List<AliasInfo> resultSet = new ArrayList<AliasInfo>();
Connection c = HarvestDBConnection.get();
PreparedStatement s = null;
Expand Down Expand Up @@ -1218,9 +1233,10 @@ public List<AliasInfo> getAllAliases() {
String aliasOf = res.getString(2);
Date lastchanged = DBUtils.getDateMaybeNull(res, 3);
AliasInfo ai = new AliasInfo(domainName, aliasOf, lastchanged);
resultSet.add(ai);
if (DomainUtils.isValidDomainName(domainName) && DomainUtils.isValidDomainName(aliasOf)) {
resultSet.add(ai);
}
}

return resultSet;
} catch (SQLException e) {
throw new IOFailure("Failure getting alias-information" + "\n", e);
Expand Down Expand Up @@ -1249,24 +1265,26 @@ public List<TLDInfo> getTLDs(int level) {
ResultSet res = s.executeQuery();
while (res.next()) {
String domain = res.getString(1);
// getting the TLD level of the domain
int domainTLDLevel = TLDInfo.getTLDLevel(domain);
if (DomainUtils.isValidDomainName(domain)) {
// getting the TLD level of the domain
int domainTLDLevel = TLDInfo.getTLDLevel(domain);

// restraining to max level
if (domainTLDLevel > level) {
domainTLDLevel = level;
}
// restraining to max level
if (domainTLDLevel > level) {
domainTLDLevel = level;
}

// looping from level 1 to level max of the domain
for (int currentLevel = 1; currentLevel <= domainTLDLevel; currentLevel++) {
// getting the tld of the domain by level
String tld = TLDInfo.getMultiLevelTLD(domain, currentLevel);
TLDInfo i = resultMap.get(tld);
if (i == null) {
i = new TLDInfo(tld);
resultMap.put(tld, i);
// looping from level 1 to level max of the domain
for (int currentLevel = 1; currentLevel <= domainTLDLevel; currentLevel++) {
// getting the tld of the domain by level
String tld = TLDInfo.getMultiLevelTLD(domain, currentLevel);
TLDInfo i = resultMap.get(tld);
if (i == null) {
i = new TLDInfo(tld);
resultMap.put(tld, i);
}
i.addSubdomain(domain);
}
i.addSubdomain(domain);
}
}

Expand All @@ -1286,6 +1304,7 @@ public List<TLDInfo> getTLDs(int level) {
public HarvestInfo getDomainJobInfo(Job j, String domainName, String configName) {
ArgumentNotValid.checkNotNull(j, "j");
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
ArgumentNotValid.checkNotNullOrEmpty(configName, "configName");
HarvestInfo resultInfo = null;

Expand Down Expand Up @@ -1328,6 +1347,7 @@ public HarvestInfo getDomainJobInfo(Job j, String domainName, String configName)
@Override
public List<DomainHarvestInfo> listDomainHarvestInfo(String domainName, String orderBy, boolean asc) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
Connection c = HarvestDBConnection.get();
PreparedStatement s = null;
final ArrayList<DomainHarvestInfo> domainHarvestInfos = new ArrayList<DomainHarvestInfo>();
Expand Down Expand Up @@ -1399,6 +1419,7 @@ private void saveExtendedFieldValues(Connection c, Domain d) throws SQLException

@Override
public DomainConfiguration getDomainConfiguration(String domainName, String configName) {
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
DomainHistory history = getDomainHistory(domainName);
List<String> crawlertraps = getCrawlertraps(domainName);

Expand Down Expand Up @@ -1490,6 +1511,7 @@ public DomainConfiguration getDomainConfiguration(String domainName, String conf
* @return the crawlertraps for given domain.
*/
private List<String> getCrawlertraps(String domainName) {
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
Connection c = HarvestDBConnection.get();
String traps = null;
PreparedStatement s = null;
Expand Down Expand Up @@ -1535,6 +1557,7 @@ protected HarvestInfo filter(DomainConfiguration o) {
@Override
public DomainHistory getDomainHistory(String domainName) {
ArgumentNotValid.checkNotNullOrEmpty(domainName, "String domainName");
ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName);
Connection c = HarvestDBConnection.get();
DomainHistory history = new DomainHistory();
// Read history info
Expand Down Expand Up @@ -1586,7 +1609,7 @@ public List<String> getDomains(String glob, String searchField) {
Connection c = HarvestDBConnection.get();
try {
return DBUtils.selectStringList(c, "SELECT name FROM domains WHERE " + searchField.toLowerCase()
+ " LIKE ?", sqlGlob);
+ " LIKE ?", sqlGlob).stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList());
} finally {
HarvestDBConnection.release(c);
}
Expand Down Expand Up @@ -1627,7 +1650,8 @@ public void renameAndUpdateConfig(Domain domain, DomainConfiguration domainConf,
public List<String> getAllDomainNames() {
Connection c = HarvestDBConnection.get();
try {
return DBUtils.selectStringList(c, "SELECT name FROM domains");
return DBUtils.selectStringList(c, "SELECT name FROM domains").stream().filter(DomainUtils::isValidDomainName).collect(
Collectors.toList());
} finally {
HarvestDBConnection.release(c);
}
Expand Down

0 comments on commit 0ef16a1

Please sign in to comment.