Merge branch 'master' of github.com:sriksun/Ivory

Conflicts:
	common/src/main/java/org/apache/ivory/entity/parser/FeedEntityParser.java
	feed/src/main/java/org/apache/ivory/converter/OozieFeedMapper.java
	process/src/main/java/org/apache/ivory/converter/OozieProcessMapper.java
commit de3bd51eb275b8dd2ac2eef5fec5196b662aeee2 (2 parents: 82ec929 + 0681050)
Shwetha GS authored
Showing with 306 additions and 78 deletions.
  1. +2 −0  client/src/main/resources/feed-0.1.xsd
  2. +11 −0 common/src/main/java/org/apache/ivory/cleanup/AbstractCleanupHandler.java
  3. +1 −1  common/src/main/java/org/apache/ivory/cleanup/FeedCleanupHandler.java
  4. +2 −2 common/src/main/java/org/apache/ivory/cleanup/ProcessCleanupHandler.java
  5. +29 −6 common/src/main/java/org/apache/ivory/entity/FeedHelper.java
  6. +20 −5 common/src/main/java/org/apache/ivory/entity/parser/ClusterEntityParser.java
  7. +27 −12 common/src/main/java/org/apache/ivory/entity/parser/FeedEntityParser.java
  8. +5 −0 common/src/main/java/org/apache/ivory/entity/parser/ProcessEntityParser.java
  9. +1 −1  common/src/main/java/org/apache/ivory/group/FeedGroup.java
  10. +44 −6 common/src/main/java/org/apache/ivory/update/UpdateHelper.java
  11. +2 −1  common/src/test/java/org/apache/ivory/cleanup/LogCleanupServiceTest.java
  12. +18 −2 common/src/test/java/org/apache/ivory/entity/parser/ClusterEntityParserTest.java
  13. +26 −1 common/src/test/java/org/apache/ivory/entity/parser/FeedEntityParserTest.java
  14. +9 −0 common/src/test/java/org/apache/ivory/entity/parser/ProcessEntityParserTest.java
  15. +5 −0 common/src/test/resources/config/feed/feed-0.1.xml
  16. +5 −4 feed/src/main/java/org/apache/ivory/converter/OozieFeedMapper.java
  17. +8 −8 feed/src/main/resources/config/workflow/replication-workflow.xml
  18. +4 −4 feed/src/main/resources/config/workflow/retention-workflow.xml
  19. +3 −3 oozie/src/main/java/org/apache/ivory/converter/AbstractOozieEntityMapper.java
  20. +2 −3 oozie/src/main/java/org/apache/ivory/logging/LogMover.java
  21. +9 −2 oozie/src/main/java/org/apache/ivory/service/SharedLibraryHostingService.java
  22. +6 −0 oozie/src/main/java/org/apache/ivory/workflow/IvoryPostProcessing.java
  23. +5 −0 oozie/src/main/java/org/apache/ivory/workflow/engine/NullBundleJob.java
  24. +5 −0 oozie/src/main/java/org/apache/ivory/workflow/engine/NullCoordJob.java
  25. +35 −5 process/src/main/java/org/apache/ivory/converter/OozieProcessMapper.java
  26. +6 −6 process/src/main/resources/config/workflow/process-parent-workflow.xml
  27. +7 −3 process/src/test/java/org/apache/ivory/converter/OozieProcessMapperTest.java
  28. +5 −0 process/src/test/resources/config/feed/feed-0.1.xml
  29. +4 −3 rerun/src/main/java/org/apache/ivory/rerun/handler/LateRerunHandler.java
2  client/src/main/resources/feed-0.1.xsd
@@ -114,6 +114,7 @@
<xs:sequence>
<xs:element type="validity" name="validity" />
<xs:element type="retention" name="retention" />
+ <xs:element type="locations" name="locations" minOccurs="0"/>
</xs:sequence>
<xs:attribute type="IDENTIFIER" name="name" use="required" />
<xs:attribute type="cluster-type" name="type" use="optional" />
@@ -287,6 +288,7 @@
<xs:enumeration value="data" />
<xs:enumeration value="stats" />
<xs:enumeration value="meta" />
+ <xs:enumeration value="tmp" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="IDENTIFIER">
11 common/src/main/java/org/apache/ivory/cleanup/AbstractCleanupHandler.java
@@ -109,6 +109,7 @@ protected void delete(Cluster cluster, Entity entity, long retention)
} else {
LOG.info("Deleted path: " + log.getPath());
}
+ deleteParentIfEmpty(getFileSystem(cluster),log.getPath().getParent());
} catch (IOException e) {
throw new IvoryException(" Unable to delete log file : "
+ log.getPath() + " for entity " + entity.getName()
@@ -124,6 +125,16 @@ protected void delete(Cluster cluster, Entity entity, long retention)
}
+ private void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
+ FileStatus[] files = fs.listStatus(parent);
+ if(files!=null && files.length==0){
+ LOG.info("Parent path: "+parent+ " is empty, deleting path");
+ fs.delete(parent, true);
+ deleteParentIfEmpty(fs,parent.getParent());
+ }
+
+ }
+
public abstract void cleanup() throws IvoryException;
protected abstract Path getLogPath(Entity entity, String stagingPath);
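
Note: the new deleteParentIfEmpty walks up the directory tree after a log file is removed, deleting each parent that is now empty and stopping at the first non-empty ancestor. A minimal, self-contained sketch of the same recursion (the LocalFileSystem setup and the /tmp/ivory-demo paths are illustrative, not part of the patch):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class EmptyParentCleanup {
        // Delete 'parent' if it has no children, then try its parent in turn.
        // The recursion stops at the first ancestor that still has content.
        static void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
            FileStatus[] files = fs.listStatus(parent);
            if (files != null && files.length == 0) {
                fs.delete(parent, true);
                deleteParentIfEmpty(fs, parent.getParent());
            }
        }

        public static void main(String[] args) throws IOException {
            FileSystem fs = FileSystem.getLocal(new Configuration());
            Path leaf = new Path("/tmp/ivory-demo/a/b/c");
            fs.mkdirs(leaf);
            fs.delete(leaf, true);                     // leaves a/b empty
            deleteParentIfEmpty(fs, leaf.getParent()); // removes b, then a; stops at non-empty /tmp
        }
    }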
2  common/src/main/java/org/apache/ivory/cleanup/FeedCleanupHandler.java
@@ -49,7 +49,7 @@ public void cleanup() throws IvoryException {
@Override
protected Path getLogPath(Entity entity, String stagingPath) {
Path logPath = new Path(stagingPath, "ivory/workflows/feed/"
- + entity.getName() + "/logs/job-*");
+ + entity.getName() + "/logs/job-*/*/*");
return logPath;
}
4 common/src/main/java/org/apache/ivory/cleanup/ProcessCleanupHandler.java
@@ -39,7 +39,7 @@ public void cleanup() throws IvoryException {
for (org.apache.ivory.entity.v0.process.Cluster cluster : process
.getClusters().getClusters()) {
LOG.info("Cleaning up logs for process:" + processName
- + " in cluster: " + cluster.getName());
+ + " in cluster: " + cluster.getName() + " with retention: "+retention);
Cluster currentCluster = STORE.get(EntityType.CLUSTER,
cluster.getName());
delete(currentCluster, process, retention);
@@ -51,7 +51,7 @@ public void cleanup() throws IvoryException {
@Override
protected Path getLogPath(Entity entity, String stagingPath) {
Path logPath = new Path(stagingPath, "ivory/workflows/process/"
- + entity.getName() + "/logs/job-*");
+ + entity.getName() + "/logs/job-*/*");
return logPath;
}
35 common/src/main/java/org/apache/ivory/entity/FeedHelper.java
@@ -29,6 +29,7 @@
import org.apache.ivory.entity.v0.feed.Feed;
import org.apache.ivory.entity.v0.feed.Location;
import org.apache.ivory.entity.v0.feed.LocationType;
+import org.apache.ivory.entity.v0.feed.Locations;
import org.apache.ivory.expression.ExpressionHelper;
public class FeedHelper {
@@ -39,12 +40,34 @@ public static Cluster getCluster(Feed feed, String clusterName) {
return null;
}
- public static Location getLocation(Feed feed, LocationType type) {
- for(Location loc:feed.getLocations().getLocations())
- if(loc.getType() == type)
- return loc;
- return null;
- }
+ public static Location getLocation(Feed feed, LocationType type,
+ String clusterName) {
+ Cluster cluster = getCluster(feed, clusterName);
+ if (cluster != null && cluster.getLocations() != null
+ && cluster.getLocations().getLocations().size() != 0) {
+ return getLocation(cluster.getLocations(), type);
+ }
+ else{
+ return getLocation(feed.getLocations(), type);
+ }
+
+ }
+
+ public static Location getLocation(Feed feed, LocationType type) {
+ return getLocation(feed.getLocations(), type);
+ }
+
+ public static Location getLocation(Locations locations, LocationType type) {
+ for (Location loc : locations.getLocations()) {
+ if (loc.getType() == type) {
+ return loc;
+ }
+ }
+ Location loc = new Location();
+ loc.setPath("/tmp");
+ loc.setType(type);
+ return loc;
+ }
public static String normalizePartitionExpression(String part1, String part2) {
String partExp = StringUtils.stripToEmpty(part1) + "/" + StringUtils.stripToEmpty(part2);
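
Note: FeedHelper.getLocation is now cluster-aware. The per-cluster locations block, when present and non-empty, takes precedence over the feed-level defaults, and a location type defined in neither place resolves to a synthetic /tmp location. A toy model of that precedence, with plain maps standing in for the Ivory entity objects:

    import java.util.EnumMap;
    import java.util.Map;

    public class LocationLookupDemo {
        enum LocationType { DATA, STATS, META, TMP }

        // Mirrors getLocation(feed, type, clusterName): cluster-level locations
        // win when defined; otherwise fall back to the feed-level defaults; a
        // type found in neither resolves to "/tmp".
        static String getPath(Map<LocationType, String> clusterLocs,
                              Map<LocationType, String> feedLocs,
                              LocationType type) {
            if (clusterLocs != null && !clusterLocs.isEmpty()) {
                return clusterLocs.getOrDefault(type, "/tmp");
            }
            return feedLocs.getOrDefault(type, "/tmp");
        }

        public static void main(String[] args) {
            Map<LocationType, String> feedLocs = new EnumMap<>(LocationType.class);
            feedLocs.put(LocationType.DATA, "/projects/ivory/clicks");

            Map<LocationType, String> clusterLocs = new EnumMap<>(LocationType.class);
            clusterLocs.put(LocationType.DATA, "/projects/ivory/clicks/${YY}/${MM}");

            System.out.println(getPath(clusterLocs, feedLocs, LocationType.DATA)); // cluster override
            System.out.println(getPath(null, feedLocs, LocationType.DATA));        // feed default
            System.out.println(getPath(null, feedLocs, LocationType.STATS));       // synthetic /tmp
        }
    }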
25 common/src/main/java/org/apache/ivory/entity/parser/ClusterEntityParser.java
@@ -18,6 +18,8 @@
package org.apache.ivory.entity.parser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.ivory.entity.ClusterHelper;
import org.apache.ivory.entity.store.StoreAccessException;
import org.apache.ivory.entity.v0.EntityType;
@@ -33,9 +35,22 @@ public ClusterEntityParser() {
}
@Override
- public void validate(Cluster cluster) throws StoreAccessException, ValidationException {
- if (!ClusterHelper.getHdfsUrl(cluster).startsWith("hdfs://")) {
- throw new ValidationException("Cannot get valid nameNode from write interface of cluster: " + cluster.getName());
- }
- }
+ public void validate(Cluster cluster) throws StoreAccessException,
+ ValidationException {
+ if (!ClusterHelper.getHdfsUrl(cluster).startsWith("hdfs://")) {
+ throw new ValidationException(
+ "Cannot get valid nameNode from write interface of cluster: "
+ + cluster.getName());
+ }
+ try {
+ Configuration conf = new Configuration();
+ conf.set("fs.default.name", ClusterHelper.getHdfsUrl(cluster));
+ conf.setInt("ipc.client.connect.max.retries", 10);
+ FileSystem.get(conf);
+ } catch (Exception e) {
+ throw new ValidationException("Invalid HDFS server or port:"
+ + ClusterHelper.getHdfsUrl(cluster), e);
+ }
+ }
+
}
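
Note: cluster validation no longer stops at checking the hdfs:// prefix; it also opens a FileSystem handle against the write interface so an unreachable NameNode fails entity submission early. A standalone sketch of that probe (the endpoint below is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class HdfsProbe {
        // Mirrors the connectivity check added to ClusterEntityParser.validate():
        // FileSystem.get contacts the NameNode, so a bad host or port throws here.
        static void probe(String hdfsUrl) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.default.name", hdfsUrl);            // pre-Hadoop-2 key, as in the patch
            conf.setInt("ipc.client.connect.max.retries", 10);
            FileSystem.get(conf);
        }

        public static void main(String[] args) throws Exception {
            probe("hdfs://localhost:8020");                  // illustrative endpoint
        }
    }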
39 common/src/main/java/org/apache/ivory/entity/parser/FeedEntityParser.java
@@ -66,7 +66,6 @@ public void validate(Feed feed) throws IvoryException {
}
validateFeedPartitionExpression(feed);
- validateFeedSourceCluster(feed);
validateFeedGroups(feed);
// Seems like a good enough entity object for a new one
@@ -100,6 +99,23 @@ public void validate(Feed feed) throws IvoryException {
private void validateFeedGroups(Feed feed) throws ValidationException {
String[] groupNames = feed.getGroups() != null ? feed.getGroups().split(",") : new String[] {};
+ String defaultPath = FeedHelper.getLocation(feed, LocationType.DATA)
+ .getPath();
+ for (Cluster cluster : feed.getClusters().getClusters()) {
+ if (!FeedGroup.getDatePattern(
+ FeedHelper.getLocation(feed, LocationType.DATA,
+ cluster.getName()).getPath()).equals(
+ FeedGroup.getDatePattern(defaultPath))) {
+ throw new ValidationException("Feeds default path pattern: "
+ + FeedHelper.getLocation(feed, LocationType.DATA)
+ .getPath()
+ + ", does not match with cluster: "
+ + cluster.getName()
+ + " path pattern: "
+ + FeedHelper.getLocation(feed, LocationType.DATA,
+ cluster.getName()).getPath());
+ }
+ }
for (String groupName : groupNames) {
FeedGroup group = FeedGroupMap.get().getGroupsMapping().get(groupName);
if (group == null || group.canContainFeed(feed)) {
@@ -153,17 +169,6 @@ private void ensureValidityFor(Feed newFeed, Process process) throws IvoryExcept
}
}
- private void validateFeedSourceCluster(Feed feed) throws ValidationException {
- int i = 0;
- for (Cluster cluster : feed.getClusters().getClusters()) {
- if (cluster.getType() == ClusterType.SOURCE) {
- i++;
- }
- }
- if (i == 0)
- throw new ValidationException("Feed should have atleast one source cluster");
- }
-
private void validateClusterValidity(Date start, Date end, String clusterName) throws IvoryException {
try {
if (start.after(end)) {
@@ -198,7 +203,12 @@ private void validateFeedCutOffPeriod(Feed feed, Cluster cluster) throws IvoryEx
private void validateFeedPartitionExpression(Feed feed) throws IvoryException {
int numSourceClusters = 0, numTrgClusters = 0;
+ Set<String> clusters = new HashSet<String>();
for (Cluster cl : feed.getClusters().getClusters()) {
+ if (!clusters.add(cl.getName())) {
+ throw new ValidationException("Cluster: " + cl.getName()
+ + " is defined more than once for feed: "+feed.getName());
+ }
if (cl.getType() == ClusterType.SOURCE){
numSourceClusters++;
} else if(cl.getType() == ClusterType.TARGET) {
@@ -206,6 +216,11 @@ private void validateFeedPartitionExpression(Feed feed) throws IvoryException {
}
}
+ if (numTrgClusters >= 1 && numSourceClusters == 0) {
+ throw new ValidationException("Feed: " + feed.getName()
+ + " should have atleast one source cluster defined");
+ }
+
int feedParts = feed.getPartitions() != null ? feed.getPartitions().getPartitions().size() : 0;
for(Cluster cluster:feed.getClusters().getClusters()) {
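
Note: feed validation now insists that every cluster-level DATA path carries the same date pattern as the feed's default DATA path. Stripped to its essence (the real check delegates to FeedGroup.getDatePattern and FeedDataPath.PATTERN; the regex below is an illustrative stand-in, not the actual pattern):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class DatePatternCheck {
        // Illustrative stand-in for FeedDataPath.PATTERN: the ${...} time variables.
        private static final Pattern VARS =
                Pattern.compile("\\$\\{(YEAR|MONTH|DAY|HOUR|MINUTE)\\}");

        // Roughly what FeedGroup.getDatePattern does: collect the date variables
        // appearing in a path into a comparable key.
        static String getDatePattern(String path) {
            List<String> fields = new ArrayList<>();
            Matcher matcher = VARS.matcher(path);
            while (matcher.find()) {
                fields.add(matcher.group());
            }
            return fields.toString();
        }

        public static void main(String[] args) {
            String defaultPath = "/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}";
            String clusterPath = "/backup/clicks/${YEAR}/${MONTH}/${DAY}";
            if (!getDatePattern(clusterPath).equals(getDatePattern(defaultPath))) {
                System.out.println("cluster path pattern does not match the default path");
            }
        }
    }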
5 common/src/main/java/org/apache/ivory/entity/parser/ProcessEntityParser.java
@@ -60,8 +60,13 @@ public void validate(Process process) throws IvoryException {
process.setTimezone(TimeZone.getTimeZone("UTC"));
// check if dependent entities exists
+ Set<String> clusters = new HashSet<String>();
for (org.apache.ivory.entity.v0.process.Cluster cluster : process.getClusters().getClusters()) {
String clusterName = cluster.getName();
+ if (!clusters.add(cluster.getName())) {
+ throw new ValidationException("Cluster: " + cluster.getName()
+ + " is defined more than once for process: "+process.getName());
+ }
validateEntityExists(EntityType.CLUSTER, clusterName);
validateProcessValidity(cluster.getValidity().getStart(), cluster.getValidity().getEnd());
validateHDFSpaths(process, clusterName);
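
Note: both the feed and process parsers now reject duplicate cluster definitions by tracking names in a HashSet, relying on Set.add returning false for a repeated element. In miniature (cluster names borrowed from the test configs in this commit):

    import java.util.HashSet;
    import java.util.Set;

    public class DuplicateClusterCheck {
        public static void main(String[] args) {
            String[] clusterNames = {"corp", "backupCluster", "corp"};
            Set<String> seen = new HashSet<>();
            for (String name : clusterNames) {
                if (!seen.add(name)) { // add() returns false if already present
                    throw new IllegalStateException(
                            "Cluster: " + name + " is defined more than once");
                }
            }
        }
    }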
2  common/src/main/java/org/apache/ivory/group/FeedGroup.java
@@ -43,7 +43,7 @@ public FeedGroup(String group, Frequency frequency, String path) {
.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
}
- private String getDatePattern(String path) {
+ public static String getDatePattern(String path) {
Matcher matcher = FeedDataPath.PATTERN.matcher(path);
List<String> fields = new ArrayList<String>();
while (matcher.find()) {
50 common/src/main/java/org/apache/ivory/update/UpdateHelper.java
@@ -52,8 +52,26 @@ public static boolean shouldUpdate(Entity oldEntity, Entity newEntity, Entity af
}
public static boolean shouldUpdate(Feed oldFeed, Feed newFeed, Process affectedProcess) {
- if (!FeedHelper.getLocation(oldFeed, LocationType.DATA).getPath()
- .equals(FeedHelper.getLocation(newFeed, LocationType.DATA).getPath()))
+ if (!FeedHelper
+ .getLocation(oldFeed.getLocations(), LocationType.DATA)
+ .getPath()
+ .equals(FeedHelper.getLocation(newFeed.getLocations(),
+ LocationType.DATA).getPath())
+ || !FeedHelper
+ .getLocation(oldFeed.getLocations(), LocationType.META)
+ .getPath()
+ .equals(FeedHelper.getLocation(newFeed.getLocations(),
+ LocationType.META).getPath())
+ || !FeedHelper
+ .getLocation(oldFeed.getLocations(), LocationType.STATS)
+ .getPath()
+ .equals(FeedHelper.getLocation(newFeed.getLocations(),
+ LocationType.STATS).getPath())
+ || !FeedHelper
+ .getLocation(oldFeed.getLocations(), LocationType.TMP)
+ .getPath()
+ .equals(FeedHelper.getLocation(newFeed.getLocations(),
+ LocationType.TMP).getPath()))
return true;
LOG.debug(oldFeed.toShortString() + ": Location identical. Ignoring...");
@@ -88,10 +106,30 @@ public static boolean shouldUpdate(Feed oldFeed, Feed newFeed, Process affectedP
}
for (Cluster cluster : affectedProcess.getClusters().getClusters()) {
- if (!FeedHelper.getCluster(oldFeed, cluster.getName()).getValidity().getStart()
- .equals(FeedHelper.getCluster(newFeed, cluster.getName()).getValidity().getStart()))
- return true;
- LOG.debug(oldFeed.toShortString() + ": Feed start on cluster" + cluster.getName() + " identical. Ignoring...");
+ if (!FeedHelper
+ .getCluster(oldFeed, cluster.getName())
+ .getValidity()
+ .getStart()
+ .equals(FeedHelper.getCluster(newFeed, cluster.getName())
+ .getValidity().getStart())
+ || !FeedHelper.getLocation(oldFeed, LocationType.DATA,
+ cluster.getName()).getPath().equals(
+ FeedHelper.getLocation(newFeed, LocationType.DATA,
+ cluster.getName()).getPath())
+ || !FeedHelper.getLocation(oldFeed, LocationType.META,
+ cluster.getName()).getPath().equals(
+ FeedHelper.getLocation(newFeed, LocationType.META,
+ cluster.getName()).getPath())
+ || !FeedHelper.getLocation(oldFeed, LocationType.STATS,
+ cluster.getName()).getPath().equals(
+ FeedHelper.getLocation(newFeed, LocationType.STATS,
+ cluster.getName()).getPath())
+ || !FeedHelper.getLocation(oldFeed, LocationType.TMP,
+ cluster.getName()).getPath().equals(
+ FeedHelper.getLocation(newFeed, LocationType.TMP,
+ cluster.getName()).getPath()))
+ return true;
+ LOG.debug(oldFeed.toShortString() + ": Feed on cluster" + cluster.getName() + " identical. Ignoring...");
}
return false;
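
Note: the expanded shouldUpdate logic amounts to "trigger an update as soon as any of the four location types differs between the old and new feed", first at the feed level and then per affected cluster. The chained || comparisons are equivalent to a loop over LocationType; a compact sketch, with Function lookups standing in for the FeedHelper calls:

    import java.util.function.Function;

    public class LocationDiff {
        enum LocationType { DATA, META, STATS, TMP }

        // Equivalent of the chained comparisons in UpdateHelper.shouldUpdate:
        // any changed path for any location type forces an update.
        static boolean locationsDiffer(Function<LocationType, String> oldPaths,
                                       Function<LocationType, String> newPaths) {
            for (LocationType type : LocationType.values()) {
                if (!oldPaths.apply(type).equals(newPaths.apply(type))) {
                    return true;
                }
            }
            return false;
        }

        public static void main(String[] args) {
            Function<LocationType, String> oldFeed =
                    t -> "/projects/ivory/" + t.name().toLowerCase();
            Function<LocationType, String> newFeed =
                    t -> t == LocationType.STATS ? "/projects/ivory/newStats"
                                                 : "/projects/ivory/" + t.name().toLowerCase();
            System.out.println(locationsDiffer(oldFeed, newFeed)); // true: STATS moved
        }
    }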
3  common/src/test/java/org/apache/ivory/cleanup/LogCleanupServiceTest.java
@@ -98,10 +98,11 @@ public void testProcessLogs() throws IOException, IvoryException,
fs.mkdirs(instanceLogPath2);
fs.mkdirs(instanceLogPath3);
fs.mkdirs(instanceLogPath4);
- Thread.sleep(61000);
+
// fs.setTimes wont work on dirs
fs.createNewFile(new Path(instanceLogPath, "oozie.log"));
fs.createNewFile(new Path(instanceLogPath, "pigAction_SUCCEEDED.log"));
+ Thread.sleep(61000);
AbstractCleanupHandler processCleanupHandler = new ProcessCleanupHandler();
processCleanupHandler.cleanup();
20 common/src/test/java/org/apache/ivory/entity/parser/ClusterEntityParserTest.java
@@ -27,6 +27,7 @@
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.ivory.IvoryException;
import org.apache.ivory.entity.AbstractTestBase;
import org.apache.ivory.entity.ClusterHelper;
@@ -35,6 +36,8 @@
import org.apache.ivory.entity.v0.cluster.Interface;
import org.apache.ivory.entity.v0.cluster.Interfacetype;
import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class ClusterEntityParserTest extends AbstractTestBase {
@@ -46,7 +49,8 @@ public void testParse() throws IOException, IvoryException, JAXBException {
InputStream stream = this.getClass().getResourceAsStream(CLUSTER_XML);
- Cluster cluster = (Cluster) parser.parseAndValidate(stream);
+ Cluster cluster = (Cluster) parser.parse(stream);
+ ClusterHelper.getInterface(cluster, Interfacetype.WRITE).setEndpoint(conf.get("fs.default.name"));
Assert.assertNotNull(cluster);
assertEquals(cluster.getName(), "testCluster");
@@ -61,7 +65,7 @@ public void testParse() throws IOException, IvoryException, JAXBException {
assertEquals(readonly.getVersion(), "0.20.2");
Interface write = ClusterHelper.getInterface(cluster, Interfacetype.WRITE);
- assertEquals(write.getEndpoint(), "hdfs://localhost:8020");
+ //assertEquals(write.getEndpoint(), conf.get("fs.default.name"));
assertEquals(write.getVersion(), "0.20.2");
Interface workflow = ClusterHelper.getInterface(cluster, Interfacetype.WORKFLOW);
@@ -74,5 +78,17 @@ public void testParse() throws IOException, IvoryException, JAXBException {
Marshaller marshaller = EntityType.CLUSTER.getMarshaller();
marshaller.marshal(cluster, stringWriter);
System.out.println(stringWriter.toString());
+ parser.parseAndValidate(stringWriter.toString());
}
+
+ @BeforeClass
+ public void init() throws Exception {
+ conf.set("hadoop.log.dir", "/tmp");
+ this.dfsCluster = new MiniDFSCluster(conf, 1, true, null);
+ }
+
+ @AfterClass
+ public void tearDown() {
+ this.dfsCluster.shutdown();
+ }
}
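
Note: the parser test now spins up an embedded MiniDFSCluster instead of asserting a hard-coded hdfs://localhost:8020 endpoint. Its lifecycle in a minimal sketch (Hadoop 0.20-era constructor, as used by the test):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.MiniDFSCluster;

    public class MiniDfsLifecycle {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("hadoop.log.dir", "/tmp");       // MiniDFSCluster needs a log dir
            MiniDFSCluster dfs = new MiniDFSCluster(conf, 1, true, null); // 1 datanode, format
            try {
                FileSystem fs = FileSystem.get(conf); // conf now points at the mini cluster
                System.out.println(conf.get("fs.default.name"));
            } finally {
                dfs.shutdown();
            }
        }
    }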
27 common/src/test/java/org/apache/ivory/entity/parser/FeedEntityParserTest.java
@@ -45,6 +45,7 @@
import org.apache.ivory.entity.v0.feed.LocationType;
import org.apache.ivory.entity.v0.feed.Locations;
import org.apache.ivory.entity.v0.feed.Validity;
+import org.apache.ivory.group.FeedGroup;
import org.apache.ivory.group.FeedGroupMapTest;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
@@ -254,6 +255,7 @@ public void testValidFeedGroup() throws IvoryException, JAXBException {
location.setPath("/projects/bi/rmc/daily/ad/${YEAR}/fraud/${MONTH}-${DAY}/ad");
location.setType(LocationType.DATA);
feed1.getLocations().getLocations().add(location);
+ feed1.getClusters().getClusters().get(0).getLocations().getLocations().set(0, location);
parser.parseAndValidate(feed1.toString());
ConfigurationStore.get().publish(EntityType.FEED, feed1);
@@ -267,8 +269,23 @@ public void testValidFeedGroup() throws IvoryException, JAXBException {
.setPath("/projects/bi/rmc/daily/ad/${YEAR}/fraud/${MONTH}-${DAY}/ad");
location2.setType(LocationType.DATA);
feed2.getLocations().getLocations().add(location2);
+ feed2.getClusters().getClusters().get(0).getLocations().getLocations().set(0, location);
parser.parseAndValidate(feed2.toString());
}
+
+ @Test(expectedExceptions = ValidationException.class)
+ public void testInvalidFeedClusterDataLocation() throws JAXBException, IvoryException{
+ Feed feed1 = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(
+ (FeedEntityParserTest.class.getResourceAsStream(FEED_XML)));
+ feed1.setName("f1" + System.currentTimeMillis());
+ feed1.setGroups("group1,group2,group3");
+ feed1.setLocations(new Locations());
+ Location location = new Location();
+ location.setPath("/projects/bi/rmc/daily/ad/${YEAR}/fraud/${MONTH}-${DAY}/ad");
+ location.setType(LocationType.DATA);
+ feed1.getLocations().getLocations().add(location);
+ parser.parseAndValidate(feed1.toString());
+ }
@Test(expectedExceptions = ValidationException.class)
public void testInvalidFeedGroup() throws IvoryException, JAXBException {
@@ -282,6 +299,8 @@ public void testInvalidFeedGroup() throws IvoryException, JAXBException {
location.setType(LocationType.DATA);
feed1.getLocations().getLocations().add(location);
parser.parseAndValidate(feed1.toString());
+
+ feed1.getClusters().getClusters().get(0).getLocations().getLocations().set(0,location);
ConfigurationStore.get().publish(EntityType.FEED, feed1);
Feed feed2 = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(
@@ -294,6 +313,7 @@ public void testInvalidFeedGroup() throws IvoryException, JAXBException {
.setPath("/projects/bi/rmc/daily/ad/${YEAR}/fraud/${MONTH}/${HOUR}/ad");
location2.setType(LocationType.DATA);
feed2.getLocations().getLocations().add(location2);
+ feed2.getClusters().getClusters().get(0).getLocations().getLocations().set(0,location);
parser.parseAndValidate(feed2.toString());
}
@@ -371,7 +391,10 @@ public void testInvalidFeedGroupName() throws JAXBException, IvoryException {
FeedGroupMapTest.class
.getResourceAsStream("/config/feed/feed-0.1.xml"));
feed1.setName("feed1");
- feed1.getLocations().getLocations().get(0).setPath("/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}");
+ feed1.getLocations().getLocations().get(0)
+ .setPath("/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}");
+ feed1.getClusters().getClusters().get(0).getLocations().getLocations()
+ .get(0).setPath("/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}");
ConfigurationStore.get().publish(EntityType.FEED, feed1);
Feed feed2 = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(
@@ -379,6 +402,8 @@ public void testInvalidFeedGroupName() throws JAXBException, IvoryException {
.getResourceAsStream("/config/feed/feed-0.1.xml"));
feed2.setName("feed2");
feed2.getLocations().getLocations().get(0).setPath("/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}");
+ feed2.getClusters().getClusters().get(0).getLocations().getLocations()
+ .get(0).setPath("/data/clicks/${YEAR}/${MONTH}/${DAY}/${HOUR}");
feed2.setFrequency(new Frequency("hours(1)"));
try{
parser.parseAndValidate(feed2.toString());
9 common/src/test/java/org/apache/ivory/entity/parser/ProcessEntityParserTest.java
@@ -264,4 +264,13 @@ public void testOozieLatestExpression() throws Exception {
process.getInputs().getInputs().get(0).setStart("latest(-1)");
parser.parseAndValidate(process.toString());
}
+
+ @Test(expectedExceptions=ValidationException.class)
+ public void testDuplicateClusterName() throws Exception {
+ Process process = parser
+ .parse((ProcessEntityParserTest.class
+ .getResourceAsStream(PROCESS_XML)));
+ process.getClusters().getClusters().add(1, process.getClusters().getClusters().get(0));
+ parser.validate(process);
+ }
}
5 common/src/test/resources/config/feed/feed-0.1.xml
@@ -34,6 +34,11 @@
<cluster name="testCluster" type="source">
<validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
<retention limit="hours(48)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ <locations>
+ <location type="data" path="/projects/ivory/clicks" />
+ <location type="stats" path="/projects/ivory/clicksStats" />
+ <location type="meta" path="/projects/ivory/clicksMetaData" />
+ </locations>
</cluster>
<cluster name="backupCluster" type="target">
<validity start="2011-11-01T00:00Z" end="2011-12-31T00:00Z"/>
9 feed/src/main/java/org/apache/ivory/converter/OozieFeedMapper.java
@@ -118,7 +118,7 @@ private ACTION getRetentionWorkflowAction(Cluster cluster, Path wfPath, String w
Map<String, String> props = createCoordDefaultConfiguration(cluster, wfPath, wfName);
org.apache.ivory.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
- String feedPathMask = FeedHelper.getLocation(feed, LocationType.DATA).getPath();
+ String feedPathMask = FeedHelper.getLocation(feed, LocationType.DATA,cluster.getName()).getPath();
props.put("feedDataPath", feedPathMask.replaceAll("\\$\\{", "\\?\\{"));
props.put("timeZone", feed.getTimezone().getID());
@@ -200,8 +200,9 @@ private COORDINATORAPP createAndGetCoord(Feed feed, Cluster srcCluster, Cluster
inputDataset.setUriTemplate(new Path(ClusterHelper
.getHdfsUrl(srcCluster), FeedHelper.getLocation(feed,
- LocationType.DATA).getPath()).toString());
- outputDataset.setUriTemplate(getHDFSPath(FeedHelper.getLocation(feed, LocationType.DATA).getPath()));
+ LocationType.DATA,srcCluster.getName()).getPath()).toString());
+ outputDataset.setUriTemplate(getHDFSPath(FeedHelper.getLocation(
+ feed, LocationType.DATA, trgCluster.getName()).getPath()));
setDatasetValues(inputDataset, feed, srcCluster);
setDatasetValues(outputDataset, feed, srcCluster);
if (feed.getAvailabilityFlag() == null) {
@@ -245,7 +246,7 @@ private ACTION getReplicationWorkflowAction(Cluster srcCluster, Cluster trgClust
props.put("srcClusterColo", srcCluster.getColo());
props.put(ARG.feedNames.getPropName(), feed.getName());
props.put(ARG.feedInstancePaths.getPropName(), pathsWithPartitions.toString());
- props.put("sourceRelativePaths", pathsWithPartitions.toString());
+ props.put("sourceRelativePaths", pathsWithPartitions.toString().replaceAll("//+", "/"));
props.put("distcpSourcePaths", "${coord:dataIn('input')}");
props.put("distcpTargetPaths", "${coord:dataOut('output')}");
props.put("ivoryInPaths", pathsWithPartitions.toString());
16 feed/src/main/resources/config/workflow/replication-workflow.xml
@@ -32,11 +32,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.latedata.LateDataHandler</main-class>
@@ -55,11 +55,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.replication.FeedReplicator</main-class>
@@ -83,11 +83,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.workflow.IvoryPostProcessing</main-class>
@@ -129,11 +129,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.workflow.IvoryPostProcessing</main-class>
8 feed/src/main/resources/config/workflow/retention-workflow.xml
@@ -24,11 +24,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.retention.FeedEvictor</main-class>
@@ -50,11 +50,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.messaging.MessageProducer</main-class>
6 oozie/src/main/java/org/apache/ivory/converter/AbstractOozieEntityMapper.java
@@ -65,8 +65,8 @@
protected static final String ACTUAL_TIME_EL = "${coord:formatTime(coord:actualTime(), 'yyyy-MM-dd-HH-mm')}";
protected static final Long DEFAULT_BROKER_MSG_TTL = 3 * 24 * 60L;
- protected static final String MR_QUEUE_NAME="mapred.job.queue.name";
- protected static final String MR_JOB_PRIORITY="mapred.job.priority";
+ protected static final String MR_QUEUE_NAME="queueName";
+ protected static final String MR_JOB_PRIORITY="jobPriority";
protected static final JAXBContext workflowJaxbContext;
protected static final JAXBContext coordJaxbContext;
@@ -151,7 +151,7 @@ private void copySharedLibs(Cluster cluster, Path coordPath) throws IvoryExcepti
SharedLibraryHostingService.pushLibsToHDFS(libPath.toString(), cluster, ivoryJarFilter);
} catch (IOException e) {
LOG.error("Failed to copy shared libs on cluster " + cluster.getName(), e);
- throw new IvoryException(e);
+ throw new IvoryException("Failed to copy shared libs on cluster " + cluster.getName(),e);
}
}
5 oozie/src/main/java/org/apache/ivory/logging/LogMover.java
View
@@ -84,8 +84,8 @@ public int run(String[] arguments) throws Exception {
FileSystem fs = path.getFileSystem(getConf());
if (args.entityType.equalsIgnoreCase(EntityType.FEED.name())) {
- // if replication wf
- copyTTlogs(args, fs, path, jobInfo.getActions().get(1));
+ // if replication wf
+ copyTTlogs(args, fs, path, jobInfo.getActions().get(2));
copyOozieLog(client, fs, path, jobInfo.getId());
} else {
// if process wf
@@ -93,7 +93,6 @@ public int run(String[] arguments) throws Exception {
WorkflowJob subflowInfo = client.getJobInfo(subflowId);
List<WorkflowAction> actions = subflowInfo.getActions();
for (WorkflowAction action : actions) {
-
if (action.getType().equals("pig")
|| action.getType().equals("java")) {
copyTTlogs(args, fs, path, action);
11 oozie/src/main/java/org/apache/ivory/service/SharedLibraryHostingService.java
View
@@ -63,7 +63,7 @@ private void addLibsTo(Cluster cluster) throws IvoryException {
try {
pushLibsToHDFS(libLocation, cluster, nonIvoryJarFilter);
} catch (IOException e) {
- throw new IvoryException("Failed to copy shared libs to cluster " + cluster.getName(), e);
+ LOG.error("Failed to copy shared libs to cluster " + cluster.getName(), e);
}
}
@@ -75,7 +75,14 @@ public static void pushLibsToHDFS(String path, Cluster cluster, IvoryPathFilter
}
Configuration conf = ClusterHelper.getConfiguration(cluster);
- FileSystem fs = FileSystem.get(conf);
+ conf.setInt("ipc.client.connect.max.retries", 10);
+ FileSystem fs = null;
+ try {
+ fs = FileSystem.get(conf);
+ } catch (Exception e) {
+ throw new IvoryException("Unable to connect to HDFS: "
+ + ClusterHelper.getHdfsUrl(cluster));
+ }
Path clusterPath = new Path(path);
if(!fs.exists(clusterPath))
fs.mkdirs(clusterPath);
6 oozie/src/main/java/org/apache/ivory/workflow/IvoryPostProcessing.java
View
@@ -31,8 +31,11 @@
import org.apache.hadoop.util.ToolRunner;
import org.apache.ivory.logging.LogMover;
import org.apache.ivory.messaging.MessageProducer;
+import org.apache.log4j.Logger;
+import org.mortbay.log.Log;
public class IvoryPostProcessing extends Configured implements Tool{
+ private static final Logger LOG = Logger.getLogger(IvoryPostProcessing.class);
public enum Arg{
CLUSTER("cluster","name of the current cluster"),
@@ -87,9 +90,12 @@ public int run(String[] args) throws Exception {
CommandLine cmd = getCommand(args);
+ LOG.info("Sending user message "+cmd);
invokeUserMessageProducer(cmd);
//LogMover doesnt throw exception, a failed logmover will not fail the user workflow
+ LOG.info("Moving logs "+cmd);
invokeLogProducer(cmd);
+ LOG.info("Sending ivory message "+cmd);
invokeIvoryMessageProducer(cmd);
return 0;
5 oozie/src/main/java/org/apache/ivory/workflow/engine/NullBundleJob.java
@@ -104,4 +104,9 @@ public Date getKickoffTime() {
public Date getCreatedTime() {
return null;
}
+
+ @Override
+ public String getAcl() {
+ return null;
+ }
}
5 oozie/src/main/java/org/apache/ivory/workflow/engine/NullCoordJob.java
@@ -147,4 +147,9 @@ public String getConsoleUrl() {
public List<CoordinatorAction> getActions() {
return null;
}
+
+ @Override
+ public String getAcl() {
+ return null;
+ }
}
40 process/src/main/java/org/apache/ivory/converter/OozieProcessMapper.java
View
@@ -151,7 +151,7 @@ public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath)
if (coord.getInputEvents() == null)
coord.setInputEvents(new INPUTEVENTS());
- SYNCDATASET syncdataset = createDataSet(input.getFeed(), cluster, input.getName());
+ SYNCDATASET syncdataset = createDataSet(input.getFeed(), cluster, input.getName(), LocationType.DATA);
coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
DATAIN datain = createDataIn(input);
@@ -162,6 +162,7 @@ public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath)
props.put(input.getName(), inputExpr);
inputFeeds.add(input.getName());
inputPaths.add(inputExpr);
+
}
}
props.put("ivoryInPaths", join(inputPaths.iterator(), '#'));
@@ -177,7 +178,7 @@ public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath)
coord.setOutputEvents(new OUTPUTEVENTS());
for (Output output : process.getOutputs().getOutputs()) {
- SYNCDATASET syncdataset = createDataSet(output.getFeed(), cluster, output.getName());
+ SYNCDATASET syncdataset = createDataSet(output.getFeed(), cluster, output.getName(),LocationType.DATA);
coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);
DATAOUT dataout = createDataOut(output);
@@ -187,6 +188,15 @@ public COORDINATORAPP createDefaultCoordinator(Cluster cluster, Path bundlePath)
props.put(output.getName(), outputExpr);
outputFeeds.add(output.getName());
outputPaths.add(outputExpr);
+
+ // stats and meta paths
+ createOutputEvent(output.getFeed(),output.getName(), cluster, "stats",
+ LocationType.STATS, coord, props, output.getInstance());
+ createOutputEvent(output.getFeed(),output.getName(), cluster, "meta",
+ LocationType.META, coord, props,output.getInstance());
+ createOutputEvent(output.getFeed(),output.getName(), cluster, "tmp",
+ LocationType.TMP, coord, props,output.getInstance());
+
}
}
// Output feed name and path for parent workflow
@@ -225,6 +235,24 @@ private DATAIN createDataIn(Input input) {
return datain;
}
+ private void createOutputEvent(String feed, String name, Cluster cluster,
+ String type, LocationType locType, COORDINATORAPP coord,
+ Map<String, String> props, String instance)
+ throws IvoryException {
+ SYNCDATASET dataset = createDataSet(feed, cluster,name+type,
+ locType);
+ coord.getDatasets().getDatasetOrAsyncDataset().add(dataset);
+ DATAOUT dataout = new DATAOUT();
+ if (coord.getOutputEvents() == null)
+ coord.setOutputEvents(new OUTPUTEVENTS());
+ dataout.setName(name+type);
+ dataout.setDataset(name+type);
+ dataout.setInstance(getELExpression(instance));
+ coord.getOutputEvents().getDataOut().add(dataout);
+ String outputExpr = "${coord:dataOut('" + name+type+ "')}";
+ props.put(name+"."+type, outputExpr);
+ }
+
private String join(Iterator<String> itr, char sep) {
String joinedStr = StringUtils.join(itr, sep);
if(joinedStr.isEmpty())
@@ -232,12 +260,14 @@ private String join(Iterator<String> itr, char sep) {
return joinedStr;
}
- private SYNCDATASET createDataSet(String feedName, Cluster cluster, String datasetName) throws IvoryException {
- Feed feed = EntityUtil.getEntity(EntityType.FEED, feedName);
+ private SYNCDATASET createDataSet(String feedName, Cluster cluster, String datasetName, LocationType locationType) throws IvoryException {
+ Feed feed = (Feed) EntityUtil.getEntity(EntityType.FEED, feedName);
SYNCDATASET syncdataset = new SYNCDATASET();
syncdataset.setName(datasetName);
- syncdataset.setUriTemplate("${nameNode}" + FeedHelper.getLocation(feed, LocationType.DATA).getPath());
+ syncdataset.setUriTemplate("${nameNode}"
+ + FeedHelper.getLocation(feed, locationType,
+ cluster.getName()).getPath());
syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");
org.apache.ivory.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
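
Note: for each process output, the coordinator now registers three additional datasets and output events, one per stats/meta/tmp location, and surfaces each as a workflow property named <output>.<type>. A small sketch of the resulting names (clicksOut is a hypothetical output name):

    public class OutputEventNames {
        public static void main(String[] args) {
            String name = "clicksOut"; // hypothetical output name
            for (String type : new String[] {"stats", "meta", "tmp"}) {
                String dataOutName = name + type;                        // dataset / out-event name
                String property = name + "." + type;                     // property key
                String expr = "${coord:dataOut('" + dataOutName + "')}"; // property value
                System.out.println(property + " = " + expr);
            }
        }
    }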
12 process/src/main/resources/config/workflow/process-parent-workflow.xml
@@ -32,11 +32,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.latedata.LateDataHandler</main-class>
@@ -65,11 +65,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.workflow.IvoryPostProcessing</main-class>
@@ -111,11 +111,11 @@
<configuration>
<property>
<name>mapred.job.queue.name</name>
- <value>${wf:conf('mapred.job.queue.name')}</value>
+ <value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.priority</name>
- <value>${wf:conf('mapred.job.priority')}</value>
+ <value>${jobPriority}</value>
</property>
</configuration>
<main-class>org.apache.ivory.workflow.IvoryPostProcessing</main-class>
10 process/src/test/java/org/apache/ivory/converter/OozieProcessMapperTest.java
View
@@ -106,12 +106,16 @@ public void testDefCoordMap(Process process, COORDINATORAPP coord) throws Except
assertEquals(process.getInputs().getInputs().get(1).getName(), coord.getInputEvents().getDataIn().get(1).getDataset());
assertEquals("${"+process.getInputs().getInputs().get(1).getStart()+"}", coord.getInputEvents().getDataIn().get(1).getStartInstance());
assertEquals("${" + process.getInputs().getInputs().get(1).getEnd()+"}", coord.getInputEvents().getDataIn().get(1).getEndInstance());
-
+
+ assertEquals(process.getOutputs().getOutputs().get(0).getName()+"stats", coord.getOutputEvents().getDataOut().get(1).getName());
+ assertEquals(process.getOutputs().getOutputs().get(0).getName()+"meta", coord.getOutputEvents().getDataOut().get(2).getName());
+ assertEquals(process.getOutputs().getOutputs().get(0).getName()+"tmp", coord.getOutputEvents().getDataOut().get(3).getName());
+
assertEquals(process.getOutputs().getOutputs().get(0).getName(), coord.getOutputEvents().getDataOut().get(0).getName());
assertEquals("${"+process.getOutputs().getOutputs().get(0).getInstance()+"}", coord.getOutputEvents().getDataOut().get(0).getInstance());
assertEquals(process.getOutputs().getOutputs().get(0).getName(), coord.getOutputEvents().getDataOut().get(0).getDataset());
- assertEquals(3, coord.getDatasets().getDatasetOrAsyncDataset().size());
+ assertEquals(6, coord.getDatasets().getDatasetOrAsyncDataset().size());
ConfigurationStore store = ConfigurationStore.get();
Feed feed = store.get(EntityType.FEED, process.getInputs().getInputs().get(0).getFeed());
@@ -120,7 +124,7 @@ public void testDefCoordMap(Process process, COORDINATORAPP coord) throws Except
assertEquals(feed.getTimezone().getID(), ds.getTimezone());
assertEquals("${coord:"+feed.getFrequency().toString()+"}", ds.getFrequency());
assertEquals("", ds.getDoneFlag());
- assertEquals("${nameNode}" + FeedHelper.getLocation(feed, LocationType.DATA).getPath(), ds.getUriTemplate());
+ assertEquals(ds.getUriTemplate(),"${nameNode}" + FeedHelper.getLocation(feed, LocationType.DATA,feed.getClusters().getClusters().get(0).getName()).getPath());
for(Property prop:coord.getAction().getWorkflow().getConfiguration().getProperty()){
if(prop.getName().equals("mapred.job.priority")){
assertEquals(prop.getValue(), "LOW");
5 process/src/test/resources/config/feed/feed-0.1.xml
View
@@ -32,6 +32,11 @@
<cluster name="corp" type="source">
<validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
<retention limit="hours(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+ <locations>
+ <location type="data" path="/projects/ivory/clicks/${YY}/${MM}" />
+ <location type="stats" path="/projects/ivory/clicksStats" />
+ <location type="meta" path="/projects/ivory/clicksMetaData" />
+ </locations>
</cluster>
<cluster name="backupCluster" type="target">
<validity start="2011-11-01T00:00Z" end="2099-12-31T23:59Z"/>
7 rerun/src/main/java/org/apache/ivory/rerun/handler/LateRerunHandler.java
View
@@ -17,7 +17,6 @@
*/
package org.apache.ivory.rerun.handler;
-import java.io.IOException;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
@@ -70,12 +69,14 @@ public void handleRerun(String cluster, String entityType,
Date msgInsertTime = EntityUtil.parseDateUTC(nominalTime);
Long wait = getEventDelay(entity, nominalTime);
if (wait == -1) {
+ LOG.info("Late rerun expired for entity: "+entityType+"("+entityName+")");
String logDir = this.getWfEngine().getWorkflowProperty(cluster,
wfId, "logDir");
String srcClusterName = this.getWfEngine().getWorkflowProperty(
cluster, wfId, "srcClusterName");
- Path lateLogPath = this.getLateLogPath(logDir, nominalTime,
- srcClusterName);
+ Path lateLogPath = this.getLateLogPath(logDir,
+ EntityUtil.UTCtoURIDate(nominalTime), srcClusterName);
+ LOG.info("Going to delete path:" +lateLogPath);
FileSystem fs = FileSystem.get(getConfiguration(cluster,
wfId));
if (fs.exists(lateLogPath)) {