Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Adding RandomSample and syncing other changes

  • Loading branch information...
commit 47dc1434dad7589a66d941d071c1b6afd1ccd327 1 parent 989b8ec
@xstevens xstevens authored
View
48 src/main/java/com/mozilla/pig/eval/date/TimeDelta.java
@@ -33,16 +33,27 @@
public class TimeDelta extends EvalFunc<Long> {
public static enum ERRORS { DateParseError };
-
- private SimpleDateFormat sdf;
- private long currentDay;
+
private int deltaUnit;
+ private boolean parseDate = false;
+ private SimpleDateFormat sdf;
+
+    /**
+     * Creates a delta function that reports differences in milliseconds.
+     * No date format is configured, so the inputs are read as numeric
+     * timestamps rather than parsed from strings.
+     */
+    public TimeDelta() {
+        this.deltaUnit = Calendar.MILLISECOND;
+    }
- public TimeDelta(String day, String dateFormat, String deltaUnitStr) throws ParseException {
- sdf = new SimpleDateFormat(dateFormat);
- Date d = sdf.parse(day);
- currentDay = DateUtil.getTimeAtResolution(d.getTime(), Calendar.DATE);
+    /**
+     * Creates a delta function over numeric timestamps using the given unit.
+     * Delegates to the two-argument constructor with no date format.
+     *
+     * @param deltaUnitStr decimal string holding the integer unit constant
+     *                     (for example 3 for WEEK_OF_YEAR, 5 for DATE)
+     * @throws NumberFormatException if {@code deltaUnitStr} is not an integer
+     * @throws ParseException declared for signature compatibility with the
+     *                        delegated constructor
+     */
+    public TimeDelta(String deltaUnitStr) throws ParseException {
+        this(deltaUnitStr, null);
+    }
+
+ public TimeDelta(String deltaUnitStr, String dateFormat) throws ParseException {
+ // WEEK_OF_YEAR = 3
+ // DATE = 5
deltaUnit = Integer.parseInt(deltaUnitStr);
+ if (dateFormat != null) {
+ parseDate = true;
+ sdf = new SimpleDateFormat(dateFormat);
+ }
}
@Override
@@ -50,15 +61,22 @@ public Long exec(Tuple input) throws IOException {
if (input == null || input.size() == 0) {
return null;
}
-
- Long delta = null;
- try {
- Date d = sdf.parse((String)input.get(0));
- delta = DateUtil.getTimeDelta(d.getTime(), currentDay, deltaUnit);
- } catch (ParseException e) {
- pigLogger.warn(this, "Date parsing error", ERRORS.DateParseError);
+
+ long delta = 0;
+ if (parseDate) {
+ try {
+ Date d1 = sdf.parse((String)input.get(0));
+ Date d2 = sdf.parse((String)input.get(1));
+ delta = DateUtil.getTimeDelta(d1.getTime(), d2.getTime(), deltaUnit);
+ } catch (ParseException e) {
+ pigLogger.warn(this, "Date parse error", ERRORS.DateParseError);
+ }
+ } else {
+ long t1 = ((Number)input.get(0)).longValue();
+ long t2 = ((Number)input.get(1)).longValue();
+ delta = DateUtil.getTimeDelta(t1, t2, deltaUnit);
}
-
+
return delta;
}
View
2  src/main/java/com/mozilla/pig/eval/regex/Find.java
@@ -31,7 +31,7 @@
private Pattern p;
public Find(String pattern) {
- p = Pattern.compile(pattern);
+ p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
}
public String exec(Tuple input) throws IOException {
View
43 src/main/java/com/mozilla/pig/filter/RandomSample.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2012 Mozilla Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.mozilla.pig.filter;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.pig.FilterFunc;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig filter that keeps each record with a fixed probability, producing an
+ * approximate random sample of its input.
+ *
+ * <p>The probability is given as the single constructor argument. A ratio of
+ * {@code 0} keeps nothing; a ratio of {@code 1} or more keeps everything.
+ */
+public class RandomSample extends FilterFunc {
+
+    private final Random rand;
+    private final double ratio;
+
+    /**
+     * @param r sampling probability as a decimal string, e.g. {@code "0.1"}
+     * @throws NumberFormatException if {@code r} is not a parsable double
+     * @throws NullPointerException if {@code r} is null
+     */
+    public RandomSample(String r) {
+        rand = new Random();
+        ratio = Double.parseDouble(r);
+    }
+
+    /**
+     * Decides whether the current record is kept. The tuple's contents are
+     * not inspected; the decision depends only on the random draw.
+     *
+     * @param input the current record (ignored)
+     * @return {@code true} if the record is selected for the sample
+     */
+    @Override
+    public Boolean exec(Tuple input) throws IOException {
+        // nextDouble() is uniform over [0.0, 1.0). A strict comparison makes
+        // the selection probability exactly 'ratio' and guarantees that a
+        // ratio of 0 never selects a record (an inclusive test would let a
+        // draw of exactly 0.0 through).
+        return rand.nextDouble() < ratio;
+    }
+
+}
View
48 src/main/java/com/mozilla/pig/filter/tuple/Contains.java
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2012 Mozilla Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.mozilla.pig.filter.tuple;
+
+import java.io.IOException;
+
+import org.apache.pig.FilterFunc;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig filter that tests whether a tuple contains a given value.
+ *
+ * <p>Expects two fields: the tuple to search (field 0) and the value to look
+ * for (field 1). Elements are compared with {@code equals}, so the value must
+ * have the same runtime type as the element it is meant to match.
+ */
+public class Contains extends FilterFunc {
+
+    /**
+     * @param input two-field tuple: (tuple to search, value to find)
+     * @return {@code true} if some non-null element of the searched tuple
+     *         equals the value; {@code false} otherwise, including when the
+     *         input is null, has fewer than two fields, or either field is null
+     * @throws ClassCastException if field 0 is not a tuple
+     */
+    @Override
+    public Boolean exec(Tuple input) throws IOException {
+        if (input == null || input.size() < 2) {
+            return false;
+        }
+
+        Tuple items = (Tuple) input.get(0);
+        Object needle = input.get(1);
+        // A null container used to blow up on items.size(); a null needle
+        // can never equal a non-null element, so both cases are simply
+        // "not found".
+        if (items == null || needle == null) {
+            return false;
+        }
+
+        for (int i = 0; i < items.size(); i++) {
+            Object candidate = items.get(i);
+            if (candidate != null && candidate.equals(needle)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+}
View
3  src/main/java/com/mozilla/util/DateUtil.java
@@ -101,7 +101,10 @@ public static long getTimeDelta(long start, long end, int deltaUnit) {
case WEEK_OF_YEAR:
delta = (end - start) / WEEK_IN_MILLIS;
break;
+ case MILLISECOND:
+ // fall through to default
default:
+ delta = (end - start);
break;
}
View
50 src/test/java/com/mozilla/pig/filter/tuple/ContainsTest.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2012 Mozilla Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.mozilla.pig.filter.tuple;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.junit.Test;
+
+public class ContainsTest {
+
+    private TupleFactory tupleFactory = TupleFactory.getInstance();
+
+    /**
+     * A value present in the searched tuple must be reported as found:
+     * looks for 0 inside (-1, 0, 1).
+     */
+    @Test
+    public void testExec1() throws IOException {
+        // Tuple to search: (-1, 0, 1)
+        Tuple haystack = tupleFactory.newTuple();
+        for (int value = -1; value <= 1; value++) {
+            haystack.append(value);
+        }
+
+        // UDF arguments: (tuple to search, value to find)
+        Tuple args = tupleFactory.newTuple(2);
+        args.set(0, haystack);
+        args.set(1, 0);
+
+        Contains contains = new Contains();
+        assertTrue(contains.exec(args));
+    }
+
+}
Please sign in to comment.
Something went wrong with that request. Please try again.