Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Sync'ing latest changes

  • Loading branch information...
commit 417d2944f559d85c1614b64b591b012877e8986d 1 parent e2eeafd
@xstevens xstevens authored
View
55 src/main/java/com/mozilla/fhr/pig/eval/CrashTuples.java
@@ -30,9 +30,18 @@
public class CrashTuples extends EvalFunc<DataBag> {
+ private static final String CRASHES_FIELD = "org.mozilla.crashes.crashes";
+ private static final String PENDING_FIELD = "pending";
+ private static final String SUBMITTED_FIELD = "submitted";
+
+ private static final String ADDON_COUNTS_FIELD = "org.mozilla.addons.counts";
+ private static final String EXTENSION_FIELD = "extension";
+ private static final String PLUGIN_FIELD = "plugin";
+ private static final String THEME_FIELD = "theme";
+
private static BagFactory bagFactory = BagFactory.getInstance();
private static TupleFactory tupleFactory = TupleFactory.getInstance();
-
+
private int getSafeInt(Object o) {
if (o == null) {
return 0;
@@ -50,44 +59,36 @@ public DataBag exec(Tuple input) throws IOException {
DataBag dbag = bagFactory.newDefaultBag();
Map<String,Object> dataPoints = (Map<String,Object>)input.get(0);
- for (Map.Entry<String, Object> dataPoint : dataPoints.entrySet()) {
- String dayStr = dataPoint.getKey();
- Map<String,Object> fields = (Map<String,Object>)dataPoint.getValue();
- if ((fields.containsKey("crashCountPending") || fields.containsKey("crashCountSubmitted")) && fields.containsKey("sessions")) {
+ for (Map.Entry<String,Object> dayEntry : dataPoints.entrySet()) {
+ Map<String,Object> dayMap = (Map<String,Object>)dayEntry.getValue();
+ if (dayMap.containsKey(CRASHES_FIELD)) {
// crash info
- int crashCountPending = getSafeInt(fields.get("crashCountPending"));
- int crashCountSubmitted = getSafeInt(fields.get("crashCountSubmitted"));
+ Map<String,Object> crashesMap = (Map<String,Object>)dayMap.get(CRASHES_FIELD);
+ int crashCountPending = getSafeInt(crashesMap.get(PENDING_FIELD));
+ int crashCountSubmitted = getSafeInt(crashesMap.get(SUBMITTED_FIELD));
- // aborted session info
- Map<String,Object> sessions = (Map<String,Object>)fields.get("sessions");
- int aborted = getSafeInt(sessions.get("aborted"));
- int abortedTime = getSafeInt(sessions.get("abortedTime"));
- int abortedActiveTime = getSafeInt(sessions.get("abortedActiveTime"));
+ // TODO: aborted session info...where did this move to in the payload? Do we need it here anyway?
// addons info
- Map<String,Object> addonCounts = (Map<String,Object>)fields.get("addonCounts");
- int themeCount = 0, extCount = 0, pluginCount = 0;
- if (addonCounts != null) {
- themeCount = getSafeInt(addonCounts.get("theme"));
- extCount = getSafeInt(addonCounts.get("extension"));
- pluginCount = getSafeInt(addonCounts.get("plugin"));
+ Map<String,Object> addonCountMap = (Map<String,Object>)dayMap.get(ADDON_COUNTS_FIELD);
+ int themeCount = -1, extensionCount = -1, pluginCount = -1;
+ if (addonCountMap != null) {
+ themeCount = getSafeInt(addonCountMap.get(THEME_FIELD));
+ extensionCount = getSafeInt(addonCountMap.get(EXTENSION_FIELD));
+ pluginCount = getSafeInt(addonCountMap.get(PLUGIN_FIELD));
}
- Tuple t = tupleFactory.newTuple(9);
- t.set(0, dayStr);
+ Tuple t = tupleFactory.newTuple(6);
+ t.set(0, dayEntry.getKey());
t.set(1, crashCountPending);
t.set(2, crashCountSubmitted);
- t.set(3, aborted);
- t.set(4, abortedTime);
- t.set(5, abortedActiveTime);
- t.set(6, themeCount);
- t.set(7, extCount);
- t.set(8, pluginCount);
+ t.set(3, themeCount);
+ t.set(4, extensionCount);
+ t.set(5, pluginCount);
dbag.add(t);
}
}
-
return dbag;
}
View
71 src/main/pig/fhr_crash_analysis.pig
@@ -18,7 +18,7 @@ define DateDelta com.mozilla.pig.eval.date.TimeDelta('5','yyyy-MM-dd');
define WeekInYear com.mozilla.pig.eval.date.ConvertDateFormat('yyyy-MM-dd', 'w');
define MonthInYear com.mozilla.pig.eval.date.ConvertDateFormat('yyyy-MM-dd', 'M');
define OsVersionNormalizer com.mozilla.pig.eval.regex.FindOrReturn('^[0-9]+(\\.*[0-9]*){1}');
-define BucketAddonCount com.mozilla.pig.eval.Bucket('1','2','3','4','5','6');
+define BucketAddonCount com.mozilla.pig.eval.Bucket('-1','1','2','3','4','5','6');
define Median datafu.pig.stats.Median();
define CrashTuples com.mozilla.fhr.pig.eval.CrashTuples();
define VersionOnDate com.mozilla.fhr.pig.eval.VersionOnDate('yyyy-MM-dd', '$date');
@@ -31,99 +31,104 @@ genmap = FOREACH raw GENERATE k,com.mozilla.pig.eval.json.JsonMap(json) AS json_
It will probably be best to create a new UDF for crash tuples (date, crash_count, extensions, plugins, etc.)
because we need to look at the data from several perspectives: by day, week of year, month of year, d-7, and d-30
*/
-prefltrd = FILTER genmap BY IsMap(json_map#'dataPoints');
+prefltrd = FILTER genmap BY IsMap(json_map#'data'#'days');
data = FOREACH prefltrd GENERATE k,
- json_map#'appName' AS product:chararray,
- VersionOnDate(json_map#'versions') AS product_version:chararray,
- json_map#'appUpdateChannel' AS product_channel:chararray,
- json_map#'OSName' AS os:chararray,
- OsVersionNormalizer((chararray)json_map#'OSVersion') AS os_version:chararray,
- json_map#'locale' AS locale:chararray,
- FLATTEN(CrashTuples(json_map#'dataPoints')) AS
+ json_map#'data'#'last'#'org.mozilla.appInfo.appinfo'#'name' AS product:chararray,
+ VersionOnDate(json_map#'data'#'days') AS product_version:chararray,
+ json_map#'data'#'last'#'org.mozilla.appInfo.appinfo'#'updateChannel' AS product_channel:chararray,
+ json_map#'data'#'last'#'org.mozilla.appInfo.appinfo'#'os' AS os:chararray,
+ OsVersionNormalizer((chararray)json_map#'data'#'last'#'org.mozilla.sysinfo.sysinfo'#'version') AS os_version:chararray,
+ json_map#'geoCountry' AS geo_country_code:chararray,
+ FLATTEN(CrashTuples(json_map#'data'#'days')) AS
(date:chararray, crash_count_pending:int, crash_count_submitted:int,
- aborted_sessions:int, aborted_time:int, aborted_active_time:int,
theme_count:int, ext_count:int, plugin_count:int);
-fltrd = FILTER data BY crash_count_pending > 0 OR crash_count_submitted > 0;
-bucketed_data = FOREACH fltrd GENERATE k, product,product_version,product_channel,os,os_version,locale,date,
+fltrd = FILTER data BY (crash_count_pending > 0 OR crash_count_submitted > 0) AND
+ product IS NOT NULL AND
+ (product == 'Firefox' OR product == 'MetroFirefox' OR
+ product == 'Fennec' OR product == 'Thunderbird') AND
+ product_version IS NOT NULL AND
+ product_channel IS NOT NULL AND
+ os IS NOT NULL AND
+ os_version IS NOT NULL;
+bucketed_data = FOREACH fltrd GENERATE k, product,product_version,product_channel,os,os_version,geo_country_code,date,
DateDelta(date, '$date') AS days_ago:long,
WeekInYear(date) AS week_in_year:chararray,
MonthInYear(date) AS month_in_year:chararray,
crash_count_pending, crash_count_submitted,
(crash_count_pending + crash_count_submitted) AS crash_count:int,
- aborted_sessions, aborted_time, aborted_active_time,
theme_count, BucketAddonCount(ext_count) AS addon_count:int, plugin_count;
/* Daily */
daily_data = FILTER bucketed_data BY date == '$date';
-grouped_daily = GROUP daily_data BY (product,product_version,product_channel,os,os_version,locale,addon_count);
+grouped_daily = GROUP daily_data BY (product,product_version,product_channel,os,os_version,geo_country_code,addon_count);
daily_counts = FOREACH grouped_daily GENERATE '$date' AS perspective_date:chararray,
- FLATTEN(group) AS (product,product_version,product_channel,os,os_version,locale,addon_count),
+ FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,addon_count),
FLATTEN(Median(daily_data.crash_count)) AS median_crash_count,
SUM(daily_data.crash_count) AS sum_crash_count:long,
COUNT(daily_data) AS doc_count:long;
-STORE daily_counts INTO 'fhr-crash-daily-counts-$date';
+/*STORE daily_counts INTO 'fhr-crash-daily-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
-STORE daily_counts INTO '{fhr_crash_daily_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(16), os varchar(16), os_version varchar(32), locale varchar(8), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
+STORE daily_counts INTO '{fhr_crash_daily_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
/* Week in Year */
wiy_data = FILTER bucketed_data BY week_in_year == '$week';
-grouped_wiy = GROUP wiy_data BY (product,product_version,product_channel,os,os_version,locale,addon_count);
+grouped_wiy = GROUP wiy_data BY (product,product_version,product_channel,os,os_version,geo_country_code,addon_count);
wiy_counts = FOREACH grouped_wiy {
dist_keys = DISTINCT wiy_data.k;
GENERATE
'$date' AS perspective_date:chararray,
- FLATTEN(group) AS (product,product_version,product_channel,os,os_version,locale,addon_count),
+ FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,addon_count),
FLATTEN(Median(wiy_data.crash_count)) AS median_crash_count,
SUM(wiy_data.crash_count) AS sum_crash_count:long,
COUNT(dist_keys) AS doc_count:long;
}
-STORE wiy_counts INTO 'fhr-wiy-crash-analysis-$date';
+/*STORE wiy_counts INTO 'fhr-wiy-crash-analysis-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
-STORE wiy_counts INTO '{fhr_crash_wiy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(16), os varchar(16), os_version varchar(32), locale varchar(8), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
+STORE wiy_counts INTO '{fhr_crash_wiy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
/* Month in Year */
miy_data = FILTER bucketed_data BY month_in_year == '$month';
-grouped_miy = GROUP miy_data BY (product,product_version,product_channel,os,os_version,locale,addon_count);
+grouped_miy = GROUP miy_data BY (product,product_version,product_channel,os,os_version,geo_country_code,addon_count);
miy_counts = FOREACH grouped_miy {
dist_keys = DISTINCT miy_data.k;
GENERATE
'$date' AS perspective_date:chararray,
- FLATTEN(group) AS (product,product_version,product_channel,os,os_version,locale,addon_count),
+ FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,addon_count),
FLATTEN(Median(miy_data.crash_count)) AS median_crash_count,
SUM(miy_data.crash_count) AS sum_crash_count:long,
COUNT(dist_keys) AS doc_count:long;
}
-STORE miy_counts INTO 'fhr-miy-crash-analysis-$date';
+/*STORE miy_counts INTO 'fhr-miy-crash-analysis-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
-STORE miy_counts INTO '{fhr_crash_miy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(16), os varchar(16), os_version varchar(32), locale varchar(8), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
+STORE miy_counts INTO '{fhr_crash_miy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
/* d-7 */
d7_data = FILTER bucketed_data BY days_ago <= 7;
-grouped_d7 = GROUP d7_data BY (product,product_version,product_channel,os,os_version,locale,addon_count);
+grouped_d7 = GROUP d7_data BY (product,product_version,product_channel,os,os_version,geo_country_code,addon_count);
d7_counts = FOREACH grouped_d7 {
dist_keys = DISTINCT d7_data.k;
GENERATE '$date' AS perspective_date:chararray,
- FLATTEN(group) AS (product,product_version,product_channel,os,os_version,locale,addon_count),
+ FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,addon_count),
FLATTEN(Median(d7_data.crash_count)) AS median_crash_count,
SUM(d7_data.crash_count) AS sum_crash_count:long,
COUNT(dist_keys) AS doc_count:long;
}
-STORE d7_counts INTO 'fhr-d7-crash-analysis-$date';
+/*STORE d7_counts INTO 'fhr-d7-crash-analysis-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
-STORE d7_counts INTO '{fhr_crash_d7_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(16), os varchar(16), os_version varchar(32), locale varchar(8), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
+STORE d7_counts INTO '{fhr_crash_d7_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
/* d-30 */
d30_data = FILTER bucketed_data BY days_ago <= 30;
-grouped_d30 = GROUP d30_data BY (product,product_version,product_channel,os,os_version,locale,addon_count);
+grouped_d30 = GROUP d30_data BY (product,product_version,product_channel,os,os_version,geo_country_code,addon_count);
d30_counts = FOREACH grouped_d30 {
dist_keys = DISTINCT d30_data.k;
GENERATE '$date' AS perspective_date:chararray,
- FLATTEN(group) AS (product,product_version,product_channel,os,os_version,locale,addon_count),
+ FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,addon_count),
FLATTEN(Median(d30_data.crash_count)) AS median_crash_count,
SUM(d30_data.crash_count) AS sum_crash_count:long,
COUNT(dist_keys) AS doc_count:long;
}
-STORE d30_counts INTO 'fhr-d30-crash-analysis-$date';
+/*STORE d30_counts INTO 'fhr-d30-crash-analysis-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
-STORE d30_counts INTO '{fhr_crash_d30_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(16), os varchar(16), os_version varchar(32), locale varchar(8), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
+STORE d30_counts INTO '{fhr_crash_d30_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), addon_count int, median_crash_count float, sum_crash_count int, doc_count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
View
22 src/main/pig/fhr_retention.pig
@@ -43,12 +43,14 @@ grpd_by_all = GROUP filtered_genmap ALL;
n = FOREACH grpd_by_all GENERATE COUNT(filtered_genmap);
data = FOREACH filtered_genmap GENERATE k,
- ProfileAgeTime(FormatDate(LatestPingTime(json_map#'dataPoints')), json_map#'appProfileAge') AS profile_age_time:long,
+ ProfileAgeTime(FormatDate(LatestPingTime(json_map#'dataPoints')), ((int)json_map#'appProfileAge' - (int)DaysAgo(json_map#'thisPingTime', '$date'))) AS profile_age_time:long,
FirstPingTime(json_map#'dataPoints') AS first_ping_time:long,
PingTimes(json_map#'dataPoints') AS ping_times;
flat_data = FOREACH data GENERATE k, profile_age_time, FLATTEN(ping_times) AS ping_time:long;
-wiy_data = FOREACH flat_data GENERATE k, profile_age_time, ping_time,
+wiy_data = FOREACH flat_data GENERATE k,
+ profile_age_time,
+ ping_time,
(int)WeekInYear(ping_time) AS week_in_year:int,
(int)Year(ping_time) AS year:int;
/* Constrain data to before the specified week and year (works in case you ever have to rerun from a historical perspective) */
@@ -56,9 +58,12 @@ tc_data = FILTER wiy_data BY week_in_year <= $week AND year <= $year;
key_deltas = FOREACH tc_data GENERATE k, WeekDelta(profile_age_time, ping_time) AS week_delta:long;
distinct_key_deltas = DISTINCT key_deltas;
grpd_by_week_delta = GROUP distinct_key_deltas BY week_delta;
-week_delta_counts = FOREACH grpd_by_week_delta GENERATE FLATTEN(group) AS week_delta:long, COUNT(distinct_key_deltas) AS delta_count:long;
-week_delta_props = FOREACH week_delta_counts GENERATE week_delta, delta_count, ((double)delta_count/(double)n.$0);
-dump week_delta_props;
+week_delta_counts = FOREACH grpd_by_week_delta GENERATE FLATTEN(group) AS week_delta:long,
+ COUNT(distinct_key_deltas) AS delta_count:long;
+week_delta_props = FOREACH week_delta_counts GENERATE '$date' AS perspective_date:chararray,
+ week_delta, delta_count,
+ ((double)delta_count/(double)n.$0);
+STORE week_delta_props INTO 'fhr-retention-weekly-unique-$date';
/* Group by key and only keep the most recent week (i.e. lowest week delta) */
grpd_by_k = GROUP distinct_key_deltas BY k;
@@ -66,8 +71,11 @@ min_deltas = FOREACH grpd_by_k GENERATE FLATTEN(group) AS k:bytearray, MIN(disti
grpd_by_week_delta2 = GROUP min_deltas BY week_delta;
week_delta_counts2 = FOREACH grpd_by_week_delta2 GENERATE FLATTEN(group) AS week_delta:long,
COUNT(min_deltas) AS delta_count:long;
-week_delta_probs = FOREACH week_delta_counts2 GENERATE week_delta, delta_count, ((double)delta_count/(double)n.$0);
-dump week_delta_probs;
+week_delta_probs = FOREACH week_delta_counts2 GENERATE '$date' AS perspective_date:chararray,
+ week_delta,
+ delta_count,
+ ((double)delta_count/(double)n.$0);
+STORE week_delta_probs INTO 'fhr-retention-alltime-unique-$date';
/* Week in Year */
View
10 src/main/pig/fhr_uniques.pig
@@ -70,7 +70,7 @@ daily_counts = FOREACH grouped_daily GENERATE '$date' AS perspective_date:charar
FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,new_ping),
FLATTEN(Median(daily_data.bucketed_profile_age)) AS median_profile_age:float,
COUNT(daily_data) AS count:long;
-STORE daily_counts INTO 'fhr-daily-counts-$date';
+/*STORE daily_counts INTO 'fhr-daily-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
STORE daily_counts INTO '{fhr_daily_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), new_ping int, median_profile_age float, count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
@@ -83,7 +83,7 @@ wiy_counts = FOREACH grouped_wiy GENERATE '$date' AS perspective_date:chararray,
FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,new_ping),
FLATTEN(Median(weekly_data.bucketed_profile_age)) AS median_profile_age,
COUNT(weekly_data) AS count:long;
-STORE wiy_counts INTO 'fhr-wiy-counts-$date';
+/*STORE wiy_counts INTO 'fhr-wiy-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
STORE wiy_counts INTO '{fhr_wiy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), new_ping int, median_profile_age float, count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
@@ -94,7 +94,7 @@ miy_counts = FOREACH grouped_miy GENERATE '$date' AS perspective_date:chararray,
FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,new_ping),
FLATTEN(Median(monthly_data.bucketed_profile_age)) AS median_profile_age,
COUNT(monthly_data) AS count:long;
-STORE miy_counts INTO 'fhr-miy-counts-$date';
+/*STORE miy_counts INTO 'fhr-miy-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
STORE miy_counts INTO '{fhr_miy_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), new_ping int, median_profile_age float, count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
@@ -105,7 +105,7 @@ d7_counts = FOREACH grouped_d7 GENERATE '$date' AS perspective_date:chararray,
FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,new_ping),
FLATTEN(Median(d7_data.bucketed_profile_age)) AS median_profile_age,
COUNT(d7_data) AS count:long;
-STORE d7_counts INTO 'fhr-d7-counts-$date';
+/*STORE d7_counts INTO 'fhr-d7-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
STORE d7_counts INTO '{fhr_d7_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), new_ping int, median_profile_age float, count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
@@ -116,6 +116,6 @@ d30_counts = FOREACH grouped_d30 GENERATE '$date' AS perspective_date:chararray,
FLATTEN(group) AS (product,product_version,product_channel,os,os_version,geo_country_code,new_ping),
FLATTEN(Median(d30_data.bucketed_profile_age)) AS median_profile_age,
COUNT(d30_data) AS count:long;
-STORE d30_counts INTO 'fhr-d30-counts-$date';
+/*STORE d30_counts INTO 'fhr-d30-counts-$date';*/
/* Store into Vertica (will only work on Vertica 5+, and the Vertica connector jar needs to be on every machine) */
STORE d30_counts INTO '{fhr_d30_counts(perspective_date date, product varchar(32), product_version varchar(8), product_channel varchar(64), os varchar(16), os_version varchar(32), geo_country_code varchar(4), new_ping int, median_profile_age float, count int)}' USING com.vertica.pig.VerticaStorer('$dblist', '$dbname', '$dbport', '$dbuser', '$dbpass');
Please sign in to comment.
Something went wrong with that request. Please try again.