Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 19 lines (13 sloc) 0.875 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-- Builds a person-to-person link graph: keeps only those page-link rows whose
-- subject AND object both occur as the subject of a foaf Person type row,
-- removes duplicates, and writes the survivors back out space-delimited.

-- Default reduce-side parallelism for the jobs this script launches.
SET default_parallel 4;

-- Type rows, split on single spaces into four untyped columns
-- (presumably N-Triples: subject, predicate, object, trailing '.' -- confirm).
type_rows = LOAD 's3://mattb/instance_types_en.nt.bz2' USING PigStorage(' ') AS (subj, pred, obj, dot);
person_rows = FILTER type_rows BY obj == '<http://xmlns.com/foaf/0.1/Person>';
persons = FOREACH person_rows GENERATE subj;

-- Page-link rows, loaded with the same four-column layout.
page_links = LOAD 's3://mattb/page_links_en.nt.bz2' USING PigStorage(' ') AS (subj, pred, obj, dot);

-- Pass 1: keep links whose subject is a person. Grouping both relations on
-- the key and dropping groups where either bag is empty acts as a semi-join;
-- flattening only the link bag discards the person side again.
by_source = COGROUP persons BY subj, page_links BY subj;
by_source_hits = FILTER by_source BY NOT (IsEmpty(persons) OR IsEmpty(page_links));
links_from_persons = FOREACH by_source_hits GENERATE FLATTEN(page_links);

-- Pass 2: same technique, this time matching the link's object against persons.
by_target = COGROUP persons BY subj, links_from_persons BY obj;
by_target_hits = FILTER by_target BY NOT (IsEmpty(persons) OR IsEmpty(links_from_persons));
person_links = FOREACH by_target_hits GENERATE FLATTEN(links_from_persons);

-- Collapse repeated rows before writing.
unique_person_links = DISTINCT person_links;

-- Output keeps all four columns, joined with single spaces.
STORE unique_person_links INTO 's3://mattb/people-graph' USING PigStorage(' ');
Something went wrong with that request. Please try again.