# X.join(Y)
- Returns an RDD containing all pairs of elements with matching keys in X and Y.
- Each pair is a (k, (v1, v2)) tuple, where (k, v1) is in X and (k, v2) is in Y.

In [1]:
# Key "a" is present in both RDDs (twice in y); key "b" exists only in x.
x_data = [("a", 1), ("b", 4)]
y_data = [("a", 2), ("a", 3)]
x = sc.parallelize(x_data)
y = sc.parallelize(y_data)

# Inner join: only keys found on both sides survive. Sort the collected
# result so the displayed order is deterministic.
joined = x.join(y)
sorted(joined.collect())

[('a', (1, 2)), ('a', (1, 3))]

# X.leftOuterJoin(Y)
- For each element (k, v) in X, the resulting RDD will contain either
  - all pairs (k, (v, w)) for every element (k, w) in Y, or
  - the pair (k, (v, None)) if no element in Y has key k.

In [2]:
# Key "a" is present in both RDDs; key "b" has no match in y.
x_data = [("a", 1), ("b", 4)]
y_data = [("a", 2)]
x = sc.parallelize(x_data)
y = sc.parallelize(y_data)

# Left outer join: every element of x is kept, with None standing in for a
# missing match from y. Sort the collected result for a deterministic display.
left_joined = x.leftOuterJoin(y)
sorted(left_joined.collect())

[('a', (1, 2)), ('b', (4, None))]

# X.rightOuterJoin(Y)
- For each element (k, w) in Y, the resulting RDD will contain either
  - all pairs (k, (v, w)) for every element (k, v) in X, or
  - the pair (k, (None, w)) if no element in X has key k.

In [3]:
# Key "a" is present in both RDDs; key "b" has no match in x.
x_data = [("a", 1)]
y_data = [("a", 2), ("b", 4)]
x = sc.parallelize(x_data)
y = sc.parallelize(y_data)

# Right outer join: every element of y is kept, with None standing in for a
# missing match from x. Sort the collected result for a deterministic display.
right_joined = x.rightOuterJoin(y)
sorted(right_joined.collect())

[('a', (1, 2)), ('b', (None, 4))]

# X.fullOuterJoin(Y)
- For each element (k, v) in X, the resulting RDD will contain either
  - all pairs (k, (v, w)) for every element (k, w) in Y, or
  - the pair (k, (v, None)) if no element in Y has key k.
- For each element (k, w) in Y, the resulting RDD will contain either
  - all pairs (k, (v, w)) for every element (k, v) in X, or
  - the pair (k, (None, w)) if no element in X has key k.

In [4]:
# Key "a" is present in both RDDs; "b" exists only in x and "c" only in y.
x_data = [("a", 1), ("b", 4)]
y_data = [("a", 2), ("c", 8)]
x = sc.parallelize(x_data)
y = sc.parallelize(y_data)

# Full outer join: elements from both sides are kept, with None filling in
# whichever side has no match. Sort the collected result for a deterministic
# display.
full_joined = x.fullOuterJoin(y)
sorted(full_joined.collect())

[('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))]