In [1]:
import sys
import csv
import itertools as it
from pyspark import SparkConf, SparkContext
from operator import add

In [2]:
# Reuse the live SparkContext when this cell is re-run: constructing a second
# context in the same kernel fails because only one may be active at a time.
sc = SparkContext.getOrCreate(SparkConf().setMaster('local[*]'))
# RDD of the raw text lines of '2.txt' (path is relative to the working directory).
file = sc.textFile('2.txt')

In [3]:
def split_line(line):
    """Parse one adjacency-list line into ``(user, [friend, ...])``.

    The line is expected to hold a user id, a TAB, and a comma-separated
    list of friend ids.

    Args:
        line: One raw text line of the input file.

    Returns:
        A ``(user, friends)`` tuple where ``friends`` is a list of id strings.
        A line with no TAB or an empty friend field yields an empty list
        instead of raising or producing a bogus ``''`` friend id.
    """
    # partition never raises, unlike unpacking the result of split('\t', 1).
    user, _, friends = line.partition('\t')
    # Drop empty tokens so "7\t" and "1,,2" do not produce '' as a friend id.
    return (user, [friend for friend in friends.split(',') if friend])
    
# Parse every raw line into a (user, [friends]) record.
user_friends = file.map(split_line)
# user_friends.take(5)

In [4]:
def map_frienships(user_friends):
    """Expand one ``(user, friends)`` record into keyed pair contributions.

    Two kinds of ``((a, b), value)`` items are produced:

    * ``((user, friend), -inf)`` for every existing friendship. Summing any
      number of 1s onto ``-inf`` stays ``-inf``, so these pairs can later be
      dropped with a ``> 0`` filter.
    * ``((f1, f2), 1)`` for every *ordered* pair of distinct entries in the
      friend list: ``user`` is one mutual friend of ``f1`` and ``f2``.

    Both orientations of each candidate pair are emitted because the
    downstream step credits a recommendation only to the first element of
    the key. Emitting unordered combinations would leave the second user
    without the reciprocal recommendation.

    Args:
        user_friends: A ``(user, friend_list)`` tuple.

    Returns:
        A list of ``((id, id), value)`` tuples.
    """
    user, friend_list = user_friends[0], user_friends[1]

    # Sentinel entries marking pairs that are already friends.
    user_friend_pairs = [((user, fr), float('-inf')) for fr in friend_list]
    # permutations (not combinations) yields both (f1, f2) and (f2, f1).
    possible_friend_pairs = [(pair, 1) for pair in it.permutations(friend_list, 2)]

    return user_friend_pairs + possible_friend_pairs

# Flatten the per-user pair lists into a single RDD of ((id, id), value) items.
frienship_pairs = (
    user_friends
    .flatMap(map_frienships)
)
# frienship_pairs.take(5)

In [5]:
# Sum the contributions per pair, then keep only positive totals: any pair
# that received a -inf entry sums to -inf and is removed here.
recommendation_count = (
    frienship_pairs
    .reduceByKey(add)
    .filter(lambda keyed_total: keyed_total[1] > 0)
)
# recommendation_count.take(5)

In [6]:
def map_to_user_recommendation(pair):
    """Re-key a ``((u1, u2), mutual_count)`` item by its first user.

    Returns ``(int(u1), (u2, mutual_count))``: the key is converted to an
    integer, the candidate id ``u2`` is passed through unchanged.
    """
    (recipient, candidate), mutual_count = pair
    return (int(recipient), (candidate, mutual_count))

def top_recommendations(user_recs, limit=10):
    """Return the best ``limit`` recommendations for one user.

    Candidates are ordered by descending mutual-friend count; ties are broken
    by ascending candidate id compared *numerically*, so ``'9'`` sorts before
    ``'10'``. Ids that cannot be parsed as integers sort after all numeric
    ids, in string order.

    Args:
        user_recs: Iterable of ``(candidate_id, mutual_count)`` tuples.
        limit: Maximum number of recommendations to keep (default 10).

    Returns:
        A list of at most ``limit`` ``(candidate_id, mutual_count)`` tuples.
    """
    def _id_order(candidate_id):
        # Tuples keep int and str ids mutually comparable under Python 3.
        try:
            return (0, int(candidate_id), '')
        except (TypeError, ValueError):
            return (1, 0, str(candidate_id))

    ranked = sorted(user_recs, key=lambda rec: (-rec[1], _id_order(rec[0])))
    return ranked[:limit]

# Re-key each surviving pair by its first user, gather that user's candidates,
# trim them with top_recommendations, and order the result by user id.
user_recommendations = (
    recommendation_count
    .map(map_to_user_recommendation)
    .groupByKey()
    .mapValues(top_recommendations)
    .sortByKey()
)
# user_recommendations.take(5)

In [7]:
def parse_recommendation(user_recs):
    """Format one ``(user, recommendations)`` record as an output line.

    The line is the user id, a TAB, then the candidate ids (the first field
    of each recommendation) joined by ``', '``.
    """
    user = user_recs[0]
    candidate_ids = map(str, (rec[0] for rec in user_recs[1]))
    return '\t'.join([str(user), ', '.join(candidate_ids)])

# Render every record as a text line and bring the lines back to the driver.
result = (
    user_recommendations
    .map(parse_recommendation)
    .collect()
)
# result[:5]

In [8]:
# Write all lines to result.txt, newline-separated, with no trailing newline.
report_text = "\n".join(result)
with open("result.txt", "w") as outfile:
    outfile.write(report_text)

In [9]:
# Stop the SparkContext bound to `sc` now that the results have been written.
sc.stop()