In [92]:
import math
import operator


In [93]:

"""
readDataInMem: Read data from provided file into a list (one record per line)
               Returns list
"""

def readDataInMem():
    cols=[]
    rows=[]
    with open("../data/MasterAdjList",'r') as fr:
        for line in fr:
            cols=line.split()
            if cols[-1]=='\\N':
                cols[-1]='0'
            cols=list(map(int,cols))
            rows.append(cols)
    return rows

In [94]:
userInfoDict={}
with open("../data/MasterUserFeatureMatrix.txt", 'r') as fp:
    for line in fp:
        cols=line.split()
        if cols[0] not in userInfoDict:
            userInfoDict[int(cols[0])]=int(cols[1]) if cols[1] != 'None' else 0

In [95]:

"""
buildPerUserFrndLst: Build dict of {userId:[friendList]} from list provided
                     Returns the built dict
"""

def buildPerUserFrndLst(rows):
    frndMap={}
    for r in rows:
        if r[0] in frndMap:
            prevList=frndMap[r[0]]
            newSet=set(prevList+r[1:])
            frndMap[r[0]]=list(newSet)
        else:
            frndMap[r[0]]=r[1:]           
            
    return frndMap

In [96]:
"""
recommendFrnd: 1. Read data from file containing friends
               2. Build per user friendList 
               3. Find most probable friend for given seed on the 
                  basis of max mutual friends
"""

def getCandidates(seed,frndMap):
    level1Frnds=[]
    level1Frnds=frndMap[seed] # Friend list for the provided seed (format: List of friend Ids)
    probableFrnd=0
    support=0
    probableFrndDict=getProbableFriendDict(level1Frnds,frndMap,seed)   # candidates for suggestions
    return probableFrndDict

 



"""
frndWithMaxMutualCount: Calculate the occurence of each Id found at level2 (i.e. friends of level1 friends)
                        Returns the userId with max count and count
"""

def getProbableFriendDict(level1Frnds,frndMap,seed):
    level2Frnds= set()
    freqCnt={}
    thisFrndFrnds=[]

    #iterate through level1 friends

    for i in level1Frnds:
        if i in frndMap.keys():        
            thisFrndFrnds=frndMap[i]
                                     #get friendlist of level1 friend
            for j in thisFrndFrnds:  #for each friend l2 of level1 friend l1,
                if j != seed:       # if l1 not one of level1 friend add t to set
                                     # and set frequency to 1. 
                    if j not in level1Frnds: 
                        if j in level2Frnds:
                            freqCnt[j]=freqCnt[j]+1
                        else:
                            level2Frnds.add(j)
                            freqCnt[j]=1
    return freqCnt



    
    

In [97]:
def buildMatrix():
    matrix=[]    
    for user in probableFrndDict:
        row=[]
        row.append(user)
        if user in userInfoDict:
            #age
            row.append(userInfoDict[user])
        else:
            row.append(0)
            
        row.append(probableFrndDict[user])
        matrix.append(row)
    return matrix

In [109]:
def cosineSimilarity(seedMatrix,matrix):
    score={}
    for r in matrix:
        sum=0
        eleSq=0
        seedSq=0
        for ele in range(1,len(r)):
            sum=sum+r[ele]*seedMatrix[ele]
            eleSq=eleSq+r[ele]*r[ele]
            seedSq=seedSq+seedMatrix[ele]*seedMatrix[ele]
        
        thisScore=float(sum/(math.sqrt(eleSq)+math.sqrt(seedSq)))
        score[r[0]]=thisScore
    sorted_score = sorted(score.items(), key=operator.itemgetter(1), reverse=True) 
    return sorted_score

In [110]:
def createSeedMatrix(seed,frndMap):
    seedMatrix=[]
    seedMatrix.append(seed)
    seedMatrix.append(userInfoDict[seed] if seed in userInfoDict else 0)
    seedMatrix.append(len(frndMap[seed]))
    return seedMatrix
    


In [111]:
rows=readDataInMem()
frndMap=buildPerUserFrndLst(rows)
for seed in frndMap:
    # seed=10153478616753407
    probableFrndDict=getCandidates(seed,frndMap)
    matrix=buildMatrix()
    seedMatrix=createSeedMatrix(seed,frndMap)
    score=cosineSimilarity(seedMatrix,matrix)
    if len(score):
        print("for "+str(seed)+" suggest user: "+str(score[0][0])+" with support "+str(score[0][1]))


for 1120299124684544 suggest user: 10203881668262331 with support 0.5
for 10202939550824451 suggest user: 1120299124684544 with support 0.5
for 10203819650923780 suggest user: 1198993888 with support 0.5
for 10152428102382341 suggest user: 1120299124684544 with support 0.5
for 10207648390994439 suggest user: 846314905418676 with support 12.237413163305991
for 754643311235080 suggest user: 10207648390994439 with support 12.48999602241717
for 10207029106439434 suggest user: 902025849828256 with support 0.5
for 10101768040479501 suggest user: 597232940418 with support 0.5
for 10203783090936847 suggest user: 758102710876995 with support 0.5
for 750078275061264 suggest user: 1713083637 with support 0.5
for 10100755017830162 suggest user: 597232940418 with support 0.5
for 10152834062440985 suggest user: 597232940418 with support 0.5
for 1017415450 suggest user: 10100428308394559 with support 0.9565217391304348
for 10152487516090756 suggest user: 597232940418 with support 0.5
for 669460260 su