# Building the adjacency list of the graph for data from Univ. of Washington

## Import libraries

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the raw edge list: whitespace-separated columns, no header row, so the
# columns get the integer labels 0, 1, 2.
# `sep` is a regular expression; it is written as a raw string because "\s"
# is not a valid escape sequence in a normal string literal.
df = pd.read_csv("../data/facebook-links.txt.anon", sep=r"\s+", header=None)
df.head()

Unnamed: 0,0,1,2
0,1,2,\N
1,1,3,\N
2,1,4,\N
3,1,5,\N
4,1,6,\N


In [3]:
# Summarise the loaded frame: row count, per-column dtype and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1545686 entries, 0 to 1545685
Data columns (total 3 columns):
0    1545686 non-null int64
1    1545686 non-null int64
2    1545686 non-null object
dtypes: int64(2), object(1)
memory usage: 35.4+ MB


In [4]:
def convertToAdjacenyList(graphDf, outPath="../data/adjList_UW.txt"):
    """
    Write the edge list held in ``graphDf`` to ``outPath`` as an adjacency list.

    An edge list such as:
        1 2
        1 3
        1 4
        2 10
    is written with one line per source user, every id followed by a space:
        1 2 3 4
        2 10

    Parameters
    ----------
    graphDf : pandas.DataFrame
        Edge list. Column 0 holds the source user id and column 1 the friend
        id; any other column is ignored.
    outPath : str, optional
        File that is created or overwritten. Defaults to the path the rest of
        the notebook reads from.

    Returns
    -------
    None
        The result is only written to ``outPath``.
    """
    # Collect the friends of every user in one pass over the edges instead of
    # filtering the whole frame once per user. Dicts preserve insertion order,
    # so users are emitted in order of first appearance and friends in row
    # order.
    friendsByUser = {}
    for user, friend in zip(graphDf[0].tolist(), graphDf[1].tolist()):
        friendsByUser.setdefault(user, []).append(friend)

    with open(outPath, 'w') as fileHandler:
        for user, userFriendList in friendsByUser.items():
            ids = [str(user)] + [str(friend) for friend in userFriendList]
            # Keep the trailing space before the newline so the file layout
            # stays exactly what earlier runs produced.
            fileHandler.write(" ".join(ids) + " \n")

# Uncomment the call below to rebuild the adjacency-list file.
# The file has already been generated, so the call is left disabled.

# convertToAdjacenyList(df)

## Reading the newly constructed adjacency list into memory using the NetworkX library

In [5]:
# Parse the adjacency-list file (the same path convertToAdjacenyList writes to)
# into a NetworkX graph.
G= nx.read_adjlist("../data/adjList_UW.txt")

In [6]:
def readDataInMem(path="../data/adjList_UW.txt"):
    """
    Read a whitespace-separated file of integer ids into memory.

    Each non-blank line becomes one list of ints. A trailing ``\\N`` token
    (the marker used for a missing last field) is read as ``0``.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to the adjacency-list file used by the notebook.

    Returns
    -------
    list[list[int]]
        One record per non-blank line, in file order.

    Raises
    ------
    ValueError
        If a token is not an integer (other than a trailing ``\\N``).
    """
    rows = []
    with open(path, 'r') as fr:
        for line in fr:
            cols = line.split()
            if not cols:
                # Blank line (e.g. a trailing newline at the end of the file):
                # there is no record to parse and cols[-1] would fail.
                continue
            if cols[-1] == '\\N':
                cols[-1] = '0'
            rows.append([int(col) for col in cols])
    return rows

In [7]:
# One list of ints per line of the adjacency-list file.
rows=readDataInMem()

In [8]:

def buildPerUserFrndLst(rows):
    """
    Build the ``{userId: [friendIds]}`` lookup from parsed records.

    The first element of every record is used as the user id and the
    remaining elements as that user's friend list. When the same user id
    occurs in several records, the last record wins.

    Returns the resulting dict.
    """
    return {record[0]: record[1:] for record in rows}

In [9]:
# Map every userId (first value of a record) to the list of its friend ids.
frndMap=buildPerUserFrndLst(rows)

In [10]:
def recommendFrnd(seed,frndMap):
    """
    Print a friend suggestion for ``seed`` based on mutual-friend count.

    The suggestion is the friend-of-a-friend who shares the most friends
    with ``seed`` and is not already a direct friend.

    Parameters
    ----------
    seed : hashable
        User id to produce a suggestion for.
    frndMap : dict
        ``{userId: [friendIds]}`` lookup.

    Returns
    -------
    None
        The result is printed, not returned.

    Raises
    ------
    KeyError
        If ``seed`` has no entry in ``frndMap``.
    """
    level1Frnds = frndMap[seed]  # direct friends of the seed
    # Pass the seed explicitly so the helper excludes *this* user rather than
    # whatever a module-level ``seed`` variable happens to hold.
    probableFrnd, support = frndWithMaxMutualCount(level1Frnds, frndMap, seed)

    if probableFrnd is None:
        print("No friend suggestion available for user " + str(seed))
        return

    # Display the friend id with the highest mutual-friend count.
    print("Suggest user "+str(probableFrnd)+" to "+str(seed)+ " (common friends:"+str(support)+" )")


def frndWithMaxMutualCount(level1Frnds, frndMap, seed=None):
    """
    Find the level-2 user (friend of a level-1 friend) seen most often.

    Every occurrence of a user in the friend lists of ``level1Frnds`` counts
    as one mutual friend. The seed and users who are already level-1 friends
    are never counted.

    Parameters
    ----------
    level1Frnds : list
        Direct friends of the seed user.
    frndMap : dict
        ``{userId: [friendIds]}`` lookup. Level-1 friends without an entry
        are skipped.
    seed : hashable, optional
        User the suggestion is for. When omitted, a module-level ``seed``
        variable is used if one exists (legacy behaviour); otherwise no user
        is excluded as the seed.

    Returns
    -------
    tuple
        ``(userId, count)`` for the best candidate; ties go to the candidate
        encountered first. ``(None, 0)`` when there is no candidate.
    """
    if seed is None:
        # Legacy two-argument callers relied on a global ``seed``.
        seed = globals().get("seed")

    directFrnds = set(level1Frnds)  # O(1) membership tests in the inner loop
    freqCnt = {}

    for frnd in level1Frnds:
        for candidate in frndMap.get(frnd, ()):
            if candidate == seed or candidate in directFrnds:
                continue
            freqCnt[candidate] = freqCnt.get(candidate, 0) + 1

    if not freqCnt:
        # max() would raise ValueError on an empty mapping.
        return None, 0

    friendToSuggest = max(freqCnt, key=freqCnt.get)
    return friendToSuggest, freqCnt[friendToSuggest]
                
    

In [11]:
seed=1 # userId of the user for whom a friend suggestion is computed

#print(str(frndMap))
recommendFrnd(seed,frndMap) # prints the suggested userId and the mutual-friend count

Suggest user 2056 to 1 (common friends:11 )
