In [1]:
import numpy as np
import pandas as pd

import networkx as nx
import community

%matplotlib inline

# Importing the data

The dataset used is the Huawei Social Network Dataset

<a href="https://www.kaggle.com/andrewlucci/huawei-social-network-data">https://www.kaggle.com/andrewlucci/huawei-social-network-data</a>

"Huawei Facebook Communication Network is Directed and Labeled having 1000 nodes and 250315 edges"

In [2]:
# Location of the spreadsheet that holds the network data.
# NOTE(review): this is a machine-specific absolute Windows path; the notebook
# will only run elsewhere after pointing it at a local copy of the file.
facebook_data_path = "D:\\Working_Directory\\github\\five families\\Other Analysis\\huawei-social-network-data\\Facebook_Data.xlsx"

# Load the spreadsheet into a DataFrame (first sheet, pandas defaults).
facebook_network = pd.read_excel(facebook_data_path)

# Creating the Network

NetworkX is used to build a graph from the data provided; network analysis techniques can then be applied to this graph to obtain the results shown below.

In [3]:
# Map each positional column index to its cleaned column label: non-breaking
# spaces ('\xa0') are removed and surrounding whitespace is stripped.
friends_index = {
    i: x.replace('\xa0', '').strip()
    for i, x in enumerate(facebook_network.columns)
}

# Build the graph from the DataFrame's values, treated as an adjacency matrix.
# `nx.from_numpy_matrix` was removed in NetworkX 3.0; `nx.from_numpy_array` is
# the supported equivalent and is also available in NetworkX 2.x. No
# `create_using` is passed, so NetworkX's default graph type is used.
FG = nx.from_numpy_array(facebook_network.values)

# Nodes are created as integer positions 0..n-1; relabel them with the cleaned
# column labels so results are reported by name.
FG = nx.relabel_nodes(FG, friends_index)

# Analyzing the Network

### 1) Checking who has the most connections
(Degree Centrality)

In [4]:
# Degree centrality score for every node in the graph, keyed by node name.
degree_centrality = nx.degree_centrality(FG)

# Show the ten highest-scoring nodes as (name, score) pairs, best first.
# The sort is stable, so tied nodes keep their original dictionary order.
[
    (name, degree_centrality[name])
    for name in sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:10]
]

[('Engkos Kosasih', 0.12712712712712712),
 ('Ernie', 0.12712712712712712),
 ('Zack', 0.12612612612612611),
 ('Fahad Rehman', 0.12612612612612611),
 ('Sylvia', 0.12512512512512514),
 ('Umtiti', 0.12512512512512514),
 ('Asghar', 0.12412412412412413),
 ('Abdullah Khan', 0.12412412412412413),
 ('Ahmed', 0.12312312312312312),
 ('Noor', 0.12312312312312312)]

### 2) Who can spread the word the fastest
(Closeness Centrality)

In [5]:
# Closeness centrality score for every node in the graph, keyed by node name.
closeness_centrality = nx.closeness_centrality(FG)

# Show the ten highest-scoring nodes as (name, score) pairs, best first.
# The sort is stable, so tied nodes keep their original dictionary order.
[
    (name, closeness_centrality[name])
    for name in sorted(closeness_centrality, key=closeness_centrality.get, reverse=True)[:10]
]

[('Engkos Kosasih', 0.5339390700160342),
 ('Ernie', 0.5339390700160342),
 ('Zack', 0.5336538461538461),
 ('Fahad Rehman', 0.5336538461538461),
 ('Sylvia', 0.5333689268553123),
 ('Umtiti', 0.5333689268553123),
 ('Asghar', 0.5330843116328708),
 ('Abdullah Khan', 0.5330843116328708),
 ('Ahmed', 0.5328),
 ('Noor', 0.5328)]

### 3) Who acts as a link between different communities
(Betweenness Centrality)

In [6]:
# Betweenness centrality score for every node in the graph, keyed by node name.
betweenness_centrality = nx.betweenness_centrality(FG)

# Show the ten highest-scoring nodes as (name, score) pairs, best first.
# The sort is stable, so tied nodes keep their original dictionary order.
[
    (name, betweenness_centrality[name])
    for name in sorted(betweenness_centrality, key=betweenness_centrality.get, reverse=True)[:10]
]

[('Ernie', 0.0014707100121539671),
 ('Engkos Kosasih', 0.001468302758588729),
 ('Sylvia', 0.0014362752642014785),
 ('Zack', 0.001424673955649196),
 ('Fahad Rehman', 0.0014158890999055739),
 ('Umtiti', 0.0014049614919470785),
 ('Noor', 0.0013690865209203418),
 ('Ahmed', 0.0013598774834562945),
 ('Asghar', 0.0013592802253661346),
 ('Abdullah Khan', 0.001357846062184436)]

### 4) Who is connected to other well-connected people
(Eigenvector Centrality)

In [7]:
# Eigenvector centrality score for every node in the graph, keyed by node name.
eigenvector_centrality = nx.eigenvector_centrality(FG)

# Show the ten highest-scoring nodes as (name, score) pairs, best first.
# The sort is stable, so tied nodes keep their original dictionary order.
[
    (name, eigenvector_centrality[name])
    for name in sorted(eigenvector_centrality, key=eigenvector_centrality.get, reverse=True)[:10]
]

[('Zack', 0.03962809694363545),
 ('Engkos Kosasih', 0.03951208314544457),
 ('Fahad Rehman', 0.03950195657561689),
 ('Ernie', 0.03948123752745665),
 ('Abdullah Khan', 0.0392275311969346),
 ('Umtiti', 0.03916855664094581),
 ('Asghar', 0.03915401163608041),
 ('Subhaan', 0.03887034598012996),
 ('Sylvia', 0.03872472666754339),
 ('Carlo', 0.03871228327215511)]

### 5) Generating friend suggestions through mutual friends
(Jaccard Coefficient)

In [8]:
# Iterator of (u, v, score) triples. No explicit pair list is passed, so the
# candidate pairs are whatever NetworkX selects by default.
jaccard_coefficient = nx.jaccard_coefficient(FG)

# Consume the iterator once, indexing each score by its (u, v) node pair.
jaccard_coefficient_dict = {(u, v): p for u, v, p in jaccard_coefficient}

# Show the ten highest-scoring pairs as ((u, v), score), best first.
[
    (pair, jaccard_coefficient_dict[pair])
    for pair in sorted(jaccard_coefficient_dict, key=jaccard_coefficient_dict.get, reverse=True)[:10]
]

[(('Arisha', 'Ajengan Masduki'), 0.13903743315508021),
 (('Zachariah', 'Elsa'), 0.13636363636363635),
 (('Haseeb Ali', 'Musarrat'), 0.13450292397660818),
 (('Aneel', 'Rawail'), 0.1323529411764706),
 (('Muzayin Abdul Wahab', 'William'), 0.13106796116504854),
 (('Daisy', 'Keiron'), 0.13043478260869565),
 (('Trump', 'Muhammad Irfan'), 0.13020833333333334),
 (('Neha Ch', 'Robbie'), 0.12994350282485875),
 (('Zulqurnain', 'Shayan'), 0.12962962962962962),
 (('Hashim Jutt', 'Bakhtawar'), 0.12953367875647667)]

### 6) More socially active people might want to talk to or get to know each other
(Preferential Attachment)

In [9]:
# Iterator of (u, v, score) triples. No explicit pair list is passed, so the
# candidate pairs are whatever NetworkX selects by default.
pref_ps = nx.preferential_attachment(FG)

# Consume the iterator once, indexing each score by its (u, v) node pair.
pref_coefficient_dict = {(u, v): p for u, v, p in pref_ps}

# Show the ten highest-scoring pairs as ((u, v), score), best first.
[
    (pair, pref_coefficient_dict[pair])
    for pair in sorted(pref_coefficient_dict, key=pref_coefficient_dict.get, reverse=True)[:10]
]

[(('Engkos Kosasih', 'Ernie'), 16129),
 (('Engkos Kosasih', 'Fahad Rehman'), 16002),
 (('Fahad Rehman', 'Ernie'), 16002),
 (('Ernie', 'Zack'), 16002),
 (('Umtiti', 'Ernie'), 15875),
 (('Engkos Kosasih', 'Sylvia'), 15875),
 (('Ernie', 'Sylvia'), 15875),
 (('Umtiti', 'Fahad Rehman'), 15750),
 (('Fahad Rehman', 'Sylvia'), 15750),
 (('Zack', 'Sylvia'), 15750)]