In [2]:
%load_ext nb_black

<IPython.core.display.Javascript object>

# Work with a Minimal Example

## Visualize with Pyvis

In [103]:
import pandas as pd
import numpy as np

<IPython.core.display.Javascript object>

In [161]:
# Friendship pairs of the toy network, one (person, person) tuple per edge.
network = [
    # Ben's five friendships
    ("Ben", "Khuyen"),
    ("Ben", "Thinh"),
    ("Ben", "Michael"),
    ("Ben", "Lauren"),
    ("Ben", "Josh"),
    # friendships that do not involve Ben
    ("Lauren", "Khuyen"),
    ("Thinh", "Michael"),
    ("Khuyen", "Josh"),
]
# Same pairs as a two-column edge list.
friends = pd.DataFrame(data=network, columns=["person1", "person2"])

<IPython.core.display.Javascript object>

In [162]:
# Display the edge list: one row per friendship pair.
friends

Unnamed: 0,person1,person2
0,Ben,Khuyen
1,Ben,Thinh
2,Ben,Michael
3,Ben,Lauren
4,Ben,Josh
5,Lauren,Khuyen
6,Thinh,Michael
7,Khuyen,Josh


<IPython.core.display.Javascript object>

In [130]:
from pyvis.network import Network

# notebook=True is pyvis' switch for use from inside a notebook.
net = Network(notebook=True)
# Every distinct name becomes one node, whichever column it appears in.
people = list(set(friends["person1"]) | set(friends["person2"]))
net.add_nodes(people)
net.show("minimal_example.html")

<IPython.core.display.Javascript object>

In [131]:
# Each row of `friends` becomes a [person1, person2] list and is added as one edge.
net.add_edges(friends.values.tolist())
# Written to a different file than the nodes-only graph.
net.show("minimal_example_with_edges.html")

<IPython.core.display.Javascript object>

## Observe the Friendship Paradox

In [132]:
def get_friends(data: pd.DataFrame, person_id):
    """Return everyone who shares a row with ``person_id``.

    A friendship row may name the person in either column, so both columns
    are searched. Friends found through ``person1`` matches come first,
    followed by those found through ``person2`` matches.
    """
    listed_first = data["person1"] == person_id
    listed_second = data["person2"] == person_id
    partners_when_first = data.loc[listed_first, "person2"].values.tolist()
    partners_when_second = data.loc[listed_second, "person1"].values.tolist()
    return partners_when_first + partners_when_second

<IPython.core.display.Javascript object>

In [425]:
# Lauren is person1 in one row (with Khuyen) and person2 in another (with Ben).
get_friends(data=friends, person_id="Lauren")

['Khuyen', 'Ben']

<IPython.core.display.Javascript object>

In [115]:
def get_num_friends(data: pd.DataFrame, person_id) -> int:
    """Get number of friends of a person with specified id

    Args:
        data: Edge list with ``person1`` and ``person2`` columns.
        person_id: Identifier to look up. It is only forwarded to
            ``get_friends``, which compares it against the column values,
            so it must be whatever those columns hold (strings in the toy
            example). It is therefore not annotated as ``int``.

    Returns:
        Length of the friend list returned by ``get_friends``.
    """
    return len(get_friends(data, person_id))

<IPython.core.display.Javascript object>

In [428]:
# Length of the friend list found for Lauren.
get_num_friends(data=friends, person_id="Lauren")

2

<IPython.core.display.Javascript object>

In [461]:
def get_num_friends_map(data: pd.DataFrame):
    """Get a dictionary of people and their number of friends

    The counts are computed from the ``data`` argument only, so the function
    works for any edge list and not just one particular module-level frame.

    Args:
        data: Edge list with ``person1`` and ``person2`` columns.

    Returns:
        dict mapping every value found in ``person1`` or ``person2`` to the
        number of times it appears across both columns.
    """
    # A person gains one friend for each row that names them, in either column,
    # so stacking the two columns and counting occurrences yields every
    # person's count in a single pass over the data.
    appearances = pd.concat([data["person1"], data["person2"]], ignore_index=True)
    return appearances.value_counts(sort=False).to_dict()

<IPython.core.display.Javascript object>

In [457]:
def get_num_friends_of_a_person_friends(
    data: pd.DataFrame, person_id, num_friends_map: dict
):
    """Get number of friends of a person's friends

    Looks up the friends of ``person_id`` in ``data`` and returns, in the
    same order, each friend's entry from ``num_friends_map``.
    """
    counts_per_friend = []
    for friend_id in get_friends(data, person_id):
        counts_per_friend.append(num_friends_map[friend_id])
    return counts_per_friend

<IPython.core.display.Javascript object>

In [462]:
# Count everybody's friends once, then look up the counts of Lauren's friends.
num_friends_map = get_num_friends_map(data=friends)
get_num_friends_of_a_person_friends(
    data=friends, person_id="Lauren", num_friends_map=num_friends_map
)

[3, 5]

<IPython.core.display.Javascript object>

In [450]:
def get_average_friends_of_a_person_friends(
    data: pd.DataFrame, person_id, num_friends_map: dict = None
):
    """Get the average number of friends a person's friends have

    Args:
        data: Edge list with ``person1`` and ``person2`` columns.
        person_id: Person whose friends are averaged over.
        num_friends_map: Optional precomputed mapping of person to number of
            friends for ``data``. When omitted it is built from ``data``.
            Pass it in when calling this for many people so the map is not
            rebuilt on every call.

    Returns:
        Mean of the friend counts of ``person_id``'s friends, as returned by
        ``np.mean``.
    """
    if num_friends_map is None:
        # Must come from the `data` argument: a map built from any other
        # frame would be missing the ids that appear in `data`.
        num_friends_map = get_num_friends_map(data)
    num_friends_of_friends = get_num_friends_of_a_person_friends(
        data, person_id, num_friends_map
    )
    return np.mean(num_friends_of_friends)

<IPython.core.display.Javascript object>

In [463]:
# Average friend count over Lauren's friends.
get_average_friends_of_a_person_friends(data=friends, person_id="Lauren")

4.0

<IPython.core.display.Javascript object>

In [452]:
def get_friends_df(data: pd.DataFrame):
    """Build a per-person summary of friend counts

    Args:
        data: Edge list with ``person1`` and ``person2`` columns.

    Returns:
        DataFrame with one row per distinct person and the columns
        ``person_id``, ``num_friends`` and ``avg_friends_of_friends``
        (mean friend count of that person's friends, rounded to 2 decimals).
    """
    all_people = list(set(data["person1"]).union(set(data["person2"])))
    # The friend-count map is the same for every person, so build it once
    # here instead of once per person inside the loop.
    num_friends_map = get_num_friends_map(data)
    num_friends = [
        {
            "person_id": person_id,
            "num_friends": get_num_friends(data, person_id),
            "avg_friends_of_friends": round(
                np.mean(
                    get_num_friends_of_a_person_friends(
                        data, person_id, num_friends_map
                    )
                ),
                2,
            ),
        }
        for person_id in all_people
    ]
    return pd.DataFrame(num_friends)

<IPython.core.display.Javascript object>

In [453]:
# Per-person summary for the toy network.
num_friends_sample = get_friends_df(data=friends)

<IPython.core.display.Javascript object>

In [454]:
# Flag the people whose friends have, on average, more friends than they do.
num_friends_sample = num_friends_sample.assign(
    friends_have_more_friends=(
        num_friends_sample["avg_friends_of_friends"]
        > num_friends_sample["num_friends"]
    )
)
num_friends_sample

Unnamed: 0,person_id,num_friends,avg_friends_of_friends,friends_have_more_friends
0,Ben,5,2.2,False
1,Lauren,2,4.0,True
2,Michael,2,3.5,True
3,Khuyen,3,3.0,False
4,Josh,2,4.0,True
5,Thinh,2,3.5,True


<IPython.core.display.Javascript object>

In [120]:
# Share of people flagged True (the mean of a boolean column).
num_friends_sample["friends_have_more_friends"].mean()

0.8333333333333334

<IPython.core.display.Javascript object>

In [121]:
# The helper takes the precomputed friend-count map as its third argument.
get_num_friends_of_a_person_friends(friends, "Khuyen", num_friends_map)

[2, 5, 3]

<IPython.core.display.Javascript object>

In [122]:
# The helper takes the precomputed friend-count map as its third argument.
get_num_friends_of_a_person_friends(friends, "Josh", num_friends_map)

[5, 3]

<IPython.core.display.Javascript object>

In [125]:
# The helper takes the precomputed friend-count map as its third argument.
get_num_friends_of_a_person_friends(friends, "Michael", num_friends_map)

[5, 3]

<IPython.core.display.Javascript object>

# Facebook Social Network

## Observe the Friendship Paradox

In [270]:
# Space-separated file without a header row; the two columns are named on load.
data = pd.read_csv(
    "facebook_combined.txt", sep=" ", header=None, names=["person1", "person2"]
)

<IPython.core.display.Javascript object>

In [464]:
# Display the loaded edge list.
data

Unnamed: 0,person1,person2
0,0,1
1,0,2
2,0,3
3,0,4
4,0,5
...,...,...
88229,4026,4030
88230,4027,4031
88231,4027,4032
88232,4027,4038


<IPython.core.display.Javascript object>

In [272]:
# Per-person summary for the Facebook edge list.
num_friends = get_friends_df(data=data)

<IPython.core.display.Javascript object>

In [273]:
# Flag the people whose friends have, on average, more friends than they do.
num_friends = num_friends.assign(
    friends_have_more_friends=(
        num_friends["avg_friends_of_friends"] > num_friends["num_friends"]
    )
)
num_friends

Unnamed: 0,person_id,num_friends,avg_friends_of_friends,friends_have_more_friends
0,0,347,18.96,False
1,1,17,48.24,True
2,2,10,49.90,True
3,3,17,59.76,True
4,4,10,42.60,True
...,...,...,...,...
4034,4034,2,38.50,True
4035,4035,1,59.00,True
4036,4036,2,31.50,True
4037,4037,4,23.25,True


<IPython.core.display.Javascript object>

In [274]:
# Share of people flagged True (the mean of a boolean column).
num_friends["friends_have_more_friends"].mean()

0.874721465709334

<IPython.core.display.Javascript object>

## Visualize with Pyvis

In [406]:
# New network for the Facebook graph; `net` no longer refers to the toy graph.
# The two positional strings are presumably height and width — TODO confirm
# against the pyvis Network signature.
net = Network("1000px", "1000px")

# The person ids are converted to strings before being handed to pyvis.
all_people = list(map(str, num_friends.person_id.values.tolist()))
net.add_nodes(all_people)
net.show("all_people.html")

<IPython.core.display.Javascript object>

Opening in existing browser session.


In [400]:
# Ids of the people whose friends do NOT have more friends than they do.
influencers = num_friends.loc[
    ~num_friends["friends_have_more_friends"], "person_id"
].tolist()

<IPython.core.display.Javascript object>

In [407]:
# Rebuild the node list: a node whose id is in `influencers` is replaced by a
# dot-shaped entry with colour #eb4034; every other node dict is kept as is.
# NOTE(review): `influencers` holds ids as stored in `num_friends.person_id`,
# while the nodes were added from str(...) values — confirm node["id"] has the
# matching type, otherwise this membership test never succeeds.
net.nodes = [
    {"id": node["id"], "label": node["id"], "shape": "dot", "color": "#eb4034"}
    if node["id"] in influencers
    else node
    for node in net.nodes
]

<IPython.core.display.Javascript object>

In [398]:
# One [person1, person2] pair per row of the Facebook edge list.
edges = data.loc[:, ["person1", "person2"]].values.tolist()

<IPython.core.display.Javascript object>

In [410]:
# Add every friendship pair to the network.
# NOTE(review): the endpoints come straight from `data`; confirm they have the
# same type as the node ids pyvis stored.
net.add_edges(edges)

<IPython.core.display.Javascript object>

In [411]:
# Write the graph with its edges to a separate HTML file.
net.show("all_people_with_edges.html")

<IPython.core.display.Javascript object>

Opening in existing browser session.
