# PageRank calculation

In [None]:
import ipytest
import pytest
from typing import Any, Dict, List, Set, Tuple

# Apply ipytest's default notebook configuration so the test cell below can run
# (NOTE(review): exact effects depend on the installed ipytest version — confirm).
ipytest.autoconfig()

You're given a web graph in the form of an edge list. Each edge is represented as a `(from_node, to_node)` tuple.
(We assume that there is at most one link between any pair of nodes and that the input is correct.)

## Input 1

<img src="images/pagerank1.png" width="200">

In [None]:
# Edge list of the first example graph: A -> B, A -> C, B -> C, C -> A.
WEB_GRAPH_1 = [("A", "B"), ("A", "C"), ("B", "C"), ("C", "A")]

## Input 2

<img src="images/pagerank2.png" width="200">

Mind that this web graph contains rank sinks, i.e., nodes that have only incoming edges but no outgoing ones. You'll need to deal with those by adding outgoing links from each rank sink to all nodes (including the sink itself).

In [None]:
# Edge list of the second example graph (nodes 1-6).
# Node 2 never appears as a source, i.e., it has no outgoing edges.
WEB_GRAPH_2 = [(1, 2), (1, 3), (3, 1), (3, 2), (3, 5), (4, 5), (4, 6), (5, 4), (5, 6), (6, 4)]

## Utilities

In [None]:
def get_all_nodes(web_graph: List[Tuple[Any, Any]]) -> Set[Any]:
    """Collects every distinct node that occurs in a web graph.

    Params:
        web_graph: List of `(from_node, to_node)` edges.

    Returns:
        Set containing each node that appears as a source or a target
        of at least one edge.
    """
    # Unpack each edge into its two endpoints and flatten them into one set.
    return {
        endpoint
        for source, target in web_graph
        for endpoint in (source, target)
    }

**TODO** Complete this method.

In [None]:
def get_outlinks_num(web_graph: List[Tuple[Any, Any]]) -> Dict[Any, int]:
    """Computes the number of outgoing links for each node in a web graph.

    Every node that occurs in the edge list gets an entry; nodes that only
    ever appear as a link target (rank sinks) are reported with a count of 0.

    Params:
        web_graph: List of `(from_node, to_node)` edges.

    Returns:
        Dict with nodes as keys and the number of outgoing links as values.
    """
    outlinks: Dict[Any, int] = {}
    for from_node, to_node in web_graph:
        # Each edge contributes exactly one outgoing link to its source.
        outlinks[from_node] = outlinks.get(from_node, 0) + 1
        # Make sure pure targets are present too, without touching their count.
        outlinks.setdefault(to_node, 0)
    return outlinks

## PageRank calculation

The pagerank of a given node $a$ is computed using:

$$PR(a) = \frac{q}{T} + (1-q) \sum_{i=1}^n \frac{PR(p_i)}{L(p_i)}$$

where 
  - $q$ is the probability of jumping to a random page
  - $T$ is the total number of pages (nodes) in the Web graph
  - $p_1\dots p_n$ are pages that **point to** page $a$
  - $PR(p_i)$ is the PageRank value of page $p_i$
  - $L(p_i)$ is the number of outgoing links of page $p_i$

**TODO** Complete this method.

In [None]:
def pagerank(web_graph: List[Tuple[Any, Any]], q: float = 0.15, iterations: int = 3) -> Dict[Any, float]:
    """Computes PageRank for all nodes in a web graph.

    Scores start out uniform (1/T for T nodes) and are then refined for the
    requested number of iterations using

        PR(a) = q/T + (1-q) * sum(PR(p_i) / L(p_i))

    where the sum runs over all pages p_i linking to a. All scores of one
    iteration are computed from the scores of the previous iteration.

    Rank sinks (nodes without outgoing links) are treated as if they linked
    to every node in the graph, including themselves.

    Params:
        web_graph: List of `(from_node, to_node)` edges.
        q: Random jump probability.
        iterations: Number of iterations.

    Returns:
        Dict with node names as keys and PageRank scores as values.
        An empty graph yields an empty dict.
    """
    # Derive out-degrees and inlink lists in a single pass over the edges.
    outlinks_num: Dict[Any, int] = {}
    inlinks: Dict[Any, List[Any]] = {}
    for from_node, to_node in web_graph:
        outlinks_num[from_node] = outlinks_num.get(from_node, 0) + 1
        outlinks_num.setdefault(to_node, 0)
        inlinks.setdefault(to_node, []).append(from_node)
        inlinks.setdefault(from_node, [])

    total = len(outlinks_num)
    if total == 0:
        # No nodes: nothing to rank (and avoids dividing by zero below).
        return {}

    # Rank sinks: nodes that never occur as the source of an edge.
    sinks = [node for node, count in outlinks_num.items() if count == 0]

    # Start from the uniform distribution.
    pr = {node: 1 / total for node in outlinks_num}

    # Calculate pagerank scores iteratively.
    for _ in range(iterations):
        # A sink behaves as if it had T outgoing links, one to each node, so
        # every node receives PR(sink)/T from each sink. This term is the same
        # for all nodes, hence it is computed once per iteration.
        sink_share = sum(pr[sink] for sink in sinks) / total
        # The comprehension reads the previous `pr` throughout; the name is
        # only rebound once the new dict is complete (synchronous update).
        pr = {
            node: q / total + (1 - q) * (
                sink_share
                + sum(pr[source] / outlinks_num[source] for source in sources)
            )
            for node, sources in inlinks.items()
        }

    return pr

Tests.

In [None]:
%%run_pytest[clean]

@pytest.mark.parametrize("web_graph,q,iterations,correct_values", [
    (WEB_GRAPH_1, 0.5, 0, {"A": 1/3, "B": 1/3, "C": 1/3}),
    (WEB_GRAPH_1, 0.5, 1, {"A": 0.3333, "B": 0.25, "C": 0.4166}),
    (WEB_GRAPH_1, 0.5, 2, {"A": 0.375, "B": 0.25, "C": 0.375}),
    (WEB_GRAPH_1, 0.5, 3, {"A": 0.3541, "B": 0.2604, "C": 0.3854}),
    (WEB_GRAPH_2, 0.15, 0, {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6}),
    (WEB_GRAPH_2, 0.15, 1, {1: 0.0958, 2: 0.1666, 3: 0.1194, 4: 0.2611, 5: 0.1666, 6: 0.1902}),
    (WEB_GRAPH_2, 0.15, 2, {1: 0.0824, 2: 0.1231, 3: 0.0893, 4: 0.2811, 5: 0.1934, 6: 0.2304}),
])
def test_pagerank(web_graph, q, iterations, correct_values):
    """Checks PageRank scores against reference values (relative tolerance 1e-3)."""
    computed_scores = pagerank(web_graph, q=q, iterations=iterations)
    expected_scores = pytest.approx(correct_values, rel=1e-3)
    assert computed_scores == expected_scores