# Class Session 5 Exercise:
## Comparing asymptotic running time for testing two vertices for an edge

We will measure the running time for testing whether two vertices (i and j) have an edge between them, for three different data structures for representing the graph:
    adjacency matrix
    adjacency list
    adjacency forest (aka adjacency tree)
    
First, we import all the R packages that we will need for this exercise:

In [66]:
library(igraph)

We'll define a function that can make an "adjacency forest" (aka adjacency tree) from the adjacency list for a graph:

### BUGFIX from 2016.10.05:  you have to call "as.vector" on adj_list[[i]], and you have to return "myforest" at the end of the function

In [67]:
get_adj_tree <- function(adj_list) {
    n <- length(adj_list)
    myforest <- list()
    for (i in 1:n) {
        newenv <- new.env()
        for (j in as.vector(adj_list[[i]])) {
            newenv[[as.character(j)]] <- 1
        }
        myforest[[i]] <- newenv
    }
    myforest
}

Then, we will define functions for testing whether vertices i and j are neighbors, for each of four different graph data structures (adjacency matrix, adjacency list, edge list, and adjacency tree):

In [68]:
find_matrix <- function(gmat, i, j) {
    gmat[i, j] == 1
}

find_adj_list <- function(adj_list, i, j) {
    j %in% adj_list[[i]]
}

find_edge_list <- function(edge_list, i, j) {
    any((edge_list[,1] == i) & (edge_list[,2] == j)) | 
        any((edge_list[,2] == i) & (edge_list[,1] == j))
} # this function we won't use, since we know it is going to be super slow

find_adj_tree <- function(adj_tree, i, jstr) {
    ! is.null(adj_tree[[i]][[jstr]])
}


This is the simulation code; note that we now have two parameters, "n" and "k" (n is the number of vertices in the graph, and k is the average vertex degree.  We'll actually be keeping n fixed and varying k for this exercise.

In [79]:
do_sim <- function(n, k) {

    nrep <- 1
    nsubrep <- 1
    
    simdf <- do.call(rbind,
                     replicate(nrep, {
                         g <- sample_pa(n, out.seq=rep(k, n), directed=FALSE)
                         
                         g_matrix <- as.matrix(as_adjacency_matrix(g))
                         g_adj_list <- as_adj_list(g)
                         g_edge_list <- as_edgelist(g)
                         g_adj_tree <- get_adj_tree(g_adj_list)
                                                 
                         # this is for setting up the (admittedly weird) R way of doing a 
                         # double "for" loop (see "mapply" below)
                         allvals <- expand.grid(1:n, 1:n)
                         
                         # need this because "as.character" is kind of slow
                         jstrs <- as.character(1:n)
                         
                         time_mat <- system.time(
                             replicate(nsubrep, {
                                 mapply(function(i, j) {
                                            find_matrix(g_matrix, i, j)
                                        }, allvals$Var1, allvals$Var2)
                             })
                         )[1]
                         
                         time_adj_list <- system.time(
                             replicate(nsubrep, {
                                 mapply(function(i, j) {
                                            find_adj_list(g_adj_list, i, jstrs[j])
                                        }, allvals$Var1, allvals$Var2)
                             })
                         )[1]
                         
                         time_adjacency_forest <- system.time(
                             replicate(nsubrep, {
                                 mapply(function(i, j) {
                                     find_adj_tree(g_adj_tree, i, jstrs[j])
                                     }, allvals$Var1, allvals$Var2)
                             })                           
                         )[1]
                         
                         rowdf <- data.frame(matrix=time_mat,
                                             adjlist=time_adj_list,
                                             adjforest=time_adjacency_forest)
                         

                         rowdf
                     }, simplify=FALSE)
                     )
    
    # average over replicates
    simres <- apply(simdf, 2, mean)
    
    # get results in microseconds, on a per-vertex-pair basis
    1000000*simres/(n*(n-1)/2)
}


Call the do_sim function for three different values of "k" (the average vertex degree), and convert the resulting list (of single-row data frames) to a three-row data frame:

In [86]:
sim_data_df <- do.call(rbind, lapply(c(1, 5, 10, 100), 
                                     function(k) {do_sim(1000, k)}))

In [87]:
sim_data_df

matrix,adjlist,adjforest
11.707708,20.54855,9.641642
9.001001,26.37037,9.545546
9.60961,32.36036,12.324324
12.668669,211.56356,10.978979
