In [49]:
library(latex2exp)
library(igraph)
library(ggplot2)
library(pracma)
library(scales)
library(data.table)
library(plyr)
library(readr)
library(stringr)

### Question 18

#### Set the path to the gplus files

In [31]:
path = "C:\\Users\\andersons1\\OneDrive - Anglo American\\Desktop\\ECE 232\\ECENGR232E-80\\ECENGR232E-80\\project_2\\gplus\\"

#### Get list of .circles files and remove one empty file

In [31]:
circle_files <- list.files(path=path, pattern="*.circles", full.names=TRUE, recursive=FALSE)
circle_files <- circle_files[!circle_files %in% "C:\\Users\\andersons1\\OneDrive - Anglo American\\Desktop\\ECE 232\\ECENGR232E-80\\ECENGR232E-80\\project_2\\gplus\\111278293763545982455.circles"]

#### Collect each .circle file as a list of lists

In [33]:
circles <- lapply(circle_files, function(x) {
  
  #read the .circle file
  ncol <- max(count.fields(x, sep="\t"))
  d <- read.table(
    x, 
    header = FALSE, 
    sep = "\t", 
    fill = TRUE,
    as.is = TRUE,
    col.names= paste0('V', seq_len(ncol))
    )
  
  #reorganize d as a data.frame
  names <- t(d)[1,]
  d <- t(d[,-1])
  d <- data.frame(d)
  names(d) = names
  
  #append the ego node from the filename into the circle
  ego_node_id <- as.numeric(str_sub(x,-1*nchar("111278293763545982455.circles"),-9))
  egos <- rep(ego_node_id, times=ncol(d))
  d <- rbind(d,egos)
  rownames(d) <- c(1:nrow(d))
  d
  
})

#### Count ego nodes with .circles files that belong to more than n circles

In [41]:
n_circles <- 2
egos_over_two <- length(lengths(circles)[lengths(circles)>n_circles])

#### Count the number of circles

In [42]:
#convert circles into a list of vectors representing one circle each
circles <- unlist(circles,recursive=F)
print(paste("The number of circles is: ",length(circles)))

[1] "The number of circles is:  63531"


#### Count the number of circles each node belongs to

In [43]:
#remove NA values from circles
circles <- lapply(circles, function(x){x <- x[!is.na(x)]})

In [44]:
#determine the list of nodes that are contained in at least one circle
circle_nodes <- Reduce(union, circles)

In [45]:
#determine the number of circles that each node belongs to
circle_IDs <- Reduce(c, circles)
circle_freq <- lapply(circle_nodes, function(x){sum(circle_IDs == x)})
nodes_over_two <- length(circle_freq[circle_freq>2])
print(paste("The number of nodes belonging to more than 2 circles is:", nodes_over_two))


[1] "The number of nodes belonging to more than 2 circles is: 6880"


### Question 19

#### Load the desired .edges files

In [None]:
nodes <- c("109327480479767108490","115625564993990145546","101373961279443806744")
edge_files <- c(paste(path,nodes,".edges",sep=""))
print(edge_files)

#### Load the files and add in the edges for the ego node

In [None]:
edges <- lapply(edge_files, function(x){
    d <- read.table(x, header = FALSE, sep = "\t", as.is = TRUE)
})
head(edges)

In [None]:
g_gp = graph_from_edgelist(g_gpe, directed = FALSE)

In [None]:
node_ids <- c(109327480479767108490,115625564993990145546,101373961279443806744)

g1 = make_ego_graph(g_gp, 1, nodes=V(g_gp)[node_ids[1]])
g2 = make_ego_graph(g_gp, 1, nodes=V(g_gp)[node_ids[2]])
g3 = make_ego_graph(g_gp, 1, nodes=V(g_gp)[node_ids[3]])

plot(degree.distribution(g1,mode="in"),
     col='red',
     main='v1 in-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v1_in.png')
dev.off()

plot(degree.distribution(g2,mode="in"),
     col='red',
     main='v2 in-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v2_in.png')
dev.off()

plot(degree.distribution(g3,mode="in"),
     col='red',
     main='v3 in-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v3_in.png')
dev.off()

plot(degree.distribution(g1,mode="out"),
     col='red',
     main='v1 out-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v1_out.png')
dev.off()

plot(degree.distribution(g2,mode="out"),
     col='red',
     main='v2 out-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v2_out.png')
dev.off()

plot(degree.distribution(g3,mode="out"),
     col='red',
     main='v3 out-degree distribution',
     xlab="Node In-Degree",
     ylab="Degree Probability")

dev.copy(png,'images\\question19_v3_out.png')
dev.off()

### Question 20

In [None]:
#compute communities
c1 = cluster_walktrap(g1)
c2 = cluster_walktrap(g2)
c3 = cluster_walktrap(g3)

#compute modularities
m1 = modularity(c1)
m2 = modularity(c2)
m3 = modularity(c3)

#plot community structure
plot(c1,g1,layout=layout_with_kk(g1))
plot(c2,g2,layout=layout_with_kk(g2))
plot(c3,g3,layout=layout_with_kk(g3))

### Question 22