In [None]:
library(latex2exp)
library(igraph)
library(ggplot2)
library(pracma)
library(scales)
library(data.table)
library(plyr)
library(readr)
library(stringr)

### Question 18

#### Set the path to the gplus files

In [None]:
# Directory holding the gplus ego-network files (.circles / .edges).
# The trailing separator matters: the .edges file names are later built by
# pasting directly onto this string.
path <- "C:\\Users\\andersons1\\OneDrive - Anglo American\\Desktop\\ECE 232\\ECENGR232E-80\\ECENGR232E-80\\project_2\\gplus\\"

#### Get list of .circles files and remove one empty file

In [None]:
# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor the extension so only names that end in ".circles" are returned.
circle_files <- list.files(
  path = path,
  pattern = "\\.circles$",
  full.names = TRUE,
  recursive = FALSE
)

# Drop the one empty file. Comparing base names (instead of a second copy of
# the absolute path) keeps the filter working regardless of how list.files()
# joins the directory and the file name.
empty_circle_file <- "111278293763545982455.circles"
circle_files <- circle_files[basename(circle_files) != empty_circle_file]

#### Read each .circles file into a data frame (one column per circle)

In [None]:
# Read every .circles file into a data frame with one column per circle
# (named after the circle) and one row per member; the ego node taken from
# the file name is appended as the last row of every circle.
#
# Node IDs are kept as character strings throughout. They are 21-digit
# numbers, which a double cannot represent exactly, so converting them to
# numeric could make different IDs compare equal.
circles <- lapply(circle_files, function(x) {

  # Rows have different numbers of fields, so size the table to the widest
  # row and let read.table() pad the shorter ones.
  n_fields <- max(count.fields(x, sep = "\t"))
  raw <- read.table(
    x,
    header = FALSE,
    sep = "\t",
    fill = TRUE,
    colClasses = "character",
    col.names = paste0("V", seq_len(n_fields))
  )

  # The first field of each row is the circle name, the rest are members.
  circle_names <- raw[[1]]
  members <- t(raw[, -1, drop = FALSE])

  # With character columns the padding added by `fill = TRUE` is "" rather
  # than NA; turn it back into NA so it can be dropped with is.na() later.
  members[!nzchar(members)] <- NA

  d <- data.frame(members, stringsAsFactors = FALSE)
  names(d) <- circle_names

  # Append the ego node (file name without its extension) to every circle.
  ego_node_id <- sub("\\.circles$", "", basename(x))
  d <- rbind(d, rep(ego_node_id, times = ncol(d)))
  rownames(d) <- seq_len(nrow(d))
  d
})

#### Count the ego nodes whose .circles file defines more than n circles

In [None]:
# Threshold: an entry of `circles` is counted when its length exceeds this
n_circles <- 2
# lengths() gives the size of each entry; summing the comparison counts
# how many of them are above the threshold
egos_over_two <- sum(lengths(circles) > n_circles)

#### Count the number of circles

In [None]:
# Flatten `circles` by one level so that each element is a single circle
# (a vector of member IDs) instead of one container per ego node.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
circles <- unlist(circles, recursive = FALSE)
# paste() already inserts one space between its arguments
print(paste("The number of circles is:", length(circles)))

#### Count the number of circles each node belongs to

In [None]:
# Strip the NA entries out of every circle
circles <- lapply(circles, function(members) members[!is.na(members)])

In [None]:
# Distinct nodes that appear in at least one circle, in order of first
# appearance. A single unique() over the flattened list avoids rebuilding the
# set once per circle, and also deduplicates when there is only one circle.
circle_nodes <- unique(unlist(circles, use.names = FALSE))

In [None]:
# Count how many circle memberships each node in `circle_nodes` has.
# All memberships are flattened into one vector, each is mapped to its
# position in `circle_nodes`, and the positions are tallied in a single pass
# (instead of rescanning every membership once per node).
circle_IDs <- unlist(circles, use.names = FALSE)
circle_freq <- as.list(
  tabulate(match(circle_IDs, circle_nodes), nbins = length(circle_nodes))
)

# circle_freq[[k]] is the count for circle_nodes[[k]]
nodes_over_two <- sum(unlist(circle_freq) > 2)
print(paste("The number of nodes belonging to more than 2 circles is:", nodes_over_two))


### Question 19

#### Load the desired .edges files

In [None]:
# IDs of the ego nodes whose .edges files are loaded
nodes <- c(
  "109327480479767108490",
  "115625564993990145546",
  "101373961279443806744"
)
# One .edges file per node: <path><node id>.edges
edge_files <- paste0(path, nodes, ".edges")

#### Load the files and add in the edges for the ego node

In [None]:
# Load the edge list of each ego network.
# IDs are read as character strings: they are 21-digit numbers, which a
# double cannot hold exactly, so letting read.table() convert them (or calling
# as.numeric() on the ego ID) could merge distinct nodes into one vertex.
edges <- lapply(edge_files, function(x) {
  read.table(x, header = FALSE, sep = " ", colClasses = "character")
})
names(edges) <- nodes

# Every distinct node that appears in either column of each edge list
ego_nodes <- lapply(edges, function(x) unique(unlist(x, use.names = FALSE)))
names(ego_nodes) <- nodes

# The ego node itself is not part of its own .edges file, so add a directed
# edge from the ego node to every other node of its network.
new_edges <- lapply(nodes, function(x) {
  others <- ego_nodes[[x]]
  data.frame(
    V1 = rep(x, length(others)),
    V2 = others,
    stringsAsFactors = FALSE
  )
})
names(new_edges) <- nodes

# Column names V1/V2 match read.table()'s defaults, so the rows line up
for (i in seq_along(edges)) {
  edges[[i]] <- rbind(edges[[i]], new_edges[[i]])
}


#### Generate personal networks from each edge list

In [None]:
# Build one directed graph per edge list. graph_from_data_frame() is the
# current name of the deprecated graph.data.frame().
g <- lapply(edges, graph_from_data_frame, directed = TRUE)

In [None]:
# Plot the in-degree, out-degree and combined degree distributions of every
# graph in `g`, copying each figure to a PNG file under images/.
dir.create("images", showWarnings = FALSE)

for (i in seq_along(g)) {
  # Compute each distribution once and reuse it for all three figures.
  in_dist <- degree_distribution(g[[i]], mode = "in")
  out_dist <- degree_distribution(g[[i]], mode = "out")

  # Element k of a degree distribution is the share of vertices with degree
  # k - 1 (the first element is degree 0), so the x axis must start at 0
  # rather than at the vector index.
  in_deg <- seq_along(in_dist) - 1
  out_deg <- seq_along(out_dist) - 1

  plot(in_deg, in_dist,
    col = "blue",
    main = paste0("v", i, " in-degree distribution"),
    xlab = "Node In-Degree",
    ylab = "Degree Probability"
  )
  dev.copy(png, file.path("images", paste0("question19_v", i, "_in.png")))
  dev.off()

  plot(out_deg, out_dist,
    col = "red",
    main = paste0("v", i, " out-degree distribution"),
    xlab = "Node Out-Degree",
    ylab = "Degree Probability"
  )
  dev.copy(png, file.path("images", paste0("question19_v", i, "_out.png")))
  dev.off()

  # Combined figure: size both axes to cover both distributions so neither
  # series is clipped.
  plot(out_deg, out_dist,
    col = "red",
    main = paste0("v", i, " in-degree vs. out-degree distribution"),
    xlab = "Node In/Out-Degree",
    ylab = "Degree Probability",
    xlim = c(0, max(in_deg, out_deg)),
    ylim = c(0, max(in_dist, out_dist))
  )
  points(in_deg, in_dist, col = "blue")
  # Both series are drawn as points, so the legend shows point symbols; a
  # keyword position keeps it inside the plot whatever the axis ranges are.
  legend("topright",
    legend = c("out-degree", "in-degree"),
    col = c("red", "blue"),
    pch = 1,
    bty = "o",
    cex = 0.8
  )
  dev.copy(png, file.path("images", paste0("question19_v", i, "_vs.png")))
  dev.off()
}


### Question 20

In [None]:
# Detect communities in every graph with the walktrap algorithm
clusters <- lapply(g, cluster_walktrap)

# Modularity of each community structure (kept as a list, one entry per graph)
modularities <- lapply(clusters, modularity)
print(modularities)

# Plot each community structure and copy the figure to a PNG under images/
dir.create("images", showWarnings = FALSE)
for (i in seq_along(g)) {
  filename <- file.path("images", paste0("question20_v", i, ".png"))
  plot(
    clusters[[i]],
    g[[i]],
    layout = layout_with_kk(g[[i]], dim = 2),
    vertex.label = NA,
    vertex.size = 2,
    edge.width = 0.1,
    edge.arrow.size = 0.1,
    margin = c(0, 0, 0, 0),
    # `[[i]]` extracts the number; `[i]` would hand sprintf() a list, which
    # it does not accept for a %f conversion.
    main = sprintf("Node v%d Modularity=%.4f", i, modularities[[i]])
  )
  # The figure is written to disk here; the plot call itself takes no
  # output-file argument.
  dev.copy(png, filename, width = 800, height = 800)
  dev.off()
}


### Question 22

In [None]:
#determine the nodes with circle information for each network


#identify the circles in each network and calculate their population


#identify the communities in each network and calculate their population

#calculate circle entropy

#calculate community entropy

#calculate circle-community conditional entropy

#calculate community-circle conditional entropy

#calculate homogeneity

#calculate completeness