In [1]:
using LightGraphs, GraphPlot
using Compose
using DataArrays, DataFrames

In [2]:
# Add the current working directory to the module search path so that modules located
# there (such as NetSci01, loaded right below) can be found by `using`.
# `pwd()` is used instead of spawning the external `pwd` program, which is not available
# on every platform and costs a process launch.
push!(LOAD_PATH, pwd())
using NetSci01

# Datasets

In [3]:
# roads_europe = readnetwork("roads-europe.txt")
# save("roads-europe.lg", roads_europe)

In [4]:
# Load the Texas road network from roads-texas.txt.
# NOTE(review): `fromzero = true` presumably tells the reader that vertex ids in the
# file start at 0 -- confirm against NetSci01.readnetwork.
roads_texas = readnetwork("roads-texas.txt"; fromzero = true)

{1393383, 1921660} undirected graph

In [5]:
# Load the Facebook network from facebook.txt with readnetwork's default options.
facebook = readnetwork("facebook.txt")

{63731, 817035} undirected graph

## Basic Analysis

In [6]:
"""
    graphsummary(g::Graph, show::Bool = false)

Print a short statistical profile of `g` to standard output: vertex and edge counts,
mean and total triangle counts, mean degree, density, number and mean size of the
connected components, and the global clustering coefficient.

`show` is accepted but not used by the body. Returns `nothing`.
"""
function graphsummary(g::Graph, show::Bool = false)
    tri_per_vertex = triangles(g)
    components = connected_components(g)

    # The per-vertex counts are summed and divided by 3 to report the total; presumably
    # every triangle is counted once at each of its corners.
    total_triangles = div(sum(tri_per_vertex), 3)
    component_sizes = map(length, components)

    println("Vertices: ", nv(g), ", edges: ", ne(g))
    println("Mean triangles: ", mean(tri_per_vertex), ", total: ", total_triangles)
    println("Mean degree: ", mean(degree(g)))
    println("Density: ", density(g))
    println("Components: ", length(components), ", mean size: ", mean(component_sizes))
    println("Global clustering coefficient: ", global_clustering_coefficient(g))
    return nothing
end

graphsummary (generic function with 2 methods)

In [7]:
# graphsummary(roads_europe)

In [8]:
# Print the summary statistics (size, triangles, degree, density, components,
# clustering) of the full Texas road network.
graphsummary(roads_texas)

Vertices: 1393383, edges: 1921660
Mean triangles: 0.17841971661775693, total: 82869
Mean degree: 2.758265315422967
Density: 1.979547113012058e-6
Components: 13890, mean size: 100.31555075593953
Global clustering coefficient: 0.060224199223599666


In [9]:
# Print the summary statistics (size, triangles, degree, density, components,
# clustering) of the full Facebook network.
graphsummary(facebook)

Vertices: 63731, edges: 817035
Mean triangles: 164.78049928606174, total: 3500542
Mean degree: 25.640112347209364
Density: 0.000402324060053497
Components: 144, mean size: 442.5763888888889
Global clustering coefficient: 0.14770813565777752


# Sampling 

In [10]:
# Plot a sample of the Texas road network drawn by samplenetwork with method :rw.
# NOTE(review): 200 is presumably the sample size in vertices and :rw presumably means
# random-walk sampling -- confirm against NetSci01.samplenetwork.
texas_rw = gplot(samplenetwork(roads_texas, 200, :rw))
# Uncomment to also export the plot as a 10cm x 10cm PDF:
#draw(PDF("texas_rw.pdf", 10cm, 10cm), texas_rw)
display(texas_rw)

In [11]:
# Plot a sample of the Texas road network drawn by samplenetwork with method :ff.
# NOTE(review): 200 is presumably the sample size in vertices and :ff presumably means
# forest-fire sampling -- confirm against NetSci01.samplenetwork.
texas_ff = gplot(samplenetwork(roads_texas, 200, :ff))
# Uncomment to also export the plot as a 10cm x 10cm PDF:
#draw(PDF("texas_ff.pdf", 10cm, 10cm), texas_ff)
display(texas_ff)

In [12]:
# Plot a sample of the Facebook network drawn by samplenetwork with method :rw.
# NOTE(review): 200 is presumably the sample size in vertices and :rw presumably means
# random-walk sampling -- confirm against NetSci01.samplenetwork.
facebook_rw = gplot(samplenetwork(facebook, 200, :rw))
# Uncomment to also export the plot as a 10cm x 10cm PDF:
#draw(PDF("facebook_rw.pdf", 10cm, 10cm), facebook_rw)
display(facebook_rw)

In [13]:
# Plot a sample of the Facebook network drawn by samplenetwork with method :ff.
# NOTE(review): 200 is presumably the sample size in vertices and :ff presumably means
# forest-fire sampling -- confirm against NetSci01.samplenetwork.
facebook_ff = gplot(samplenetwork(facebook, 200, :ff))
# Uncomment to also export the plot as a 10cm x 10cm PDF:
#draw(PDF("facebook_ff.pdf", 10cm, 10cm), facebook_ff)
display(facebook_ff)

## Measures from Sampling


In [17]:
"""
    runstats(dataset, method, n_samples, sample_fraction)

Draw `n_samples` samples from `dataset` with `samplenetwork` and tabulate summary
statistics of every sample.

# Arguments
- `dataset::Graph`: graph to sample from.
- `method::Symbol`: sampling method, forwarded unchanged to `samplenetwork`.
- `n_samples::Int`: number of samples to draw.
- `sample_fraction::AbstractFloat`: fraction of `nv(dataset)`, within `[0, 1]`; the
  product is rounded to an `Int` and passed to `samplenetwork` as the sample size.

# Returns
A `DataFrame` with one row per sample and the columns `:edges`, `:triangles`,
`:mean_triangles`, `:density`, `:mean_degree`, `:components`, `:mean_component_size`
and `:global_clustering_coefficient`, in that order.

An `@assert` fails when `sample_fraction` lies outside `[0, 1]`.
"""
function runstats(dataset::Graph, method::Symbol, n_samples::Int, sample_fraction::AbstractFloat)
    @assert 0.0 <= sample_fraction <= 1.0

    target_size = round(Int, nv(dataset) * sample_fraction)
    subgraphs = [samplenetwork(dataset, target_size, method) for trial in 1:n_samples]

    table = DataFrame()
    table[:edges] = map(ne, subgraphs)

    # Per-vertex triangle counts of every sample; the sum is divided by 3 to report a
    # total, presumably because each triangle is counted once at each of its corners.
    tri_counts = map(triangles, subgraphs)
    table[:triangles] = map(tri_counts) do counts
        div(sum(counts), 3)
    end
    table[:mean_triangles] = map(mean, tri_counts)

    table[:density] = map(density, subgraphs)
    table[:mean_degree] = map(subgraphs) do sg
        mean(degree(sg))
    end

    components = map(connected_components, subgraphs)
    table[:components] = map(length, components)
    table[:mean_component_size] = map(components) do parts
        mean(map(length, parts))
    end

    table[:global_clustering_coefficient] = map(global_clustering_coefficient, subgraphs)

    return table
end

runstats (generic function with 2 methods)

In [18]:
# Experiment parameters: how many samples to draw per (dataset, method) pair, and the
# fraction of each dataset's vertices that runstats turns into the sample size.
n_samples = 100
sample_fraction = 0.1

# Sampled statistics keyed by :<dataset>_<method>, e.g. :roads_texas_rw.
results = Dict{Symbol,DataFrame}()

# Each dataset is paired with its name explicitly instead of being looked up from its
# symbol through `@eval`. The runs execute one after another: the work is pure
# computation, so wrapping it in cooperative `@async` tasks added scheduling without
# providing any parallelism.
for (name, graph) in [(:roads_texas, roads_texas), (:facebook, facebook)]
    for method in [:rw, :ff]
        results[Symbol(name, "_", method)] =
            runstats(graph, method, n_samples, sample_fraction)
    end
end

# Create the output directory up front so that writing the CSV files cannot fail just
# because "sampled" does not exist yet (mkpath is a no-op when it already does).
mkpath("sampled")
for (key, table) in results
    writetable("sampled/$key.csv", table)
end