In [1]:
using Pkg
# Activate the project environment found in the parent directory (`..`).
pkg"activate .."

# Sparse Math

See https://github.com/JuliaLang/julia/issues/29535

In [39]:
using SparseArrays

# Non-mutating counterpart of `column_vecmul!`: the work is done on a copy of
# `A`, so the caller's matrix is left untouched, and the scaled copy is returned.
# `x` is passed on as a flat vector view (no copy of `x` is made).
# NOTE(review): flattening means a matrix-shaped `x` is treated as a plain
# vector here, while `column_vecmul!` broadcasts a matrix-shaped `x` with its
# original shape -- confirm this is the intended behaviour.
column_vecmul(A, x) = column_vecmul!(copy(A), @view(x[:]))

# Generic method: multiply `A` elementwise by `x` under the usual broadcasting
# rules, store the result back into `A`, and return `A`.
function column_vecmul!(A::AbstractMatrix, x::AbstractArray)
    @. A *= x
    return A
end
    
"""
    column_vecmul!(A::SparseMatrixCSC, x::AbstractVector)

Scale column `j` of the sparse matrix `A` by the `j`-th element of `x`, in
place, and return `A`.

Only stored entries are touched while the factor is finite, so the sparsity
pattern is preserved. When a factor is non-finite (`Inf`, `-Inf` or `NaN`) the
structural zeros of that column would become `NaN` under ordinary arithmetic
(`0 * Inf`), so they are materialised and the column becomes dense.

Throws `DimensionMismatch` if `length(x) != size(A, 2)`.

NOTE(review): the generic `AbstractMatrix` method broadcasts `A .* x`, which for
a plain vector `x` pairs `x` with the first dimension of `A`. This method pairs
`x` with the columns, as the original dimension check did -- confirm which
orientation callers expect.
"""
function column_vecmul!(A::SparseMatrixCSC, x::AbstractVector{T}) where T
    size(A, 2) == length(x) || throw(DimensionMismatch(
        "A has $(size(A, 2)) columns but x has $(length(x)) elements"
    ))

    for (col, x_val) in zip(axes(A, 2), x)
        # `nonzeros` exposes the live value storage; `findnz` would return a
        # copy and leave `A` unchanged. `nzrange` is recomputed per column, so
        # empty columns and entries inserted below are both handled.
        vals = nonzeros(A)
        for k in nzrange(A, col)
            vals[k] *= x_val
        end

        isfinite(x_val) && continue

        # Non-finite factor: every structural zero in this column takes the
        # value that `0 * x_val` has (NaN). Snapshot the stored row indices
        # first, because inserting entries rewrites the index storage.
        stored_rows = rowvals(A)[nzrange(A, col)]
        fill_val = zero(eltype(A)) * x_val
        for row in setdiff(axes(A, 1), stored_rows)
            A[row, col] = fill_val
        end
    end

    return A
end

column_vecmul! (generic function with 3 methods)

# Graph Helpers
These are helper functions for working with graphs

In [27]:
using LightGraphs
using SimpleWeightedGraphs
using GraphPlot

## Converters

In [10]:
# Convert a weighted graph into a `SimpleGraph` constructed from its adjacency matrix.
function Base.convert(::Type{SimpleGraph}, graph::AbstractSimpleWeightedGraph)
    adjacency = adjacency_matrix(graph)
    return SimpleGraph(adjacency)
end

## Weight Normalisers


In [38]:
# Return a new array in which every entry of `W` is divided by the sum taken
# along `dims=2` for its row, so each row of the result sums to one.
function prob_norm(W)
    row_totals = sum(W; dims=2)
    return W ./ row_totals
end

# In-place variant of `prob_norm`: hands `W` and the reciprocals of its
# `dims=2` sums to `column_vecmul!` and returns that call's result.
function prob_norm!(W)
    inverse_totals = 1 ./ sum(W; dims=2)
    return column_vecmul!(W, inverse_totals)
end

prob_norm! (generic function with 1 method)

### Plotting

`gplot` has no method for weighted graphs, so add one that converts the graph to a `SimpleGraph` before plotting.

In [31]:
# `gplot` method for weighted graphs: converts the graph to a `SimpleGraph`
# and forwards every positional and keyword argument unchanged.
function GraphPlot.gplot(graph::AbstractSimpleWeightedGraph, args...; kwargs...)
    unweighted = convert(SimpleGraph, graph)
    return gplot(unweighted, args...; kwargs...)
end

#### Goodplot
Plotting with sensible defaults

 - `node_scale`: set to control the overall size of the nodes. default 1.0
 - `node_scaling`: set to control the size variance of the nodes based on degree. default 3.0, lower means more variance

In [34]:
# Plot `graph` with `gplot`, sizing nodes from their degree.
#  - `node_scale` multiplies the overall node size passed as `NODESIZE`.
#  - `node_scaling` is added to each degree before taking the logarithm.
# `nodesize` and `NODESIZE` are placed after `kwargs...`, so the values
# computed here take precedence over any passed by the caller.
function goodplot(graph, args...; node_scale=1, node_scaling=3, kwargs...)
    relative_sizes = log.(degree(graph) .+ node_scaling)
    overall_size = node_scale * 0.15 * (3.0 / sqrt(length(vertices(graph))))
    return gplot(
        graph,
        args...;
        kwargs...,
        nodesize=relative_sizes,
        NODESIZE=overall_size,
    )
end

goodplot (generic function with 1 method)

# Clustering Helpers

In [12]:
using Clustering



#### Show Clusters
 - `showfn`: function `id->texttoprint`
 - `res`: a `ClusteringResult` whose assignments are printed, one cluster at a time

In [17]:
# Print the members of every cluster in `res`, one member per line, followed
# by a separator line after each cluster.
#  - `showfn`: maps a member index to what should be printed; its result is
#    splatted into `println`. The one-argument method uses `string`.
#  - `res`: anything `assignments` accepts.
# Member indices are taken from the assignment vector itself rather than from
# an unrelated global, so the function depends only on its arguments.
show_cluster(res) = show_cluster(string, res)
function show_cluster(showfn, res)
    assigned = assignments(res)
    for cluster in unique(assigned)
        for id in eachindex(assigned)
            if assigned[id] == cluster
                println(showfn(id)...)
            end
        end
        println("-------------------------------")
    end
end

show_cluster (generic function with 2 methods)

#### Color Clusters
Returns a good color set for clusters,
1 color per element

In [25]:
using Colors
# One colour per clustered element: builds a palette of `nclusters` distinct
# colours seeded with blue and looks up each element's assignment in it.
function color_clusters(clustering_res::ClusteringResult)
    palette = distinguishable_colors(nclusters(clustering_res), colorant"blue")
    return map(label -> palette[label], assignments(clustering_res))
end

# One colour per element of a raw assignment vector.
# The palette has one entry per distinct label and is indexed by the label's
# rank among the sorted distinct labels, not by the label value itself. Labels
# therefore need not be exactly `1:k` (gaps or a `0` label are fine), while
# labels that are exactly `1:k` still get the colour `palette[label]`.
function color_clusters(assignments::AbstractVector)
    labels = sort(unique(assignments))
    palette = distinguishable_colors(length(labels), colorant"blue")
    # `labels` is sorted and contains every label, so the search is an exact hit.
    return [palette[searchsortedfirst(labels, label)] for label in assignments]
end

color_clusters (generic function with 3 methods)