## Category network
Create a network visualization where nodes are categories and links exist between two nodes if one or more authors posted stories to both categories. Uses d3 to draw the network (I wanted to experiment using d3 in a notebook).  

First, we count up the pairs and look at which category pairs show up the most in the data. Then we load d3 and use a force network to visualize the connections between categories. 

In [25]:
import pandas as pd
import itertools
from collections import defaultdict

# Load in data
df = pd.read_csv('../data/literotica_urls.tsv', sep='\t')

# Count up the category pairs
pair_counts = defaultdict(int)
for author, group in df.groupby('author'):
    pairs = itertools.combinations(list(set(list(group.category))), 2)
    for pair in pairs:
        pair_counts[pair] += 1

print(len(pair_counts), 'pairs of cross-category activity')

  interactivity=interactivity, compiler=compiler, result=result)


810 pairs of cross-category activity


In [26]:
# Print out top 40 categories
sorted_pair_counts = sorted(pair_counts.items(), key=lambda d: d[1], reverse=True)
for pair, count in sorted_pair_counts[:40]:
    print(pair[0], '---', pair[1], '\t', count)

group-sex-stories --- erotic-couplings 	 322
first-time-sex-stories --- erotic-couplings 	 253
mature-sex --- erotic-couplings 	 207
non-human-stories --- science-fiction-fantasy 	 204
non-consent-stories --- group-sex-stories 	 194
mature-sex --- first-time-sex-stories 	 185
non-consent-stories --- erotic-couplings 	 184
exhibitionist-voyeur --- erotic-couplings 	 180
non-consent-stories --- bdsm-stories 	 179
non-consent-stories --- exhibitionist-voyeur 	 179
first-time-sex-stories --- taboo-sex-stories 	 177
non-consent-stories --- first-time-sex-stories 	 176
anal-sex-stories --- erotic-couplings 	 173
science-fiction-fantasy --- erotic-horror 	 171
mature-sex --- taboo-sex-stories 	 169
masturbation-stories --- erotic-couplings 	 168
anal-sex-stories --- first-time-sex-stories 	 165
masturbation-stories --- exhibitionist-voyeur 	 162
non-human-stories --- erotic-horror 	 160
fetish-stories --- bdsm-stories 	 159
adult-romance --- erotic-couplings 	 158
non-consent-stories --- anal

In [33]:
# Create nodes and edges to use in d3 plot below
edges = []
node_set = set()

for pair, count in sorted_pair_counts:
    node_set.add(pair[0])
    node_set.add(pair[1])
    edges.append({ 'source': pair[0], 'target': pair[1], 'count': count })

nodes = [{'category': node } for node in node_set]

network = {
    'nodes': nodes,
    'edges': edges,
}

# Save to json so we can use in d3
import json

network = json.dumps(network)

In [34]:
from IPython.display import Javascript

Javascript('window.network={};'.format(network))

<IPython.core.display.Javascript object>

In [35]:
%%javascript

require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/4.12.2/d3.min'
  }
});

<IPython.core.display.Javascript object>

In [104]:
%%javascript

const forceWidth = 800;
const forceHeight = 500;
const center = {x: 0, y: forceHeight - 50};

// The network is very densely connected. Increase this number to make it sparser.
const MIN_AUTHOR_CROSSPOST = 80;

require(['d3'], d3 => {
    const { nodes, edges } = window.network;
    
    const filteredEdges = edges.filter(d => d.count > MIN_AUTHOR_CROSSPOST);
    
    // Remove chart container so that disappears when re-run this cell
    $('#chart-container').remove();
    
    const el = element.append('<div id="chart-container"></div>');
    el.width(`${forceWidth}px`);
    el.height(`${forceHeight}px`);
    
    const svg = d3.select('#chart-container')
        .append('svg')
        .attr('width', forceWidth)
        .attr('height', forceHeight);
    
    const forceLink = d3.forceLink().id(d => d.category);
    const forceCharge = d3.forceManyBody().strength(-400);
    const forceCenter = d3.forceCenter(center.x, center.y);
    
    const simulation = d3.forceSimulation()
        .force('link', forceLink)
        .force('charge', forceCharge)
        .force('center', forceCenter);
    
    const strokeWidth = d3.scaleLinear()
        .domain(d3.extent(filteredEdges, d => d.count))
        .range([1, 4]);
    
    const opacity = d3.scalePow()
        .domain(strokeWidth.domain())
        .range([0.1, 1]);
    
    const link = svg.append('g')
        .attr('class', 'links')
        .selectAll('.link')
        .data(filteredEdges)
        .enter()
        .append('line')
        .attr('id', d => `${d.source.category}-${d.target.category}`)
        .attr('class', 'link')
        .attr('stroke', '#aaa')
        .attr('stroke-width', d => strokeWidth(d.count))
        .attr('opacity', d => opacity(d.count));

    const node = svg.append('g')
        .attr('class', 'nodes')
        .selectAll('.node')
        .data(nodes)
        .enter()
        .append('circle')
        .attr('id', d => `node-${d.category}`)
        .attr('class', 'node')
        .attr('r', 8)
        .on('mouseover', highlight)
        .on('mouseout', unhighlight);
    
    const label = svg.append('g')
        .attr('class', 'labels')
        .selectAll('.label')
        .data(nodes)
        .enter()
        .append('text')
        .attr('id', d => `label-${d.category}`)
        .attr('class', 'label')
        .text(d => d.category)
        .style('font-size', '11px')
        .on('mouseover', highlight)
        .on('mouseout', unhighlight);
    
    simulation.nodes(nodes).on('tick', tick);
    simulation.force('link').links(filteredEdges);
    
    function tick() {
        link
          .attr('x1', d => d.source.x)
          .attr('y1', d => d.source.y)
          .attr('x2', d => d.target.x)
          .attr('y2', d => d.target.y);

        node.attr('cx', d => d.x).attr('cy', d => d.y);
        label.attr('x', d => d.x).attr('y', d => d.y - 10);
    }
    
    function highlight(n) {
        d3.select(`#node-${n.category}`).attr('fill', 'red');
        d3.select(`#label-${n.category}`).attr('fill', 'red');
        
        filteredEdges.forEach(d => {
            let nid, lid, eid;
            if (d.source.category === n.category) {
                nid = `#node-${d.target.category}`;
                lid = `#label-${d.target.category}`;
                eid = `#${n.category}-${d.target.category}`;
            } else if (d.target.category === n.category) {
                nid = `#node-${d.source.category}`;
                lid = `#label-${d.source.category}`;
                eid = `#${d.source.category}-${n.category}`;
            }
            
            if (nid) {
                svg.select(nid).attr('fill', 'red');
                svg.select(lid).attr('fill', 'red');
                svg.select(eid).attr('stroke', 'red');
            }
        });
    }
    
    function unhighlight() {
        node.attr('fill', 'black');
        label.attr('fill', 'black');
        link.attr('stroke', '#aaa');
    }
});

<IPython.core.display.Javascript object>

The 