This code was modified from [here](https://github.com/dudaspm/PSUPythonWorkshop/blob/master/Fall2018/Workshop4/4.TwitterAPI%2BRE%2BGraph.ipynb) and shows how to make a force-directed graph of hashtags from a collection of Tweets using the [D3.js JavaScript library](https://d3js.org). This could be used for network analysis to visualize the co-occurrence of prevalent hashtags (or other entities) associated with a Twitter search. The original source code for this plot type can be found [here](https://observablehq.com/@d3/force-directed-graph).

In [2]:
from pymongo import MongoClient  # for accessing MongoDB database where Tweets are stored
import numpy as np
import pandas as pd
import re 
import json
from collections import OrderedDict

# Open a MongoDB client; no arguments are passed, so pymongo's default
# connection settings are used.
client = MongoClient()
# Last expression of the cell: the database names are shown as the cell output.
client.list_database_names()

['admin', 'books', 'config', 'local', 'outings', 'twitter']

In [3]:
# Select the `twitter` database and display the collections it contains
twitter = client["twitter"]
twitter.list_collection_names()

['tweets_2019_large', 'tweets_all_years']

In [4]:
# Handle to the `tweets_all_years` collection inside the `twitter` database
all_years = twitter["tweets_all_years"]

In [5]:
# Read every document of the collection into a list. The empty filter matches
# all documents; the projection drops MongoDB's `_id` field from each result.
cursor = all_years.find(filter={}, projection={'_id': 0})
tweets = list(cursor)

In [6]:
# Display the first stored tweet to show what a tweet document looks like
tweets[0]

{'created_at': 'Sat Dec 29 22:43:06 +0000 2007',
 'id': 544845252,
 'id_str': '544845252',
 'text': 'Anyone involved in artificial intelligence work? Specifically, anyone know of any AI system that can RECOGNISE puns in natural language?',
 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
 'truncated': False,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 5967392,
  'id_str': '5967392',
  'name': 'Mark Harrison',
  'screen_name': 'MarkHarrisonUK',
  'location': 'Malham Close, Crawley, West Su',
  'url': 'http://markharrison.wordpress.com',
  'description': 'Serial entrepreneur, currently running a boutique programming shop in London specialising in regulated markets.',
  'translator_type': 'none',
  'protected': False,
  'verified': False,
  'followers_count': 757,
  'friends_count': 600,
  'listed_count': 23,
  'favourite

In [8]:
# compile nodes and links for force-directed graph (see earlier links for details)
#
# nodes: unique hashtags in first-seen order (the list position later serves as
#        the node id when the graph is written to JSON).
# links: one [source, target] pair for every pair of hashtags that appear
#        together in the same tweet, in order of appearance.
#
# NOTE(review): fragments such as '#te', '#s' and '#cod' in the printed output
# suggest some tweet texts are cut off mid-hashtag -- confirm whether the stored
# documents carry a full-text field that should be read instead of "text".
hashtag_pattern = re.compile(r'\#\w+')  # compiled once instead of once per tweet

nodes = []
links = []
seen_hashtags = set()  # O(1) membership test; `nodes` itself keeps the order

for tweet in tweets[-100:]:  # only using last 100 tweets here
    hashtags = hashtag_pattern.findall(tweet["text"])  # find hashtags

    # register hashtags that have not been seen in any earlier tweet
    for hashtag in hashtags:
        if hashtag not in seen_hashtags:
            seen_hashtags.add(hashtag)
            nodes.append(hashtag)

    # link every hashtag to each hashtag that follows it in the same tweet
    for position, source in enumerate(hashtags):
        for target in hashtags[position + 1:]:
            links.append([source, target])

print('Nodes:', nodes, '\n')
print('Links:', links)

Nodes: ['#artificialintelligence', '#MachineLearning', '#Robots', '#AI', '#Job', '#futureofwork', '#ArtificialIntelligence', '#machineLearning', '#deepLearning', '#neuralnetwork', '#DataViz', '#DataAnalytics', '#Trends', '#tech', '#te', '#Music', '#Video', '#Indie', '#BMI', '#RedCross', '#Flood', '#SEO', '#P2P', '#robots', '#ai', '#Scientists', '#NeuralNetworks', '#Python', '#IoT', '#5G', '#Machinelearning', '#Innovation', '#healthcare', '#IBMCodeMVD', '#ML', '#DL', '#DeepLearning', '#Team', '#technology', '#distributedledgers', '#extendedreality', '#quantumcom', '#Diversity', '#inclusion', '#AudioBooks', '#microsoftdesign', '#digitalhealth', '#s', '#automation', '#robotics', '#manufacturers', '#FutureOfWork', '#AnalyticsInsights', '#BigData', '#python', '#javascript', '#angular', '#reactjs', '#vuejs', '#perl', '#ruby', '#Csharp', '#Java', '#linux', '#programming', '#network', '#security', '#golang', '#cod', '#safecity', '#citywidesurveillance', '#publicsafety', '#videosurveillance', '

In [9]:
# output nodes and links as JSON file
#
# File layout (read by the JavaScript cells through d3.json):
#   {"nodes": [{"name": <hashtag>}, ...],
#    "links": [{"source": <node position>, "target": <node position>}, ...]}
#
# The document is serialised with json.dump rather than assembled by string
# concatenation, so it stays valid JSON even when `nodes` or `links` is empty
# (the previous "strip the trailing comma" step removed the opening bracket in
# that case).
outputFile = "graph.json"
graphsize = 100  # not referenced in this cell; kept so the name stays defined

# position of each hashtag in `nodes` (first occurrence, like nodes.index),
# built once so each link lookup is a dict access instead of a list scan
node_position = {}
for position, name in enumerate(nodes):
    node_position.setdefault(name, position)

printNodes = [{'name': name} for name in nodes]

# identical (source, target) pairs are written only once, first occurrence first
uniqueLinks = OrderedDict()
for link in links:
    pair = (link[0], link[1])
    if pair not in uniqueLinks:
        uniqueLinks[pair] = {
            'source': node_position[link[0]],
            'target': node_position[link[1]],
        }
printLinks = list(uniqueLinks.values())

graph = OrderedDict([('nodes', printNodes), ('links', printLinks)])

# `with` closes the file even if serialisation raises
with open(outputFile, 'w', encoding='utf-8') as f:
    json.dump(graph, f, indent=2)

In [11]:
%%javascript
require.config({paths: {d3: "//d3js.org/d3.v4.min",}});

// generate the network (nodes and links) from the JSON file
//
// The result is published as window.nodes / window.links so that another
// cell can read it. d3.json is asynchronous: both arrays start out empty and
// are only filled once the request for graph.json has completed.
require(["d3"], function(d3) {
    window.nodes = [];
    window.links = [];

    d3.json("graph.json", function(error, graph) {
        // check for failure first: there is no parsed graph to log or walk
        // when the request or the JSON parsing failed
        if (error) throw error;

        console.log(graph);

        // one node object per entry; `index` is its position in the file
        graph.nodes.forEach(function(d, i) {
            window.nodes.push({
                name: d.name,
                index: i,
            });
        });

        // links refer to nodes by position in the file; replace those
        // positions with the node objects themselves
        graph.links.forEach(function(d, i) {
            window.links.push({
                source: window.nodes[d.source],
                target: window.nodes[d.target],
                index: i,
            });
        });
    });
});

<IPython.core.display.Javascript object>

In [12]:
%%javascript
element.append('<div id="graph1" style="min-width: 310px; height: 1000px; margin: 0 auto"></div>');
require.config({paths: {d3: "//d3js.org/d3.v4.min",}});

// build the plot
//
// Draws the force-directed graph from window.nodes / window.links, which the
// loader cell fills asynchronously after reading graph.json.
require(["d3"], function(d3) {

    var container = d3.select("div#graph1");

    // for keeping the screen clean
    container.selectAll("*").remove();

    // The loader fills these arrays only after its request for graph.json
    // finishes, so they can still be missing or empty when this cell runs
    // (e.g. with "Run All"). Say so instead of silently drawing nothing.
    var nodes = window.nodes;
    var links = window.links || [];
    if (!nodes || !nodes.length) {
        container.append("p")
            .text("No graph data available yet: run the cell that loads graph.json, " +
                  "wait for it to finish, then re-run this cell.");
        return;
    }

    var width = 800, height = 1000;

    // create SVG
    var svg = container.append("svg")
        .attr("width", (width)+"px")
        .attr("height", (height)+"px");

    // create color schema
    var color = d3.scaleOrdinal(d3.schemeCategory20); // https://github.com/d3/d3-scale

    var simulation = d3.forceSimulation() // https://github.com/d3/d3-force
        .force("link", d3.forceLink().id(function(d) { return d.index; }))
        .force("charge", d3.forceManyBody())
        .force("center", d3.forceCenter(width / 2, height / 2));

    var link = svg.append("g") // https://www.w3schools.com/graphics/svg_line.asp
        .attr("class", "links")
        .selectAll("line").data(links).enter().append("line")
        .style("stroke", "black")
        .style("stroke-width", "2px");

    // NOTE: the loader cell gives each node only `name` and `index`, so
    // d.group is undefined and every circle receives the same color.
    var node = svg.append("g") // https://www.w3schools.com/graphics/svg_circle.asp
        .attr("class", "nodes")
        .selectAll("circle")
        .data(nodes)
        .enter().append("circle")
        .attr("r", 5)
        .attr("fill", function(d) { return color(d.group); })
        .call(d3.drag()
            .on("start", dragstarted)
            .on("drag", dragged)
            .on("end", dragended));

    // hovering over a circle shows the hashtag as a tooltip
    node.append("title")
        .text(function(d) { return d.name; });

    simulation
        .nodes(nodes)
        .on("tick", ticked);

    simulation.force("link")
        .links(links);

    // on every simulation tick, move lines and circles to the new positions
    function ticked() {
        link
            .attr("x1", function(d) { return d.source.x; })
            .attr("y1", function(d) { return d.source.y; })
            .attr("x2", function(d) { return d.target.x; })
            .attr("y2", function(d) { return d.target.y; });

        node
            .attr("cx", function(d) { return d.x; })
            .attr("cy", function(d) { return d.y; });
    }

    // drag start: reheat the simulation and pin the node where it is
    function dragstarted(d) {
        if (!d3.event.active) simulation.alphaTarget(0.3).restart();
        d.fx = d.x;
        d.fy = d.y;
    }

    // while dragging: keep the node pinned under the pointer
    function dragged(d) {
        d.fx = d3.event.x;
        d.fy = d3.event.y;
    }

    // drag end: let the simulation cool down and release the node
    function dragended(d) {
        if (!d3.event.active) simulation.alphaTarget(0);
        d.fx = null;
        d.fy = null;
    }
})

<IPython.core.display.Javascript object>