# LDA with NLTK and Gensim

* Start by importing python libraries that we'll be using

In [46]:
%load_ext autoreload
%autoreload 2

import sqlite3
import string
import logging
import pickle

from gensim import corpora, utils, models, similarities
from collections import defaultdict

import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.corpus import wordnet
nltk.download('wordnet')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

* Let's set up some basic config

In [2]:
# Set up logging for gensim
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Punctuation and stop words to be removed later
punctuation = set(string.punctuation)
stoplist = set(stopwords.words('english'))

# For LDA training later
dictionary = corpora.Dictionary()
lemma = WordNetLemmatizer()

NUM_PASSES=10
NUM_TOPICS=100
RANDOM_STATE=1

# Database and backups
DATABASE = '../data/content.db'
LDA_BACKUP = '../data/lda_model'
DICT_BACKUP = '../data/dictionary'
CORPUS_BACKUP = '../data/corpus'

### Remove punctuation from text

In [3]:
def remove_punctuation(text):
    """
    Remove punctuation from text by checking each character against a set of punctation characters
    :text: string
    :return: string
    """
    return ''.join(char for char in text if char not in punctuation)

### Remove numbers from text

In [4]:
def remove_numbers(text):
    """
    Remove numbers from text as they aren't of value to our model
    :text: string
    :return: string
    """
    return ''.join(char for char in text if not char.isdigit())

### Remove stop words from text

In [5]:
def remove_stop_words(text):
    """
    Remove common words as they won't add any value to our model
    :text: string
    :return: string
    """
    return ' '.join([word for word in text.split() if word not in stoplist])

### Remove single character words

In [6]:
def remove_single_characters(text):
    """
    Remove any remaining single-character words
    :text: string
    :return: string
    """
    return ' '.join([word for word in text.split() if len(word) > 1])

### Lemmatize our document

In [7]:
def lemmatize(text):
    """
    Use NLTK lemma functionality to get the route word
    :text: string
    :return: string
    """
    return ' '.join([lemma.lemmatize(word) for word in text.split()])


### Single function to clean text

In [18]:
def get_cleaned_text(text):
    """
    Return the page with stopwords, digits, punctuation and single character words removed
    :text: string
    :return: string
    """
    # Remove \n characters (Wikipedia has a lot of them in the page content!)
    text = text.replace('\n', '')
    # Remove numbers
    text = remove_numbers(text)
    # Remove stop words
    text = remove_stop_words(text)
    # Remove punctuation
    text = remove_punctuation(text)
    # Remove single character words
    text = remove_single_characters(text)
    # Lemmatize the document
    text = lemmatize(text)
    return text

### Create a class for our content

This class will use the database that we created when we downloaded content from Wikipedia

In [19]:
class Content:
    def __init__(self, db_file):
        """
        Intialise the crawl_wikipedia class, set up a
        lightweight database for storing content for later use
        :param db_file: string
        """
        self.categories = []
        # Connect to the DB db
        self.conn = sqlite3.connect(db_file)
        self.cursor = self.conn.cursor()

    def get_page_urls(self):
        """
        Retrieve a list of urls from the database
        :return: list of urls
        """
        return [row for row in self.cursor.execute("SELECT url FROM content")]
    
    def get_page_ids(self):
        """
        Retrieve a list of page ids from the database
        :return: list of page id tuples
        """
        return [row for row in self.cursor.execute("SELECT pageid FROM content")]   
    
    def get_page_by_id(self, pageid):
        """
        Retrieve the page with the specified pageid
        Note that this is of the format (pageid, ) for SQLite3 to work, for example
        to get the page with the id of 1 in our database, set pageid to ('1', )
        :pageid: tuple ('id', )
        :return: string
        """
        return str(self.cursor.execute("SELECT content FROM content WHERE pageid=?", pageid).fetchone()).lower()
    
    def get_page_url_by_id(self, pageid):
        """
        Retrieve the page with the specified pageid
        Note that this is of the format (pageid, ) for SQLite3 to work, for example
        to get the page with the id of 1 in our database, set pageid to ('1', )
        :pageid: tuple ('id', )
        :return: string
        """
        return self.cursor.execute("SELECT url FROM content WHERE pageid=?", pageid).fetchone()
            
    def __iter__(self):
        """
        Iterator for the document set stored in the database
        This is more efficient memory wise than loading the complete document set into memory
        and therefore will scale well for larger document sets (or those not available on local disk)
        :return: tuple (string, )
        """
        for pageid in self.get_page_ids():
            page = self.get_page_by_id(pageid)
            yield get_cleaned_text(page).split()

### Exploring and cleaning our content

* Access our database of Wikipedia content and get a list of all of the pages IDs

In [20]:
content = Content(DATABASE)
page_ids = content.get_page_ids()

* Let's view a page from our database

In [59]:
page = content.get_page_by_id(page_ids[0])
print(page)



* Now let's remove punctuation

In [60]:
page = page.replace('\n', '')
page = remove_punctuation(page)
print(page)

artificial intelligence ai sometimes called machine intelligence iss such as learning and problem solvingthe scope of ai is disputed as machines become increasingly capable tasks considered as requiring intelligence are often removed from the definition a phenomenon known as thennnartificial intelligence was founded as an academic discipline in 1956 and in the years since has experienced several waves of optimism followed by disappointment and the loss of funding known as an s or deep philosophical differences subfields have also been based on social factors particular institutions or the work of particular researchersnnthe traditional problems or goals of ai research includeand many othersnnnthe field was founded on the claim thatnnin the twentyfirst century ai techniques have experienced a resurgence following concurrent advances in helping to solve many challenging problems in computer science history thoughtcapablennthe study of mechanical orartificial neuronsthe field of ai resear

* Remove numbers

In [61]:
page = remove_numbers(page)
print(page)

artificial intelligence ai sometimes called machine intelligence iss such as learning and problem solvingthe scope of ai is disputed as machines become increasingly capable tasks considered as requiring intelligence are often removed from the definition a phenomenon known as thennnartificial intelligence was founded as an academic discipline in  and in the years since has experienced several waves of optimism followed by disappointment and the loss of funding known as an s or deep philosophical differences subfields have also been based on social factors particular institutions or the work of particular researchersnnthe traditional problems or goals of ai research includeand many othersnnnthe field was founded on the claim thatnnin the twentyfirst century ai techniques have experienced a resurgence following concurrent advances in helping to solve many challenging problems in computer science history thoughtcapablennthe study of mechanical orartificial neuronsthe field of ai research w

* And stop words

In [62]:
page = remove_stop_words(page)
print(page)

artificial intelligence ai sometimes called machine intelligence iss learning problem solvingthe scope ai disputed machines become increasingly capable tasks considered requiring intelligence often removed definition phenomenon known thennnartificial intelligence founded academic discipline years since experienced several waves optimism followed disappointment loss funding known deep philosophical differences subfields also based social factors particular institutions work particular researchersnnthe traditional problems goals ai research includeand many othersnnnthe field founded claim thatnnin twentyfirst century ai techniques experienced resurgence following concurrent advances helping solve many challenging problems computer science history thoughtcapablennthe study mechanical orartificial neuronsthe field ai research born atagreed writing within generationxa problem creating artificial intelligence substantially solvednnthey failed recognize difficulty remaining tasks progress slo

In [63]:
page = remove_single_characters(page)
print(page)

artificial intelligence ai sometimes called machine intelligence iss learning problem solvingthe scope ai disputed machines become increasingly capable tasks considered requiring intelligence often removed definition phenomenon known thennnartificial intelligence founded academic discipline years since experienced several waves optimism followed disappointment loss funding known deep philosophical differences subfields also based social factors particular institutions work particular researchersnnthe traditional problems goals ai research includeand many othersnnnthe field founded claim thatnnin twentyfirst century ai techniques experienced resurgence following concurrent advances helping solve many challenging problems computer science history thoughtcapablennthe study mechanical orartificial neuronsthe field ai research born atagreed writing within generationxa problem creating artificial intelligence substantially solvednnthey failed recognize difficulty remaining tasks progress slo

### Lemmatize the document

In [64]:
page = lemmatize(page)
print(page)

artificial intelligence ai sometimes called machine intelligence i learning problem solvingthe scope ai disputed machine become increasingly capable task considered requiring intelligence often removed definition phenomenon known thennnartificial intelligence founded academic discipline year since experienced several wave optimism followed disappointment loss funding known deep philosophical difference subfields also based social factor particular institution work particular researchersnnthe traditional problem goal ai research includeand many othersnnnthe field founded claim thatnnin twentyfirst century ai technique experienced resurgence following concurrent advance helping solve many challenging problem computer science history thoughtcapablennthe study mechanical orartificial neuronsthe field ai research born atagreed writing within generationxa problem creating artificial intelligence substantially solvednnthey failed recognize difficulty remaining task progress slowed response cr

### Now build a model using gensim

* Start by building a dictionary of all words found in the cleaned content

In [65]:
dictionary = corpora.Dictionary(content)

2018-09-03 20:23:43,261 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2018-09-03 20:24:14,456 : INFO : built Dictionary(132535 unique tokens: ['a', 'aaai', 'aally', 'abandoned', 'abduction']...) from 2729 documents (total 1296601 corpus positions)


* Ignore words that appear in less than 5 documents or more than 40% documents

(These settings are configurable and can produce very different results for the same document set)

In [67]:
dictionary.filter_extremes(no_below=5, no_above=0.4)

2018-09-03 20:25:22,981 : INFO : discarding 0 tokens: []...
2018-09-03 20:25:22,982 : INFO : keeping 14369 tokens which were in no less than 5 and no more than 1091 (=40.0%) documents
2018-09-03 20:25:23,007 : INFO : resulting dictionary: Dictionary(14369 unique tokens: ['a', 'aaai', 'abandoned', 'abduction', 'ability']...)


* Now create a corpus

In [29]:
corpus = [dictionary.doc2bow(text) for text in content]

In [69]:
len(corpus)

2729

In [70]:
corpus[100]

[(25, 1),
 (28, 1),
 (29, 2),
 (32, 2),
 (48, 2),
 (49, 1),
 (63, 1),
 (64, 2),
 (69, 6),
 (74, 1),
 (85, 2),
 (89, 3),
 (94, 1),
 (96, 3),
 (102, 1),
 (104, 18),
 (106, 2),
 (107, 2),
 (111, 1),
 (114, 4),
 (115, 8),
 (121, 1),
 (124, 4),
 (131, 2),
 (135, 1),
 (137, 1),
 (138, 3),
 (160, 3),
 (161, 4),
 (162, 1),
 (166, 2),
 (168, 1),
 (172, 1),
 (176, 8),
 (177, 2),
 (179, 1),
 (184, 1),
 (187, 1),
 (192, 1),
 (194, 3),
 (195, 1),
 (197, 1),
 (204, 2),
 (205, 1),
 (223, 1),
 (257, 1),
 (261, 2),
 (273, 2),
 (277, 2),
 (297, 1),
 (298, 1),
 (299, 1),
 (305, 1),
 (317, 1),
 (321, 2),
 (324, 1),
 (325, 1),
 (328, 1),
 (331, 3),
 (334, 3),
 (335, 2),
 (338, 1),
 (339, 1),
 (344, 1),
 (346, 4),
 (349, 1),
 (353, 2),
 (355, 1),
 (360, 1),
 (364, 1),
 (372, 1),
 (373, 4),
 (375, 2),
 (380, 3),
 (384, 6),
 (389, 1),
 (393, 1),
 (396, 2),
 (401, 2),
 (417, 1),
 (425, 18),
 (426, 1),
 (433, 1),
 (436, 1),
 (439, 6),
 (455, 1),
 (456, 2),
 (457, 1),
 (460, 6),
 (462, 1),
 (463, 2),
 (468, 2),


In [73]:
dictionary[656]

'field'

* Build our LDA model

Note this may take a while depending on how many documents you have and how many passes you set.  
An increased number of passes improves consistency of the model at the expense of longer training times.

In [74]:
lda = models.LdaModel(corpus, id2word=dictionary, random_state=RANDOM_STATE, num_topics=NUM_TOPICS, passes=NUM_PASSES)

2018-09-03 20:28:33,128 : INFO : using symmetric alpha at 0.01
2018-09-03 20:28:33,129 : INFO : using symmetric eta at 0.01
2018-09-03 20:28:33,133 : INFO : using serial LDA version on this node
2018-09-03 20:28:33,531 : INFO : running online (multi-pass) LDA training, 100 topics, 10 passes over the supplied corpus of 2729 documents, updating model once every 2000 documents, evaluating perplexity every 2729 documents, iterating 50x with a convergence threshold of 0.001000
2018-09-03 20:28:33,533 : INFO : PROGRESS: pass 0, at document #2000/2729
2018-09-03 20:28:41,480 : INFO : merging changes from 2000 documents into a model of 2729 documents
2018-09-03 20:28:41,790 : INFO : topic #84 (0.010): 0.009*"human" + 0.005*"program" + 0.004*"game" + 0.004*"intelligence" + 0.004*"artificial" + 0.003*"ai" + 0.003*"user" + 0.003*"may" + 0.003*"information" + 0.003*"agent"
2018-09-03 20:28:41,791 : INFO : topic #62 (0.010): 0.008*"–n" + 0.005*"human" + 0.005*"machine" + 0.004*"may" + 0.004*"ontolo

2018-09-03 20:29:22,875 : INFO : topic #66 (0.010): 0.022*"skill" + 0.021*"payload" + 0.015*"time" + 0.014*"aviation" + 0.012*"human" + 0.009*"moravec" + 0.009*"quote" + 0.008*"would" + 0.008*"breach" + 0.007*"kg"
2018-09-03 20:29:22,878 : INFO : topic #8 (0.010): 0.133*"david" + 0.029*"adam" + 0.023*"interviewed" + 0.021*"oscar" + 0.019*"minion" + 0.016*"walsh" + 0.012*"mirror" + 0.008*"byto" + 0.008*"strip" + 0.008*"film"
2018-09-03 20:29:22,882 : INFO : topic #64 (0.010): 0.019*"node" + 0.016*"tree" + 0.011*"value" + 0.010*"algorithm" + 0.007*"variable" + 0.007*"function" + 0.007*"search" + 0.006*"network" + 0.006*"vector" + 0.006*"number"
2018-09-03 20:29:22,886 : INFO : topic #53 (0.010): 0.013*"human" + 0.007*"image" + 0.007*"automated" + 0.006*"robot" + 0.006*"fly" + 0.005*"use" + 0.005*"technology" + 0.004*"pixel" + 0.004*"current" + 0.004*"document"
2018-09-03 20:29:22,890 : INFO : topic diff=inf, rho=0.478666
2018-09-03 20:29:22,895 : INFO : PROGRESS: pass 3, at document #200

2018-09-03 20:30:33,650 : INFO : PROGRESS: pass 5, at document #2729/2729
2018-09-03 20:30:37,546 : INFO : merging changes from 729 documents into a model of 2729 documents
2018-09-03 20:30:37,862 : INFO : topic #78 (0.010): 0.091*"reasoning" + 0.036*"expert" + 0.025*"logic" + 0.024*"knowledge" + 0.018*"problem" + 0.014*"application" + 0.013*"use" + 0.013*"theorem" + 0.011*"inference" + 0.010*"type"
2018-09-03 20:30:37,863 : INFO : topic #54 (0.010): 0.009*"lab" + 0.008*"company" + 0.007*"book" + 0.007*"life" + 0.007*"january" + 0.006*"project" + 0.006*"culture" + 0.006*"work" + 0.005*"power" + 0.005*"november"
2018-09-03 20:30:37,865 : INFO : topic #88 (0.010): 0.041*"agent" + 0.030*"action" + 0.028*"goal" + 0.026*"database" + 0.024*"rule" + 0.017*"structure" + 0.015*"event" + 0.014*"active" + 0.014*"part" + 0.013*"qa"
2018-09-03 20:30:37,868 : INFO : topic #60 (0.010): 0.077*"constraint" + 0.049*"programming" + 0.048*"logic" + 0.041*"clause" + 0.022*"moment" + 0.019*"goal" + 0.018*"p

2018-09-03 20:31:50,288 : INFO : topic diff=inf, rho=0.310617
2018-09-03 20:31:55,266 : INFO : -8.097 per-word bound, 273.8 perplexity estimate based on a held-out corpus of 729 documents with 191667 words
2018-09-03 20:31:55,272 : INFO : PROGRESS: pass 8, at document #2729/2729
2018-09-03 20:31:59,541 : INFO : merging changes from 729 documents into a model of 2729 documents
2018-09-03 20:31:59,891 : INFO : topic #72 (0.010): 0.095*"rule" + 0.022*"type" + 0.021*"set" + 0.019*"action" + 0.019*"engine" + 0.017*"operator" + 0.013*"example" + 0.011*"order" + 0.010*"plan" + 0.010*"production"
2018-09-03 20:31:59,892 : INFO : topic #52 (0.010): 0.061*"last" + 0.039*"title" + 0.025*"year" + 0.023*"journal" + 0.020*"page" + 0.019*"url" + 0.019*"publisher" + 0.017*"volume" + 0.014*"issue" + 0.012*"cite"
2018-09-03 20:31:59,893 : INFO : topic #3 (0.010): 0.050*"carpenter" + 0.010*"painting" + 0.007*"vague" + 0.006*"eu" + 0.003*"upper" + 0.003*"alsonexternal" + 0.003*"byn" + 0.003*"andrea" + 0.0

* Let's print 10 of the topics

In [75]:
lda.print_topics(10)

2018-09-03 20:34:12,270 : INFO : topic #38 (0.010): 0.020*"algorithm" + 0.017*"bias" + 0.017*"data" + 0.014*"example" + 0.013*"may" + 0.009*"user" + 0.008*"model" + 0.007*"based" + 0.007*"program" + 0.007*"behavior"
2018-09-03 20:34:12,271 : INFO : topic #94 (0.010): 0.088*"decision" + 0.074*"service" + 0.035*"customer" + 0.033*"quality" + 0.025*"pp" + 0.018*"process" + 0.016*"account" + 0.015*"table" + 0.013*"business" + 0.012*"list"
2018-09-03 20:34:12,273 : INFO : topic #1 (0.010): 0.032*"series" + 0.018*"season" + 0.012*"episode" + 0.010*"nn" + 0.009*"hawkins" + 0.007*"rowspan" + 0.007*"max" + 0.006*"million" + 0.006*"july" + 0.006*"june"
2018-09-03 20:34:12,274 : INFO : topic #6 (0.010): 0.060*"language" + 0.041*"word" + 0.028*"text" + 0.027*"semantic" + 0.023*"knowledge" + 0.023*"natural" + 0.019*"sentence" + 0.018*"pattern" + 0.012*"relation" + 0.011*"cognitive"
2018-09-03 20:34:12,276 : INFO : topic #47 (0.010): 0.034*"company" + 0.023*"product" + 0.021*"technology" + 0.014*"na

[(38,
  '0.020*"algorithm" + 0.017*"bias" + 0.017*"data" + 0.014*"example" + 0.013*"may" + 0.009*"user" + 0.008*"model" + 0.007*"based" + 0.007*"program" + 0.007*"behavior"'),
 (94,
  '0.088*"decision" + 0.074*"service" + 0.035*"customer" + 0.033*"quality" + 0.025*"pp" + 0.018*"process" + 0.016*"account" + 0.015*"table" + 0.013*"business" + 0.012*"list"'),
 (1,
  '0.032*"series" + 0.018*"season" + 0.012*"episode" + 0.010*"nn" + 0.009*"hawkins" + 0.007*"rowspan" + 0.007*"max" + 0.006*"million" + 0.006*"july" + 0.006*"june"'),
 (6,
  '0.060*"language" + 0.041*"word" + 0.028*"text" + 0.027*"semantic" + 0.023*"knowledge" + 0.023*"natural" + 0.019*"sentence" + 0.018*"pattern" + 0.012*"relation" + 0.011*"cognitive"'),
 (47,
  '0.034*"company" + 0.023*"product" + 0.021*"technology" + 0.014*"name" + 0.013*"service" + 0.012*"million" + 0.011*"inc" + 0.011*"founded" + 0.010*"announced" + 0.009*"infobox"'),
 (35,
  '0.029*"uav" + 0.009*"selfdriving" + 0.008*"vehicle" + 0.008*"may" + 0.008*"em" + 

* And now let's save our model and dictionary

In [76]:
lda.save(LDA_BACKUP)
with open(DICT_BACKUP, "wb") as fp:
    pickle.dump(dictionary, fp)
fp.close()
with open(CORPUS_BACKUP, "wb") as fp:
    pickle.dump(corpus, fp)
fp.close()

2018-09-03 20:36:05,448 : INFO : saving LdaState object under ../data/lda_model.state, separately None
2018-09-03 20:36:05,764 : INFO : saved ../data/lda_model.state
2018-09-03 20:36:05,798 : INFO : saving LdaModel object under ../data/lda_model, separately ['expElogbeta', 'sstats']
2018-09-03 20:36:05,799 : INFO : storing np array 'expElogbeta' to ../data/lda_model.expElogbeta.npy
2018-09-03 20:36:06,371 : INFO : not storing attribute state
2018-09-03 20:36:06,373 : INFO : not storing attribute id2word
2018-09-03 20:36:06,374 : INFO : not storing attribute dispatcher
2018-09-03 20:36:06,398 : INFO : saved ../data/lda_model


### Now let's query our dataset to find related documents

* Let's start by creating a get_similarity() function

In [34]:
def get_similarity(lda, q_vec):
    index = similarities.MatrixSimilarity(lda[corpus])
    sims = index[q_vec]
    return sims
    #return set(sims)

* Let's manually create a subject we want to query the dataset for

In [35]:
query = "using deep learning for computer vision in real time"

* And see how our LDA model interprets this

Remember, we hare passing this through the same text cleaning functionality as the documents, so punctuation, stop words, etc. will all be removed.

This leaves us with the statistically important related words from our dictionary

In [79]:
bow = dictionary.doc2bow(get_cleaned_text(query).split())
words = [word for word in bow]
for word in words:
    print('{}: {}'.format(word[0], dictionary[word[0]]))

444: deep
968: learning
1398: real
1720: time
1803: using
2200: vision


* Now let's query our LDA model based on this bag of words

This will give us a vector based on our model for our query above. Note that LDA uses some randomisation and therefore you may appear to get different vectors output here if you run this multiple times.  However, the results below are generally consistent.

In [80]:
q_vec = lda[bow]
print(q_vec)

[(37, 0.41977125), (80, 0.44022873)]


* Let's view the details for the LDA topic relating to the query above

In [81]:
print(lda.print_topic(max(q_vec, key=lambda item: item[1])[0]))

0.131*"vision" + 0.061*"retrieved" + 0.037*"research" + 0.020*"algorithm" + 0.014*"ict" + 0.014*"centre" + 0.013*"automated" + 0.010*"winter" + 0.010*"visual" + 0.010*"technology"


* Now let's get the similarity of this query vector to the document vectors and sort in high-to-low order

In [82]:
sims = get_similarity(lda, q_vec)
sims = sorted(enumerate(sims), key=lambda item: -item[1])

2018-09-03 20:38:56,308 : INFO : creating matrix with 2729 documents and 100 features
  if np.issubdtype(vec.dtype, np.int):


* Now render the results

The gensim MatrixSimilarity function used above doesn't always give unique values, hence we can't just print the top n results.  If we do, we occasionally get duplication.

In [83]:
idx=0
pids=[]
result = 10
while result > 0:
    pageid = page_ids[sims[idx][0]]
    if pageid not in pids:
        pids.append(pageid)
        #print(pageid)
        print('Page ID {}: {}'.format(pageid[0], content.get_page_url_by_id(pageid)[0]))
        result -= 1
    idx += 1  

Page ID 49803819: https://en.wikipedia.org/wiki/Australian_Research_Council_Centre_of_Excellence_for_Robotic_Vision
Page ID 4081616: https://en.wikipedia.org/wiki/Relaxation_labelling
Page ID 55843837: https://en.wikipedia.org/wiki/Automated_machine_learning
Page ID 866256: https://en.wikipedia.org/wiki/User_illusion
Page ID 2884728: https://en.wikipedia.org/wiki/Automated_reasoning
Page ID 4510677: https://en.wikipedia.org/wiki/Agent_architecture
Page ID 727607: https://en.wikipedia.org/wiki/Guarded_Command_Language
Page ID 46572604: https://en.wikipedia.org/wiki/Cortical_modem
Page ID 16810240: https://en.wikipedia.org/wiki/Kraken_botnet
Page ID 49726563: https://en.wikipedia.org/wiki/Brian_Deer_Classification_System
