# Using Wikipedia API to get Artificial Intelligence articles

In [89]:
import sqlite3
import wptools
import re
from bs4 import BeautifulSoup

In [251]:
category = 'Category:Artificial_intelligence'
depth = 2
members = []

In [252]:
class crawl_wikipedia:
    def __init__(self, db_file):
        self.categories=[]
        #self.pageids=[]
        self.count=0
        # Create db
        self.conn = sqlite3.connect(db_file)
        c = self.conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS content
            (pageid text, url text, content text)''')
        self.conn.commit()
        self.cursor = self.conn.cursor()
     
    def save_page_content(pageid, url, content):
        self.cursor.execute("INSERT INTO content VALUES (?,?,?)",
            (pageid, url, content))
        self.conn.commit()
        
    def get_categories_and_members(self, category, depth):
        print('Checking for subcategories of {} at depth {}'.format(category, depth))
        if depth:
            cat = wptools.category(category)
            cat_members = cat.get_members()
            # First let's save any members (pages) for this category
            if 'members' in cat_members.data.keys():
                for cat_member in cat_members.data['members']:
                    # print('Appending {} to pageids'.format(cat_member['pageid']))                   
                    #self.pageids.append(cat_member['pageid'])
                    page = wptools.page(pageid=cat_member['pageid']).get_parse()
                    # Remove <ref> and other HTML syntax
                    text = BeautifulSoup(page.data['wikitext'], 'html.parser').get_text()
                    # Remove other markup such as [[...]] and {{...}}
                    clean_content = re.sub(r"\s*{.*}\s*|\s*\[.*\]\s*", " ", text)
                    # Get URL in wikipedia
                    url = page.get_query().data['url']
                    # Now store
                    self.save_page_content(pageid, url, clean_content)
            # Now iterate through any subcategories
            if 'subcategories' in cat_members.data.keys():
                subcats = cat_members.data['subcategories']               
                for subcat in subcats: 
                    self.categories.append(subcat)
                    self.count += 1
                    #print('Appending {} / count = {}'.format(subcat['title'], self.count))
                    self.get_categories_and_members(subcat['title'], depth - 1)
            

In [253]:
crawler = crawl_wikipedia()
crawler.get_categories_and_members(category, depth)

Checking for subcategories of Category:Artificial_intelligence at depth 2


en.wikipedia.org (categorymembers) Category:Artificial_intelligence
Category:Artificial intelligence (en) data
{
  members: <list(326)> {'pageid': 1164, 'ns': 0, 'title': 'Artific...
  requests: <list(1)> category
  subcategories: <list(37)> {'pageid': 46305725, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Affective computing


Appending 1164 to pageids
Appending 6585513 to pageids
Appending 2142 to pageids
Appending 23886619 to pageids
Appending 43470933 to pageids
Appending 34668189 to pageids
Appending 1548772 to pageids
Appending 25277457 to pageids
Appending 5033373 to pageids
Appending 11338826 to pageids
Appending 42042198 to pageids
Appending 14273658 to pageids
Appending 6983799 to pageids
Appending 142224 to pageids
Appending 2862 to pageids
Appending 52642349 to pageids
Appending 43681495 to pageids
Appending 57638150 to pageids
Appending 402688 to pageids
Appending 21417543 to pageids
Appending 57812996 to pageids
Appending 57040423 to pageids
Appending 12379384 to pageids
Appending 24571654 to pageids
Appending 1126216 to pageids
Appending 24238010 to pageids
Appending 9025771 to pageids
Appending 15893057 to pageids
Appending 21577031 to pageids
Appending 35457079 to pageids
Appending 1908395 to pageids
Appending 195552 to pageids
Appending 51023476 to pageids
Appending 586357 to pageids
Appendi

Category:Affective computing (en) data
{
  members: <list(8)> {'pageid': 233942, 'ns': 0, 'title': 'Affecti...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 31996622, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:AI accelerators


Appending 233942 to pageids
Appending 38411570 to pageids
Appending 51023476 to pageids
Appending 14199008 to pageids
Appending 57687371 to pageids
Appending 56649828 to pageids
Appending 6435232 to pageids
Appending 53571766 to pageids
Appending Category:Social robots / count = 2
Checking for subcategories of Category:Social robots at depth 0
Appending Category:AI accelerators / count = 3
Checking for subcategories of Category:AI accelerators at depth 1


Category:AI accelerators (en) data
{
  members: <list(10)> {'pageid': 50827978, 'ns': 0, 'title': 'AI a...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 50827978 to pageids
Appending 453086 to pageids
Appending 49982604 to pageids
Appending 50362874 to pageids
Appending 50858569 to pageids
Appending 35891416 to pageids
Appending 50673241 to pageids
Appending 44297493 to pageids
Appending 50228744 to pageids
Appending 45639654 to pageids
Appending Category:Artificial intelligence applications / count = 4
Checking for subcategories of Category:Artificial intelligence applications at depth 1


Category:Artificial intelligence applications (en) data
{
  members: <list(102)> {'pageid': 15893057, 'ns': 0, 'title': 'App...
  requests: <list(1)> category
  subcategories: <list(15)> {'pageid': 21145129, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Artificial immune systems


Appending 15893057 to pageids
Appending 2142 to pageids
Appending 49238421 to pageids
Appending 15795950 to pageids
Appending 25910473 to pageids
Appending 52642349 to pageids
Appending 28139201 to pageids
Appending 49242352 to pageids
Appending 55572262 to pageids
Appending 55981499 to pageids
Appending 14842794 to pageids
Appending 13234913 to pageids
Appending 33529387 to pageids
Appending 40218456 to pageids
Appending 51404222 to pageids
Appending 53070037 to pageids
Appending 1931185 to pageids
Appending 1657551 to pageids
Appending 45396428 to pageids
Appending 54249476 to pageids
Appending 45001424 to pageids
Appending 8006328 to pageids
Appending 28650287 to pageids
Appending 2368154 to pageids
Appending 52649487 to pageids
Appending 6898858 to pageids
Appending 743971 to pageids
Appending 18629502 to pageids
Appending 16021556 to pageids
Appending 23392007 to pageids
Appending 1191600 to pageids
Appending 2090992 to pageids
Appending 4903304 to pageids
Appending 52345508 to pa

Category:Artificial immune systems (en) data
{
  members: <list(4)> {'pageid': 1589987, 'ns': 0, 'title': 'Artifi...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 1589987 to pageids
Appending 20434643 to pageids
Appending 12468879 to pageids
Appending 25532348 to pageids
Appending Category:Artificial intelligence associations / count = 21
Checking for subcategories of Category:Artificial intelligence associations at depth 1


Category:Artificial intelligence associations (en) data
{
  members: <list(16)> {'pageid': 35782731, 'ns': 0, 'title': 'Alle...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 38733908, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Automated reasoning


Appending 35782731 to pageids
Appending 3558615 to pageids
Appending 13234913 to pageids
Appending 534794 to pageids
Appending 49803819 to pageids
Appending 7101688 to pageids
Appending 19172663 to pageids
Appending 42912557 to pageids
Appending 30292113 to pageids
Appending 56382262 to pageids
Appending 38857256 to pageids
Appending 44299306 to pageids
Appending 732167 to pageids
Appending 48795986 to pageids
Appending 51792164 to pageids
Appending 9855223 to pageids
Appending Category:Association for the Advancement of Artificial Intelligence / count = 22
Checking for subcategories of Category:Association for the Advancement of Artificial Intelligence at depth 0
Appending Category:Automated reasoning / count = 23
Checking for subcategories of Category:Automated reasoning at depth 1


Category:Automated reasoning (en) data
{
  members: <list(10)> {'pageid': 3446141, 'ns': 0, 'title': 'Assoc...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 8387931, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Chatterbots


Appending 3446141 to pageids
Appending 2884728 to pageids
Appending 568967 to pageids
Appending 996026 to pageids
Appending 8402579 to pageids
Appending 16920 to pageids
Appending 2708995 to pageids
Appending 26313930 to pageids
Appending 31103500 to pageids
Appending 13536810 to pageids
Appending Category:Automated theorem proving / count = 24
Checking for subcategories of Category:Automated theorem proving at depth 0
Appending Category:Chatterbots / count = 25
Checking for subcategories of Category:Chatterbots at depth 1


Category:Chatterbots (en) data
{
  members: <list(38)> {'pageid': 148349, 'ns': 0, 'title': 'Chatbo...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Cloud robotics


Appending 148349 to pageids
Appending 1705011 to pageids
Appending 3075657 to pageids
Appending 26032317 to pageids
Appending 28650287 to pageids
Appending 2090992 to pageids
Appending 10235 to pageids
Appending 19902679 to pageids
Appending 51171662 to pageids
Appending 8703473 to pageids
Appending 33026100 to pageids
Appending 25455946 to pageids
Appending 22202872 to pageids
Appending 705605 to pageids
Appending 21826200 to pageids
Appending 8703613 to pageids
Appending 238725 to pageids
Appending 174091 to pageids
Appending 4478746 to pageids
Appending 41246558 to pageids
Appending 43021739 to pageids
Appending 52202762 to pageids
Appending 48723464 to pageids
Appending 405447 to pageids
Appending 453427 to pageids
Appending 56428662 to pageids
Appending 34882875 to pageids
Appending 946226 to pageids
Appending 57210608 to pageids
Appending 2809681 to pageids
Appending 42722041 to pageids
Appending 49933350 to pageids
Appending 30131890 to pageids
Appending 6773335 to pageids
Appen

Category:Cloud robotics (en) data
{
  members: <list(4)> {'pageid': 44628427, 'ns': 0, 'title': 'Cloud...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Cognitive architecture


Appending 44628427 to pageids
Appending 34643983 to pageids
Appending 38776364 to pageids
Appending 38782554 to pageids
Appending Category:Cognitive architecture / count = 27
Checking for subcategories of Category:Cognitive architecture at depth 1


Category:Cognitive architecture (en) data
{
  members: <list(34)> {'pageid': 1700176, 'ns': 0, 'title': 'Cogni...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Computer vision


Appending 1700176 to pageids
Appending 23882349 to pageids
Appending 821071 to pageids
Appending 6456832 to pageids
Appending 12632281 to pageids
Appending 13550938 to pageids
Appending 13030638 to pageids
Appending 25256781 to pageids
Appending 1757239 to pageids
Appending 6874 to pageids
Appending 36298401 to pageids
Appending 33548913 to pageids
Appending 3559731 to pageids
Appending 12838512 to pageids
Appending 31462536 to pageids
Appending 1847118 to pageids
Appending 24870679 to pageids
Appending 41780237 to pageids
Appending 22912887 to pageids
Appending 57720057 to pageids
Appending 446606 to pageids
Appending 2311118 to pageids
Appending 8233911 to pageids
Appending 15311631 to pageids
Appending 7827235 to pageids
Appending 31103500 to pageids
Appending 32043571 to pageids
Appending 6216524 to pageids
Appending 729751 to pageids
Appending 23572558 to pageids
Appending 33547203 to pageids
Appending 50568976 to pageids
Appending 83552 to pageids
Appending 14241792 to pageids
Ap

Category:Computer vision (en) data
{
  members: <list(89)> {'pageid': 6596, 'ns': 0, 'title': 'Computer...
  requests: <list(1)> category
  subcategories: <list(21)> {'pageid': 2700744, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 6596 to pageids
Appending 5104401 to pageids
Appending 44534652 to pageids
Appending 53065847 to pageids
Appending 24286785 to pageids
Appending 25860534 to pageids
Appending 34668189 to pageids
Appending 56084743 to pageids
Appending 5717580 to pageids
Appending 12555662 to pageids
Appending 2288302 to pageids
Appending 15375961 to pageids
Appending 289860 to pageids
Appending 13274389 to pageids
Appending 27395982 to pageids
Appending 38561540 to pageids
Appending 9170159 to pageids
Appending 16741381 to pageids
Appending 230834 to pageids
Appending 474813 to pageids
Appending 35026656 to pageids
Appending 1571780 to pageids
Appending 7174467 to pageids
Appending 34641430 to pageids
Appending 40409788 to pageids
Appending 33512934 to pageids
Appending 39922027 to pageids
Appending 10019306 to pageids
Appending 97922 to pageids
Appending 34413019 to pageids
Appending 51396023 to pageids
Appending 56249073 to pageids
Appending 348692 to pageids
Appending 33270868 to pageids
A

Category:Artificial intelligence conferences (en) data
{
  members: <list(18)> {'pageid': 24714635, 'ns': 0, 'title': 'AAAI...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Signal processing con...


Appending 24714635 to pageids
Appending 57562297 to pageids
Appending 27562454 to pageids
Appending 57094780 to pageids
Appending 38629606 to pageids
Appending 22663171 to pageids
Appending 1175156 to pageids
Appending 1124646 to pageids
Appending 19834151 to pageids
Appending 3358541 to pageids
Appending 56416028 to pageids
Appending 18586449 to pageids
Appending 56417628 to pageids
Appending 2614944 to pageids
Appending 39758073 to pageids
Appending 30401716 to pageids
Appending 36494971 to pageids
Appending 27898066 to pageids
Appending Category:Signal processing conferences / count = 51
Checking for subcategories of Category:Signal processing conferences at depth 1


Category:Signal processing conferences (en) data
{
  members: <list(4)> {'pageid': 24073428, 'ns': 0, 'title': 'Confe...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Evolutionary computation


Appending 24073428 to pageids
Appending 1175156 to pageids
Appending 22318852 to pageids
Appending 23376109 to pageids
Appending Category:Evolutionary computation / count = 52
Checking for subcategories of Category:Evolutionary computation at depth 1


Category:Evolutionary computation (en) data
{
  members: <list(25)> {'pageid': 268020, 'ns': 0, 'title': 'Evolut...
  requests: <list(1)> category
  subcategories: <list(4)> {'pageid': 811694, 'ns': 14, 'title': '...
}
en.wikipedia.org (categorymembers) Category:Artificial Intelligen...


Appending 268020 to pageids
Appending 25839999 to pageids
Appending 44636079 to pageids
Appending 1514142 to pageids
Appending 26536158 to pageids
Appending 39447416 to pageids
Appending 418075 to pageids
Appending 3062637 to pageids
Appending 15702071 to pageids
Appending 30035652 to pageids
Appending 1050195 to pageids
Appending 1098818 to pageids
Appending 49305019 to pageids
Appending 5751182 to pageids
Appending 21573718 to pageids
Appending 4483084 to pageids
Appending 901162 to pageids
Appending 12836631 to pageids
Appending 6548718 to pageids
Appending 1514566 to pageids
Appending 34542671 to pageids
Appending 344922 to pageids
Appending 1346015 to pageids
Appending 25056220 to pageids
Appending 36089423 to pageids
Appending Category:Artificial life / count = 53
Checking for subcategories of Category:Artificial life at depth 0
Appending Category:Digital organisms / count = 54
Checking for subcategories of Category:Digital organisms at depth 0
Appending Category:Evolutionary alg

Category:Artificial Intelligence existential risk (en) data
{
  members: <list(18)> {'pageid': 31641770, 'ns': 0, 'title': 'AI b...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 31641770 to pageids
Appending 50785023 to pageids
Appending 813176 to pageids
Appending 57638150 to pageids
Appending 36747851 to pageids
Appending 46583121 to pageids
Appending 42912557 to pageids
Appending 43591208 to pageids
Appending 46577193 to pageids
Appending 48718351 to pageids
Appending 55090826 to pageids
Appending 732167 to pageids
Appending 46504825 to pageids
Appending 48795986 to pageids
Appending 40930229 to pageids
Appending 726659 to pageids
Appending 43427948 to pageids
Appending 54245 to pageids
Appending Category:Artificial intelligence in fiction / count = 58
Checking for subcategories of Category:Artificial intelligence in fiction at depth 1


Category:Artificial intelligence in fiction (en) data
{
  members: <list(180)> {'pageid': 11746227, 'ns': 0, 'title': 'Art...
  requests: <list(1)> category
  subcategories: <list(13)> {'pageid': 41037406, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Fictional artificial ...


Appending 11746227 to pageids
Appending 11750751 to pageids
Appending 240931 to pageids
Appending 39350301 to pageids
Appending 1660387 to pageids
Appending 54532744 to pageids
Appending 2233223 to pageids
Appending 40008467 to pageids
Appending 49635999 to pageids
Appending 50178216 to pageids
Appending 636502 to pageids
Appending 41476698 to pageids
Appending 39171154 to pageids
Appending 47431252 to pageids
Appending 23903477 to pageids
Appending 12524432 to pageids
Appending 7491122 to pageids
Appending 39516319 to pageids
Appending 12690765 to pageids
Appending 44449 to pageids
Appending 1963048 to pageids
Appending 990322 to pageids
Appending 5584041 to pageids
Appending 4742 to pageids
Appending 184211 to pageids
Appending 600721 to pageids
Appending 42122348 to pageids
Appending 55503282 to pageids
Appending 855981 to pageids
Appending 2989163 to pageids
Appending 753201 to pageids
Appending 54799795 to pageids
Appending 6790509 to pageids
Appending 1594929 to pageids
Appending

Category:Fictional artificial intelligences (en) data
{
  members: <list(89)> {'pageid': 626913, 'ns': 0, 'title': 'Adam (...
  requests: <list(1)> category
  subcategories: <list(4)> {'pageid': 11305939, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Fuzzy logic


Appending 626913 to pageids
Appending 6422674 to pageids
Appending 1216187 to pageids
Appending 297146 to pageids
Appending 10330695 to pageids
Appending 805753 to pageids
Appending 27867111 to pageids
Appending 681260 to pageids
Appending 32891878 to pageids
Appending 2937935 to pageids
Appending 623280 to pageids
Appending 525160 to pageids
Appending 853152 to pageids
Appending 22233770 to pageids
Appending 13388109 to pageids
Appending 2123542 to pageids
Appending 1115033 to pageids
Appending 7708467 to pageids
Appending 6073394 to pageids
Appending 47676 to pageids
Appending 54442191 to pageids
Appending 28039836 to pageids
Appending 27076 to pageids
Appending 8412 to pageids
Appending 232212 to pageids
Appending 990490 to pageids
Appending 693362 to pageids
Appending 5523115 to pageids
Appending 302789 to pageids
Appending 50660484 to pageids
Appending 575649 to pageids
Appending 24102946 to pageids
Appending 1171000 to pageids
Appending 3177898 to pageids
Appending 26468301 to pa

Category:Fuzzy logic (en) data
{
  members: <list(56)> {'pageid': 1028978, 'ns': 0, 'title': 'Combs...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 15522913, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Game artificial intel...


Appending 1028978 to pageids
Appending 1973470 to pageids
Appending 49180 to pageids
Appending 46948278 to pageids
Appending 31663887 to pageids
Appending 30692142 to pageids
Appending 16991824 to pageids
Appending 9823717 to pageids
Appending 1035745 to pageids
Appending 537311 to pageids
Appending 28232105 to pageids
Appending 33876751 to pageids
Appending 1028755 to pageids
Appending 28492170 to pageids
Appending 11270885 to pageids
Appending 971690 to pageids
Appending 48660 to pageids
Appending 1958097 to pageids
Appending 22506808 to pageids
Appending 35890194 to pageids
Appending 14075975 to pageids
Appending 1626494 to pageids
Appending 22506723 to pageids
Appending 21787029 to pageids
Appending 8373333 to pageids
Appending 16415639 to pageids
Appending 56601 to pageids
Appending 1651811 to pageids
Appending 20814025 to pageids
Appending 8018271 to pageids
Appending 15639656 to pageids
Appending 33922707 to pageids
Appending 54036086 to pageids
Appending 13649912 to pageids
App

Category:Game artificial intelligence (en) data
{
  members: <list(48)> {'pageid': 1654769, 'ns': 0, 'title': 'Artif...
  requests: <list(1)> category
  subcategories: <list(10)> {'pageid': 47893983, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:History of artificial...


Appending 1654769 to pageids
Appending 100558 to pageids
Appending 159501 to pageids
Appending 452582 to pageids
Appending 14993828 to pageids
Appending 284528 to pageids
Appending 7249174 to pageids
Appending 8539064 to pageids
Appending 207188 to pageids
Appending 39278567 to pageids
Appending 6172005 to pageids
Appending 68367 to pageids
Appending 227021 to pageids
Appending 64187 to pageids
Appending 501462 to pageids
Appending 19496019 to pageids
Appending 41755648 to pageids
Appending 159513 to pageids
Appending 1153192 to pageids
Appending 27164904 to pageids
Appending 28134188 to pageids
Appending 6476399 to pageids
Appending 7408685 to pageids
Appending 46519807 to pageids
Appending 253556 to pageids
Appending 2256654 to pageids
Appending 904345 to pageids
Appending 42146944 to pageids
Appending 173889 to pageids
Appending 23762260 to pageids
Appending 1074656 to pageids
Appending 25681069 to pageids
Appending 19589 to pageids
Appending 54451950 to pageids
Appending 1732703 to

Category:History of artificial intelligence (en) data
{
  members: <list(78)> {'pageid': 2894560, 'ns': 0, 'title': 'Histo...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Human–computer interaction


Appending 2894560 to pageids
Appending 12413470 to pageids
Appending 8992008 to pageids
Appending 24109330 to pageids
Appending 3548574 to pageids
Appending 27562454 to pageids
Appending 3993712 to pageids
Appending 6487479 to pageids
Appending 7754370 to pageids
Appending 2995827 to pageids
Appending 8383635 to pageids
Appending 404048 to pageids
Appending 26258490 to pageids
Appending 1124646 to pageids
Appending 15938221 to pageids
Appending 1065253 to pageids
Appending 557912 to pageids
Appending 10235 to pageids
Appending 307517 to pageids
Appending 347832 to pageids
Appending 8170186 to pageids
Appending 27305427 to pageids
Appending 9924067 to pageids
Appending 33026100 to pageids
Appending 15848842 to pageids
Appending 25455946 to pageids
Appending 1234658 to pageids
Appending 42124868 to pageids
Appending 4891153 to pageids
Appending 27837170 to pageids
Appending 27303185 to pageids
Appending 649572 to pageids
Appending 303031 to pageids
Appending 33517824 to pageids
Appending

Category:Human–computer interaction (en) data
{
  members: <list(241)> {'pageid': 23534602, 'ns': 0, 'title': 'Hum...
  requests: <list(1)> category
  subcategories: <list(25)> {'pageid': 46305725, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Knowledge engineering


Appending 23534602 to pageids
Appending 904443 to pageids
Appending 10450559 to pageids
Appending 9192926 to pageids
Appending 20783988 to pageids
Appending 4184791 to pageids
Appending 38721413 to pageids
Appending 42136536 to pageids
Appending 21704475 to pageids
Appending 53584972 to pageids
Appending 52345954 to pageids
Appending 31030040 to pageids
Appending 56463048 to pageids
Appending 18915928 to pageids
Appending 7113944 to pageids
Appending 1375531 to pageids
Appending 1853276 to pageids
Appending 54517958 to pageids
Appending 34953916 to pageids
Appending 15481604 to pageids
Appending 44500218 to pageids
Appending 5548958 to pageids
Appending 43506102 to pageids
Appending 623686 to pageids
Appending 20349155 to pageids
Appending 16761943 to pageids
Appending 12805720 to pageids
Appending 46665674 to pageids
Appending 2792572 to pageids
Appending 5728377 to pageids
Appending 26336225 to pageids
Appending 3225176 to pageids
Appending 30306484 to pageids
Appending 26143506 to p

Category:Knowledge engineering (en) data
{
  members: <list(32)> {'pageid': 458499, 'ns': 0, 'title': 'Knowle...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 796635, 'ns': 14, 'title': '...
}
en.wikipedia.org (categorymembers) Category:Knowledge representation


Appending 458499 to pageids
Appending 26143506 to pageids
Appending 4985613 to pageids
Appending 38982174 to pageids
Appending 27928333 to pageids
Appending 35281629 to pageids
Appending 19330337 to pageids
Appending 469578 to pageids
Appending 485226 to pageids
Appending 22713707 to pageids
Appending 16070200 to pageids
Appending 25683591 to pageids
Appending 4724116 to pageids
Appending 18039001 to pageids
Appending 3517589 to pageids
Appending 1904337 to pageids
Appending 11856314 to pageids
Appending 610789 to pageids
Appending 12829835 to pageids
Appending 3030181 to pageids
Appending 1899829 to pageids
Appending 23470390 to pageids
Appending 20748299 to pageids
Appending 49681 to pageids
Appending 4696039 to pageids
Appending 20400528 to pageids
Appending 16743556 to pageids
Appending 23872172 to pageids
Appending 45274436 to pageids
Appending 13536810 to pageids
Appending 29123 to pageids
Appending 1958462 to pageids
Appending Category:Knowledge representation / count = 118
Chec

Category:Knowledge representation (en) data
{
  members: <list(200)> {'pageid': 16920, 'ns': 0, 'title': 'Knowle...
  requests: <list(1)> category
  subcategories: <list(18)> {'pageid': 11461295, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 16920 to pageids
Appending 3348350 to pageids
Appending 5465589 to pageids
Appending 5465644 to pageids
Appending 23241698 to pageids
Appending 5465574 to pageids
Appending 54186633 to pageids
Appending 10483232 to pageids
Appending 6190251 to pageids
Appending 6520028 to pageids
Appending 7512482 to pageids
Appending 43480298 to pageids
Appending 37291130 to pageids
Appending 18025074 to pageids
Appending 17952329 to pageids
Appending 1187311 to pageids
Appending 853832 to pageids
Appending 49726563 to pageids
Appending 3388492 to pageids
Appending 57078271 to pageids
Appending 72717 to pageids
Appending 2889648 to pageids
Appending 5625552 to pageids
Appending 12680566 to pageids
Appending 46926920 to pageids
Appending 2526582 to pageids
Appending 1385766 to pageids
Appending 6888 to pageids
Appending 25154746 to pageids
Appending 4476270 to pageids
Appending 39089943 to pageids
Appending 698226 to pageids
Appending 346755 to pageids
Appending 38982174 to pageids
Appending 

Category:Artificial intelligence laboratories (en) data
{
  members: <list(24)> {'pageid': 57444980, 'ns': 0, 'title': 'Acti...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Logic programming


Appending 57444980 to pageids
Appending 35782731 to pageids
Appending 33900354 to pageids
Appending 7753733 to pageids
Appending 14027739 to pageids
Appending 4103607 to pageids
Appending 41755648 to pageids
Appending 6610123 to pageids
Appending 15034 to pageids
Appending 2828572 to pageids
Appending 1247612 to pageids
Appending 9168065 to pageids
Appending 2227943 to pageids
Appending 33524353 to pageids
Appending 434274 to pageids
Appending 48795986 to pageids
Appending 54407907 to pageids
Appending 12685352 to pageids
Appending 29152434 to pageids
Appending 29152426 to pageids
Appending 23087437 to pageids
Appending 4225178 to pageids
Appending 41268341 to pageids
Appending 23981191 to pageids
Appending Category:Logic programming / count = 139
Checking for subcategories of Category:Logic programming at depth 1


Category:Logic programming (en) data
{
  members: <list(48)> {'pageid': 17927, 'ns': 0, 'title': 'Logic p...
  requests: <list(1)> category
  subcategories: <list(10)> {'pageid': 24525021, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Machine learning


Appending 17927 to pageids
Appending 31103500 to pageids
Appending 12737925 to pageids
Appending 8992008 to pageids
Appending 2386211 to pageids
Appending 2738697 to pageids
Appending 1187311 to pageids
Appending 2634917 to pageids
Appending 4638484 to pageids
Appending 2526582 to pageids
Appending 14734259 to pageids
Appending 10904266 to pageids
Appending 4467477 to pageids
Appending 42755423 to pageids
Appending 5421193 to pageids
Appending 4249442 to pageids
Appending 7260862 to pageids
Appending 889639 to pageids
Appending 2634860 to pageids
Appending 2628057 to pageids
Appending 3338671 to pageids
Appending 39813016 to pageids
Appending 2897680 to pageids
Appending 11306 to pageids
Appending 31798895 to pageids
Appending 727607 to pageids
Appending 2636072 to pageids
Appending 2681863 to pageids
Appending 41644056 to pageids
Appending 39116526 to pageids
Appending 42480453 to pageids
Appending 2526537 to pageids
Appending 1063946 to pageids
Appending 2692616 to pageids
Appending 

Category:Machine learning (en) data
{
  members: <list(199)> {'pageid': 233488, 'ns': 0, 'title': 'Machi...
  requests: <list(1)> category
  subcategories: <list(31)> {'pageid': 33547387, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Mind–body problem


Appending 233488 to pageids
Appending 49082762 to pageids
Appending 53587467 to pageids
Appending 3771060 to pageids
Appending 43808044 to pageids
Appending 28801798 to pageids
Appending 45049676 to pageids
Appending 52642349 to pageids
Appending 30511763 to pageids
Appending 50773876 to pageids
Appending 55817338 to pageids
Appending 20890511 to pageids
Appending 19463198 to pageids
Appending 55843837 to pageids
Appending 14003441 to pageids
Appending 31877832 to pageids
Appending 9732182 to pageids
Appending 35867897 to pageids
Appending 40973765 to pageids
Appending 50211107 to pageids
Appending 40678189 to pageids
Appending 55075082 to pageids
Appending 205393 to pageids
Appending 50646178 to pageids
Appending 1191936 to pageids
Appending 44439173 to pageids
Appending 53631046 to pageids
Appending 39182554 to pageids
Appending 8964665 to pageids
Appending 17114678 to pageids
Appending 22795783 to pageids
Appending 28650287 to pageids
Appending 2934910 to pageids
Appending 9583985 t

Category:Mind–body problem (en) data
{
  members: <list(12)> {'pageid': 11081176, 'ns': 0, 'title': 'Mind...
  requests: <list(1)> category
  subcategories: <list(4)> {'pageid': 1056640, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Multi-agent systems


Appending 11081176 to pageids
Appending 540801 to pageids
Appending 20651606 to pageids
Appending 42371992 to pageids
Appending 33034640 to pageids
Appending 35182952 to pageids
Appending 262714 to pageids
Appending 192355 to pageids
Appending 634216 to pageids
Appending 36502973 to pageids
Appending 41856558 to pageids
Appending 146062 to pageids
Appending Category:Behaviorism / count = 183
Checking for subcategories of Category:Behaviorism at depth 0
Appending Category:Dualism (philosophy of mind) / count = 184
Checking for subcategories of Category:Dualism (philosophy of mind) at depth 0
Appending Category:Monism / count = 185
Checking for subcategories of Category:Monism at depth 0
Appending Category:Physicalism / count = 186
Checking for subcategories of Category:Physicalism at depth 0
Appending Category:Multi-agent systems / count = 187
Checking for subcategories of Category:Multi-agent systems at depth 1


Category:Multi-agent systems (en) data
{
  members: <list(98)> {'pageid': 50292266, 'ns': 0, 'title': 'Agen...
  requests: <list(1)> category
  subcategories: <list(4)> {'pageid': 919839, 'ns': 14, 'title': '...
}
en.wikipedia.org (categorymembers) Category:Open-source artificia...


Appending 50292266 to pageids
Appending 938833 to pageids
Appending 29360873 to pageids
Appending 5229527 to pageids
Appending 18902782 to pageids
Appending 3348350 to pageids
Appending 1109552 to pageids
Appending 985619 to pageids
Appending 29782518 to pageids
Appending 21687295 to pageids
Appending 22623404 to pageids
Appending 26251183 to pageids
Appending 24259380 to pageids
Appending 15574908 to pageids
Appending 1908395 to pageids
Appending 28205830 to pageids
Appending 12342665 to pageids
Appending 473449 to pageids
Appending 1974235 to pageids
Appending 636268 to pageids
Appending 29474849 to pageids
Appending 1109117 to pageids
Appending 47902284 to pageids
Appending 53849136 to pageids
Appending 47902724 to pageids
Appending 17974229 to pageids
Appending 26270834 to pageids
Appending 26070855 to pageids
Appending 27046624 to pageids
Appending 29584036 to pageids
Appending 237629 to pageids
Appending 23426266 to pageids
Appending 28202716 to pageids
Appending 2961998 to pagei

Category:Open-source artificial intelligence (en) data
{
  members: <list(11)> {'pageid': 53194499, 'ns': 0, 'title': 'Chai...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 21174286, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Philosophy of artific...


Appending 53194499 to pageids
Appending 43169442 to pageids
Appending 49594059 to pageids
Appending 295299 to pageids
Appending 48795986 to pageids
Appending 22837841 to pageids
Appending 22815 to pageids
Appending 42129549 to pageids
Appending 54022970 to pageids
Appending 57741272 to pageids
Appending 48508507 to pageids
Appending Category:Free artificial intelligence applications / count = 193
Checking for subcategories of Category:Free artificial intelligence applications at depth 0
Appending Category:Philosophy of artificial intelligence / count = 194
Checking for subcategories of Category:Philosophy of artificial intelligence at depth 1


Category:Philosophy of artificial intelligence (en) data
{
  members: <list(39)> {'pageid': 2958015, 'ns': 0, 'title': 'Philo...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 1196890, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 2958015 to pageids
Appending 55962927 to pageids
Appending 31641770 to pageids
Appending 50785023 to pageids
Appending 21666977 to pageids
Appending 55817338 to pageids
Appending 9755539 to pageids
Appending 52411579 to pageids
Appending 16707204 to pageids
Appending 55941593 to pageids
Appending 1134562 to pageids
Appending 6216 to pageids
Appending 3951220 to pageids
Appending 2712053 to pageids
Appending 404048 to pageids
Appending 263636 to pageids
Appending 1124646 to pageids
Appending 56221934 to pageids
Appending 8919856 to pageids
Appending 33034640 to pageids
Appending 433005 to pageids
Appending 13659583 to pageids
Appending 351887 to pageids
Appending 46577193 to pageids
Appending 32237314 to pageids
Appending 46181931 to pageids
Appending 47937215 to pageids
Appending 12476035 to pageids
Appending 404037 to pageids
Appending 23569174 to pageids
Appending 4522868 to pageids
Appending 2685999 to pageids
Appending 30405742 to pageids
Appending 20044858 to pageids
App

Category:Artificial intelligence publications (en) data
{
  members: <list(24)> {'pageid': 31790538, 'ns': 0, 'title': 'Adap...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 31790538 to pageids
Appending 24109330 to pageids
Appending 40956547 to pageids
Appending 5841092 to pageids
Appending 566680 to pageids
Appending 8839580 to pageids
Appending 2911050 to pageids
Appending 404048 to pageids
Appending 28394308 to pageids
Appending 14509578 to pageids
Appending 38365867 to pageids
Appending 12953341 to pageids
Appending 12953325 to pageids
Appending 52318213 to pageids
Appending 2468974 to pageids
Appending 454351 to pageids
Appending 21393064 to pageids
Appending 25512652 to pageids
Appending 1196709 to pageids
Appending 9756006 to pageids
Appending 53889803 to pageids
Appending 43054485 to pageids
Appending 295320 to pageids
Appending 1804053 to pageids
Appending Category:Artificial intelligence researchers / count = 197
Checking for subcategories of Category:Artificial intelligence researchers at depth 1


Category:Artificial intelligence researchers (en) data
{
  members: <list(299)> {'pageid': 382535, 'ns': 0, 'title': 'Hal A...
  requests: <list(1)> category
  subcategories: <list(4)> {'pageid': 22121360, 'ns': 14, 'title':...
}
en.wikipedia.org (categorymembers) Category:Robotics


Appending 382535 to pageids
Appending 632583 to pageids
Appending 13744516 to pageids
Appending 18036927 to pageids
Appending 16712518 to pageids
Appending 298926 to pageids
Appending 30783354 to pageids
Appending 21410069 to pageids
Appending 30968392 to pageids
Appending 316559 to pageids
Appending 6151319 to pageids
Appending 47749536 to pageids
Appending 19301286 to pageids
Appending 55086225 to pageids
Appending 21109528 to pageids
Appending 24310293 to pageids
Appending 3777527 to pageids
Appending 30738275 to pageids
Appending 3011693 to pageids
Appending 1998313 to pageids
Appending 2922454 to pageids
Appending 35253686 to pageids
Appending 853832 to pageids
Appending 5522291 to pageids
Appending 1562996 to pageids
Appending 24021705 to pageids
Appending 632498 to pageids
Appending 6320384 to pageids
Appending 11701105 to pageids
Appending 1058864 to pageids
Appending 14459292 to pageids
Appending 3360396 to pageids
Appending 12017224 to pageids
Appending 48874465 to pageids
Ap

Category:Robotics (en) data
{
  members: <list(120)> {'pageid': 25781, 'ns': 0, 'title': 'Robot'...
  requests: <list(1)> category
  subcategories: <list(35)> {'pageid': 943048, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Robots


Appending 25781 to pageids
Appending 20903754 to pageids
Appending 43065875 to pageids
Appending 19148519 to pageids
Appending 24684701 to pageids
Appending 48805092 to pageids
Appending 47796548 to pageids
Appending 1367992 to pageids
Appending 1908395 to pageids
Appending 49803819 to pageids
Appending 31976229 to pageids
Appending 189749 to pageids
Appending 245926 to pageids
Appending 12342665 to pageids
Appending 44484413 to pageids
Appending 53810462 to pageids
Appending 508896 to pageids
Appending 1006293 to pageids
Appending 1673867 to pageids
Appending 27792533 to pageids
Appending 49944357 to pageids
Appending 47133704 to pageids
Appending 44628427 to pageids
Appending 54982564 to pageids
Appending 2934910 to pageids
Appending 28928489 to pageids
Appending 57834314 to pageids
Appending 45533018 to pageids
Appending 51925440 to pageids
Appending 20756967 to pageids
Appending 50952451 to pageids
Appending 53815584 to pageids
Appending 56275884 to pageids
Appending 48834013 to pa

Category:Robots (en) data
{
  members: <list(35)> {'pageid': 25781, 'ns': 0, 'title': 'Robot'}...
  requests: <list(1)> category
  subcategories: <list(38)> {'pageid': 41247177, 'ns': 14, 'title'...
}
en.wikipedia.org (categorymembers) Category:Rule engines


Appending 25781 to pageids
Appending 11005995 to pageids
Appending 2938409 to pageids
Appending 48049 to pageids
Appending 11559431 to pageids
Appending 2999200 to pageids
Appending 6724915 to pageids
Appending 3808074 to pageids
Appending 2245058 to pageids
Appending 3926040 to pageids
Appending 7501487 to pageids
Appending 14716135 to pageids
Appending 57638150 to pageids
Appending 713 to pageids
Appending 37663509 to pageids
Appending 56349200 to pageids
Appending 46434772 to pageids
Appending 43046621 to pageids
Appending 48050589 to pageids
Appending 47133704 to pageids
Appending 57790317 to pageids
Appending 5115708 to pageids
Appending 43616041 to pageids
Appending 44195492 to pageids
Appending 48560508 to pageids
Appending 22259308 to pageids
Appending 50571328 to pageids
Appending 42943542 to pageids
Appending 56428662 to pageids
Appending 7068605 to pageids
Appending 55790714 to pageids
Appending 57375524 to pageids
Appending 39338990 to pageids
Appending 47502544 to pageids


Category:Rule engines (en) data
{
  members: <list(21)> {'pageid': 13536810, 'ns': 0, 'title': 'Sema...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 5679660, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Turing tests


Appending 13536810 to pageids
Appending 2271744 to pageids
Appending 1867853 to pageids
Appending 2052570 to pageids
Appending 15900338 to pageids
Appending 5421193 to pageids
Appending 25260147 to pageids
Appending 35281629 to pageids
Appending 44425089 to pageids
Appending 10706365 to pageids
Appending 9519016 to pageids
Appending 41442396 to pageids
Appending 37091 to pageids
Appending 2645943 to pageids
Appending 29749471 to pageids
Appending 14996462 to pageids
Appending 5006904 to pageids
Appending 31103500 to pageids
Appending 19768799 to pageids
Appending 1568414 to pageids
Appending 1254288 to pageids
Appending Category:Expert systems / count = 278
Checking for subcategories of Category:Expert systems at depth 0
Appending Category:Turing tests / count = 279
Checking for subcategories of Category:Turing tests at depth 1


Category:Turing tests (en) data
{
  members: <list(11)> {'pageid': 21391751, 'ns': 0, 'title': 'Turi...
  requests: <list(1)> category
  subcategories: <list(1)> {'pageid': 8735377, 'ns': 14, 'title': ...
}
en.wikipedia.org (categorymembers) Category:Virtual assistants


Appending 21391751 to pageids
Appending 230834 to pageids
Appending 19451459 to pageids
Appending 16795043 to pageids
Appending 3897595 to pageids
Appending 28037152 to pageids
Appending 11451897 to pageids
Appending 723435 to pageids
Appending 3682165 to pageids
Appending 48589354 to pageids
Appending 44391058 to pageids
Appending Category:Chatterbots / count = 280
Checking for subcategories of Category:Chatterbots at depth 0
Appending Category:Virtual assistants / count = 281
Checking for subcategories of Category:Virtual assistants at depth 1


Category:Virtual assistants (en) data
{
  members: <list(27)> {'pageid': 19024298, 'ns': 0, 'title': 'Virt...
  requests: <list(1)> category
}
en.wikipedia.org (categorymembers) Category:Artificial intelligen...


Appending 19024298 to pageids
Appending 55531461 to pageids
Appending 56366274 to pageids
Appending 51060375 to pageids
Appending 43144075 to pageids
Appending 53639024 to pageids
Appending 13494976 to pageids
Appending 39791384 to pageids
Appending 56327946 to pageids
Appending 42119832 to pageids
Appending 50575063 to pageids
Appending 36279735 to pageids
Appending 55726033 to pageids
Appending 44969076 to pageids
Appending 52396698 to pageids
Appending 42370155 to pageids
Appending 53802682 to pageids
Appending 43236335 to pageids
Appending 50231947 to pageids
Appending 35713574 to pageids
Appending 38350839 to pageids
Appending 26086272 to pageids
Appending 17540116 to pageids
Appending 50491264 to pageids
Appending 40615979 to pageids
Appending 21903944 to pageids
Appending 56885536 to pageids
Appending Category:Artificial intelligence stubs / count = 282
Checking for subcategories of Category:Artificial intelligence stubs at depth 1
Appending 31663887 to pageids
Appending 4510677

Category:Artificial intelligence stubs (en) data
{
  members: <list(116)> {'pageid': 31663887, 'ns': 0, 'title': 'Ada...
  requests: <list(1)> category
}


In [254]:
len(crawler.categories)

282

In [255]:
len(crawler.pageids)

2714

In [288]:
page = wptools.page(pageid=1648132).get_parse()

en.wikipedia.org (parse) 1648132
Weak AI (en) data
{
  pageid: 1648132
  parsetree: <str(3573)> <root><template><title>Use dmy dates</tit...
  requests: <list(1)> parse
  title: Weak AI
  wikibase: Q17097955
  wikidata_url: https://www.wikidata.org/wiki/Q17097955
  wikitext: <str(2870)> {{Use dmy dates|date=January 2015}}'''Weak...
}


In [289]:
page.get_parse()

+ parse results in cache


<wptools.page.WPToolsPage at 0x7f5b8430bb00>

In [290]:
text = page.data['wikitext']

In [291]:
page.get_query().data['url']

en.wikipedia.org (query) Weak AI
Weak AI (en) data
{
  assessments: <dict(5)> Linguistics, Systems, Computing, Technolo...
  extext: <str(1261)> **Weak artificial intelligence** ( **weak AI...
  extract: <str(1305)> <p><b>Weak artificial intelligence</b> (<b>...
  label: Weak AI
  length: 2,893
  links: <list(13)> Artificial general intelligence, Artificial in...
  modified: <dict(1)> page
  pageid: 1648132
  parsetree: <str(3573)> <root><template><title>Use dmy dates</tit...
  random: Arturo Mas
  redirects: <list(4)> {'pageid': 7869823, 'ns': 0, 'title': 'Weak...
  requests: <list(2)> parse, query
  title: Weak AI
  url: https://en.wikipedia.org/wiki/Weak_AI
  url_raw: https://en.wikipedia.org/wiki/Weak_AI?action=raw
  watchers: 33
  wikibase: Q17097955
  wikidata_url: https://www.wikidata.org/wiki/Q17097955
  wikitext: <str(2870)> {{Use dmy dates|date=January 2015}}'''Weak...
}


'https://en.wikipedia.org/wiki/Weak_AI'

In [292]:
from bs4 import BeautifulSoup

In [293]:
soup = BeautifulSoup(text, 'html.parser')

In [294]:
txt2 = soup.get_text()

In [295]:
import re
re.sub(r"\s*{.*}\s*|\s*\[.*\]\s*", " ", txt2)

' \'\'\'Weak artificial intelligence\'\'\' (\'\'\'weak AI\'\'\'), also known as \'\'\'narrow AI\'\'\',io9.com mentions narrow AI. Published 1 April 2013, retrieved 16 February 2014: http://io9.com/how-much-longer-before-our-first-ai-catastrophe-464043243AI researcher Ben Goertzel explains why he became interested in AGI instead of narrow AI. Published 18 Oct 2013. Retrieved 16 February 2014. http://intelligence.org/2013/10/18/ben-goertzel/TechCrunch discusses AI App building regarding Narrow AI. Published 16 Oct 2015, retrieved 17 Oct 2015. https://techcrunch.com/2015/10/15/machine-learning-its-the-hard-problems-that-are-valuable/ is (a machine with the ability to apply intelligence to any problem, rather than just one specific problem). All currently existing systems considered artificial intelligence of any sort are weak AI at most. , on his blog in 2010, stated Siri was "VERY narrow and brittle" evidenced by annoying results if you ask questions outside the limits of the application