# Wikipedia API

Experiments with the Wikipedia API

In [2]:
import mwapi

# Shared mwapi session pointed at English Wikipedia. The user agent carries a
# contact address (presumably to follow Wikimedia's User-Agent policy — confirm
# that this bare "<email>" form is acceptable).
session = mwapi.Session('https://en.wikipedia.org', user_agent="<jeffrey.arnold@gmail.com>")

# Backlog 

The page https://en.wikipedia.org/wiki/Wikipedia:Backlog contains the cleanup templates and lists common styles.

# Article Message Templates

Article message templates include most of the large cleanup templates.

In [12]:
import re
import requests
import itertools

def template_name(x):
    """Return the title ``x`` without a leading ``Template:`` prefix.

    Only a prefix at the very start of the string is removed; surrounding
    whitespace is stripped from whatever remains.
    """
    prefix = "Template:"
    remainder = x[len(prefix):] if x.startswith(prefix) else x
    return remainder.strip()

# Base query parameters for listing the members of a category.
# 'gcmtitle' is left as None here; it has to be filled in with the category
# title before the request is useful.
params = {
    "action": "query",
    "generator": "categorymembers",
    "gcmtitle": None,
    "gcmnamespace": 10,
    "prop": "redirects",
}

def query(params):
    """Yield the ``query`` section of each batch of a MediaWiki API query.

    The request is sent to the English Wikipedia API endpoint and repeated,
    feeding back the ``continue`` values of the previous response, until a
    response arrives without a ``continue`` section.

    Parameters
    ----------
    params : dict
        Request parameters. ``action`` is forced to ``'query'`` and ``format``
        to ``'json'``. The caller's dict is not modified.

    Yields
    ------
    dict
        The ``query`` member of each response that has one.

    Raises
    ------
    RuntimeError
        If a response contains an ``error`` member; the error payload is the
        exception argument.
    """
    # Copy so the forced keys below do not leak into the caller's dict.
    base_request = dict(params)
    base_request['action'] = 'query'
    base_request['format'] = 'json'
    last_continue = {}
    while True:
        # Original request plus the 'continue' values of the last result.
        req = dict(base_request)
        req.update(last_continue)
        result = requests.get('https://en.wikipedia.org/w/api.php', params=req).json()
        if 'error' in result:
            # The previous `raise Error(...)` named an undefined class and
            # surfaced as a NameError instead of reporting the API error.
            raise RuntimeError(result['error'])
        if 'warnings' in result:
            print(result['warnings'])
        if 'query' in result:
            yield result['query']
        if 'continue' not in result:
            break
        last_continue = result['continue']

def get_pages(params, max_queries=None):
    """Yield ``(page_id, page)`` pairs from the batches produced by ``query``.

    Parameters
    ----------
    params : dict
        Request parameters, passed straight to ``query``.
    max_queries : int or None
        Upper bound on the number of batches consumed; ``None`` means no bound.
    """
    batches = itertools.islice(query(params), max_queries)
    for batch in batches:
        yield from batch['pages'].items()



Neutrality

- https://en.wikipedia.org/wiki/Category:Neutrality_templates

- https://en.wikipedia.org/wiki/Category:Inline_dispute_templates


Non-editable

- https://en.wikipedia.org/wiki/Category:Wikipedia_conflict_of_interest_templates
- https://en.wikipedia.org/wiki/Category:Dispute_templates


- https://en.wikipedia.org/wiki/Category:Coherency_templates
- https://en.wikipedia.org/wiki/Category:Copyright_maintenance_templates

- https://en.wikipedia.org/wiki/Category:Inline_cleanup_templates
- https://en.wikipedia.org/wiki/Category:Inline_citation_cleanup_templates
- https://en.wikipedia.org/wiki/Category:Inline_citation_and_verifiability_dispute_templates
- https://en.wikipedia.org/wiki/Category:Inline_dispute_templates

- https://en.wikipedia.org/wiki/Category:Dispute_templates
- https://en.wikipedia.org/wiki/Category:Neutrality_templates
- https://en.wikipedia.org/wiki/Category:Quotation_cleanup_templates
- https://en.wikipedia.org/wiki/Category:Introduction_cleanup_maintenance_templates
- https://en.wikipedia.org/wiki/Category:List_cleanup_maintenance_templates

In [10]:
# Category names (without the "Category:" prefix) of the cleanup-template
# categories to query. The list literal was left unterminated with a trailing
# empty string; it is closed here with only the names that were fully typed.
categories = [
    "Neutrality templates",
    "Inline dispute templates",
    "Coherency templates",
]

In [79]:
# NOTE(review): inline_cleanup_templates is not assigned in any cell visible
# here — presumably built from get_pages(); confirm it survives Restart & Run All.
inline_cleanup_templates

- https://en.wikipedia.org/wiki/Category:Inline_dispute_templates
- https://en.wikipedia.org/wiki/Category:Dispute_templates
- https://en.wikipedia.org/wiki/Category:Inline_citation_and_verifiability_dispute_templates
- https://en.wikipedia.org/wiki/Category:Inline_dispute_templates
- https://en.wikipedia.org/wiki/Category:Wikipedia_maintenance_templates

In [80]:
# Display the collected templates (a dict keyed by page id in the recorded output).
# NOTE(review): the assignment of inline_cleanup_templates is not visible in this notebook — confirm.
inline_cleanup_templates

{'5632010': {'pageid': 5632010,
  'ns': 10,
  'title': 'Template:Fix',
  'redirects': [{'pageid': 43931470, 'ns': 10, 'title': 'Template:Fake fix'}]},
 '6245183': {'pageid': 6245183, 'ns': 10, 'title': 'Template:Ambiguous'},
 '11446646': {'pageid': 11446646,
  'ns': 10,
  'title': 'Template:Anachronism inline'},
 '12503503': {'pageid': 12503503,
  'ns': 10,
  'title': 'Template:Attribution needed'},
 '25610087': {'pageid': 25610087, 'ns': 10, 'title': 'Template:Awkward'},
 '39275128': {'pageid': 39275128,
  'ns': 10,
  'title': 'Template:Bare URL inline'},
 '57798416': {'pageid': 57798416, 'ns': 10, 'title': 'Template:Brexit note'},
 '30304886': {'pageid': 30304886,
  'ns': 10,
  'title': 'Template:Buzzword inline'},
 '39643223': {'pageid': 39643223, 'ns': 10, 'title': 'Template:By how much'},
 '17004352': {'pageid': 17004352, 'ns': 10, 'title': 'Template:Called'},
 '34142075': {'pageid': 34142075,
  'ns': 10,
  'title': 'Template:Check quotation'},
 '42463713': {'pageid': 42463713,
  

In [68]:
def continuation(session, method, params=None, files=None, auth=None):
    """Yield response documents for a request, following API continuation.

    Each document returned by ``session._request`` is yielded. While a
    document has a ``continue`` member, its values are merged into the
    parameters and the request is sent again.

    Parameters
    ----------
    session : object
        Object exposing ``_request(method, params=, files=, auth=)``.
    method : str
        HTTP method, passed through to ``_request``.
    params : dict or None
        Request parameters. ``None`` is treated as an empty dict, and the
        caller's dict is not modified.
    files : optional
        Passed to the first request only.
    auth : optional
        Passed through to every request.

    Notes
    -----
    An ``mwapi.errors.APIError`` is not re-raised: the error, the current
    parameters and the last successfully received document (``None`` if there
    was none) are printed and iteration stops.
    """
    # Private copy: tolerates params=None and keeps the continue values that
    # are merged in below out of the caller's dict.
    params = dict(params) if params else {}
    params.setdefault('continue', '')

    # Bound up front so the except branch can print it even when the very
    # first request fails.
    doc = None
    while True:
        try:
            # Forward the caller's auth; it used to be hard-coded to None.
            doc = session._request(method, params=params, files=files, auth=auth)
        except mwapi.errors.APIError as error:
            print(error)
            print(params)
            print(doc)
            break
        yield doc
        if 'continue' not in doc:
            break
        # re-send all continue values in the next call
        params.update(doc['continue'])
        files = None  # Don't send files again


{'Clarify timeframe', 'Copy edit inline'}

In [37]:
# NOTE(review): inline_templates is not assigned in any visible cell; the
# recorded output for this cell is an empty set.
inline_templates

set()

# Get Inline Cleanup Templates

In [40]:
import itertools

In [13]:
# Ask for the members of "Category:Inline cleanup templates" together with
# their info, categories and redirects. continuation=True is passed so the
# result can be iterated batch by batch (the following cell loops over it).
templates = session.request(
    method='GET',
    params={
        'action': 'query',
        'generator': 'categorymembers',
        'gcmtitle': 'Category:Inline cleanup templates',
        'prop': 'info|categories|redirects',
    },
    continuation=True,
)

In [14]:
# Print every response document produced by the continued request.
# NOTE(review): in the recorded output this loop ends with
# "APIError: badcontinue" after the first document.
for r in templates:
    print(r)

{'continue': {'clcontinue': '12503503|Inline_cleanup_templates', 'rdcontinue': 'Bare_URL_inline|49364420', 'continue': '||info'}, 'query': {'pages': {'5632010': {'pageid': 5632010, 'ns': 10, 'title': 'Template:Fix', 'contentmodel': 'wikitext', 'pagelanguage': 'en', 'pagelanguagehtmlcode': 'en', 'pagelanguagedir': 'ltr', 'touched': '2018-09-20T17:03:50Z', 'lastrevid': 814292698, 'length': 1113, 'categories': [{'ns': 14, 'title': 'Category:Exclude in print'}, {'ns': 14, 'title': 'Category:Inline cleanup templates'}, {'ns': 14, 'title': 'Category:Wikipedia fully-protected templates'}, {'ns': 14, 'title': 'Category:Wikipedia metatemplates'}]}, '6245183': {'pageid': 6245183, 'ns': 10, 'title': 'Template:Ambiguous', 'contentmodel': 'wikitext', 'pagelanguage': 'en', 'pagelanguagehtmlcode': 'en', 'pagelanguagedir': 'ltr', 'touched': '2018-09-21T14:36:27Z', 'lastrevid': 635056001, 'length': 541, 'categories': [{'ns': 14, 'title': 'Category:Coherency templates'}, {'ns': 14, 'title': 'Category:In

APIError: badcontinue: Invalid continue param. You should pass the original value returned by the previous query. -- See https://en.wikipedia.org/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/mailman/listinfo/mediawiki-api-announce&gt; for notice of API deprecations and breaking changes.

# Pages that include a Template

In [27]:
# List pages that transclude Template:POV via the 'embeddedin' generator.
# Only one batch is fetched; the recorded response still carries a 'continue' key.
session.get(action='query', generator='embeddedin', geititle='Template:POV')

{'batchcomplete': '',
 'continue': {'geicontinue': '0|5597', 'continue': 'geicontinue||'},
 'query': {'pages': {'1724': {'pageid': 1724,
    'ns': 0,
    'title': 'Ammonius Saccas'},
   '2474': {'pageid': 2474, 'ns': 0, 'title': 'Abaddon'},
   '2875': {'pageid': 2875,
    'ns': 0,
    'title': 'Archduke Charles, Duke of Teschen'},
   '3382': {'pageid': 3382, 'ns': 0, 'title': 'Britney Spears'},
   '3867': {'pageid': 3867, 'ns': 1, 'title': 'Talk:Blue law'},
   '4778': {'pageid': 4778, 'ns': 0, 'title': 'Branch Davidians'},
   '4978': {'pageid': 4978, 'ns': 0, 'title': 'Battle of Berestechko'},
   '5249': {'pageid': 5249, 'ns': 0, 'title': 'Crony capitalism'},
   '5428': {'pageid': 5428, 'ns': 0, 'title': 'History of Cambodia'},
   '5593': {'pageid': 5593, 'ns': 0, 'title': 'Cyprus'}}}}

# Lacking References

- ``{{citation needed}}``  (inline)


# Accuracy

- Boxes: ``{{POV}}``, ``{{POV section}}``, ``{{POV lead}}``, ``{{POV check}}``, ``{{Systemic bias}}``
- Inline: ``{{Lopsided}}``, ``{{POV statement}}``, ``{{POV check inline}}``
- ``{{POV-check}}``
- ``{{Weasel}}``, ``{{Weasel inline}}``, ``{{Who inline}}``, ``{{By whom}}``
- ``{{Peacock}}``, ``{{Peacock term}}``


# Style

- ``{{Advert}}``, ``{{Newsrelease}}``, ``{{Advert inline}}``
- ``{{Promotion inline}}``, ``{{Fanpov}}``, ``{{Like resume}}``



In [35]:
# Fetch pages that transclude Template:POV and request, for the main slot of
# their revisions, the timestamp, SHA-1 and content.
res = session.get(
    action='query',
    prop='revisions',
    generator='embeddedin',
    geititle='Template:POV',
    rvslots='main',
    rvprop='timestamp|sha1|content',
)

The template for good article is https://en.wikipedia.org/wiki/Wikipedia:Featured_articles