Skip to content

Commit

Permalink
added random pages
Browse files (browse the repository at this point in the history)
  • Loading branch information
Tom committed Mar 21, 2013
1 parent 2bb031f commit f7086dd
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tests.py
Expand Up @@ -7,7 +7,7 @@ def printJSON(ugly_json):

def main():
w = wiki.Wiki() # defaults to English Wikipedia
page = w.getPage('Wikipedia')
page = w.getPage('Python (programming language)')
with open('temp.txt', 'w') as f:
f.write(page.getArticle().decode('ascii', 'ignore'))
f.close()
Expand Down
32 changes: 23 additions & 9 deletions wiki.py
Expand Up @@ -9,12 +9,19 @@ def __init__(self, endpoint='http://en.wikipedia.org/w/api.php'):
self.endpoint = endpoint

class Page():
def __init__(self, endpoint, title='', random=False):
    """Fetch one page's latest revision content and keep the decoded JSON.

    Issues a GET request to ``endpoint`` with ``action=query`` and
    ``prop=revisions`` / ``rvprop=content``, then stores the decoded
    response body on ``self.json``.

    Args:
        endpoint: URL of the API endpoint to query.
        title: Title of the page to fetch. Ignored when ``random`` is true.
        random: When true, request a page through ``generator=random``
            instead of looking one up by ``title``.
    """
    # Both request modes share everything except the page selector, so the
    # common parameters are declared once.
    payload = {'format': 'json',
               'action': 'query',
               'prop': 'revisions',
               'rvprop': 'content'}
    if random:
        payload['generator'] = 'random'
    else:
        payload['titles'] = title
    # `requests` and `headers` come from the enclosing module.
    r = requests.get(endpoint, params=payload, headers=headers)
    self.json = r.json()

Expand All @@ -24,15 +31,19 @@ def getContent(self):

def getArticle(self):
    """Return the body text of the page with leading and trailing matter cut.

    The text comes from ``self.getContent()``. Two trims are applied:

    * Leading: if the text contains a ``'''`` marker and a ``}}`` occurs
      before it, everything up to and including the last such ``}}`` is
      dropped.
    * Trailing: the text is cut at the earliest occurrence of any of the
      known closing section headings (See also, References, Further
      reading, External links).

    Pages that lack either marker are returned unchanged apart from
    whitespace stripping.

    Returns:
        The trimmed text with surrounding whitespace removed.
    """
    content = self.getContent()

    # Guard both lookups: not every page has a ''' marker or a preceding
    # '}}', and an unguarded index()/rindex() would raise ValueError.
    if "'''" in content:
        content_head = content[:content.index("'''")]
        if '}}' in content_head:
            article_start = content_head.rindex('}}') + len('}}')
            content = content[article_start:]

    article_endings = ('== See also ==', '==See also==',
                       '== References ==', '==References==',
                       '== Further reading ==', '==Further reading==',
                       '== External links ==', '==External links==')
    # Cut at whichever heading appears first in the text, not whichever is
    # listed first; otherwise a section that precedes the matched heading
    # would be left in the result.
    cut_points = [content.index(ending)
                  for ending in article_endings
                  if ending in content]
    if cut_points:
        content = content[:min(cut_points)]
    return content.strip()

def getReferences(self):
content = self.getContent()
Expand All @@ -43,4 +54,7 @@ def getReferences(self):
return 'No references found.'

def getPage(self, title):
    """Return a ``Page`` for ``title``, built against this object's endpoint.

    Args:
        title: Title of the page to fetch.

    Returns:
        The result of ``self.Page(self.endpoint, title)``.
    """
    return self.Page(self.endpoint, title)

def getRandomPage(self):
    """Return a ``Page`` requested in random mode from this object's endpoint.

    Returns:
        The result of ``self.Page(self.endpoint, random=True)``.
    """
    return self.Page(self.endpoint, random=True)

0 comments on commit f7086dd

Please sign in to comment.