From bfbae9587fe0a04f8157fd1d51ab2602c1b1e817 Mon Sep 17 00:00:00 2001 From: pa1007 Date: Wed, 23 Feb 2022 15:11:52 +0100 Subject: [PATCH 1/3] Add alias calls from API --- tests/mock_data.py | 145 ++++++++++++++++++++++++--------------- tests/page_terms_test.py | 34 +++++++++ wikipediaapi/__init__.py | 91 ++++++++++++++++++++++-- 3 files changed, 212 insertions(+), 58 deletions(-) create mode 100644 tests/page_terms_test.py diff --git a/tests/mock_data.py b/tests/mock_data.py index f6b95b3..b4985c6 100644 --- a/tests/mock_data.py +++ b/tests/mock_data.py @@ -30,30 +30,30 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test 1", "extract": ( - "Summary text\n\n\n" + - "== Section 1 ==\n" + - "Text for section 1\n\n\n" + - "=== Section 1.1 ===\n" + - "Text for section 1.1\n\n\n" + - "=== Section 1.2 ===\n" + - "Text for section 1.2\n\n\n" + - "== Section 2 ==\n" + - "Text for section 2\n\n\n" + - "== Section 3 ==\n" + - "Text for section 3\n\n\n" + - "== Section 4 ==\n\n\n" + - "=== Section 4.1 ===\n" + - "Text for section 4.1\n\n\n" + - "=== Section 4.2 ===\n" + - "Text for section 4.2\n\n\n" + - "==== Section 4.2.1 ====\n" + - "Text for section 4.2.1\n\n\n" + - "==== Section 4.2.2 ====\n" + - "Text for section 4.2.2\n\n\n" + - "== Section 5 ==\n" + - "Text for section 5\n\n\n" + - "=== Section 5.1 ===\n" + - "Text for section 5.1\n" + "Summary text\n\n\n" + + "== Section 1 ==\n" + + "Text for section 1\n\n\n" + + "=== Section 1.1 ===\n" + + "Text for section 1.1\n\n\n" + + "=== Section 1.2 ===\n" + + "Text for section 1.2\n\n\n" + + "== Section 2 ==\n" + + "Text for section 2\n\n\n" + + "== Section 3 ==\n" + + "Text for section 3\n\n\n" + + "== Section 4 ==\n\n\n" + + "=== Section 4.1 ===\n" + + "Text for section 4.1\n\n\n" + + "=== Section 4.2 ===\n" + + "Text for section 4.2\n\n\n" + + "==== Section 4.2.1 ====\n" + + "Text for section 4.2.1\n\n\n" + + "==== Section 4.2.2 ====\n" + + "Text for section 4.2.2\n\n\n" + + "== Section 5 ==\n" + + "Text for section 5\n\n\n" + + "=== Section 5.1 ===\n" + + "Text for section 5.1\n" ) } } @@ -79,7 +79,7 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "No Sections", "extract": ( - "Summary text\n\n\n" + "Summary text\n\n\n" ) } } @@ -105,30 +105,30 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test 1", "extract": ( - "

Summary text\n\n

\n" + - "

Section 1

\n" + - "

Text for section 1

\n\n\n" + - "

Section 1.1

\n" + - "

Text for section 1.1\n\n\n

" + - "

Section 1.2

\n" + - "

Text for section 1.2\n\n\n

" + - "

Section 2

\n" + - "

Text for section 2\n\n\n

" + - "

Section 3

\n" + - "

Text for section 3\n\n\n

" + - "

Section 4

\n" + - "

Section 4.1

\n" + - "

Text for section 4.1\n\n\n

" + - "

Section 4.2

\n" + - "

Text for section 4.2\n\n\n

" + - "

Section 4.2.1

\n" + - "

Text for section 4.2.1\n\n\n

" + - "

Section 4.2.2

\n" + - "

Text for section 4.2.2\n\n\n

" + - "

Section 5

\n" + - "

Text for section 5\n\n\n

" + - "

Section 5.1

\n" + - "

Text for section 5.1\n\n\n

" + "

Summary text\n\n

\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n\n" + + "

Section 1.1

\n" + + "

Text for section 1.1\n\n\n

" + + "

Section 1.2

\n" + + "

Text for section 1.2\n\n\n

" + + "

Section 2

\n" + + "

Text for section 2\n\n\n

" + + "

Section 3

\n" + + "

Text for section 3\n\n\n

" + + "

Section 4

\n" + + "

Section 4.1

\n" + + "

Text for section 4.1\n\n\n

" + + "

Section 4.2

\n" + + "

Text for section 4.2\n\n\n

" + + "

Section 4.2.1

\n" + + "

Text for section 4.2.1\n\n\n

" + + "

Section 4.2.2

\n" + + "

Text for section 4.2.2\n\n\n

" + + "

Section 5

\n" + + "

Text for section 5\n\n\n

" + + "

Section 5.1

\n" + + "

Text for section 5.1\n\n\n

" ) } } @@ -154,11 +154,11 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test Edit", "extract": ( - "

Summary text\n\n

\n" + - "

Section 1

\n" + - "

Text for section 1

\n\n\n" + - "

Section with EditEdit

\n" + - "

Text for section with edit\n\n\n

" + "

Summary text\n\n

\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n\n" + + "

Section with EditEdit

\n" + + "

Text for section with edit\n\n\n

" ) } } @@ -586,4 +586,41 @@ def wikipedia_api_request(page, params): } } }, + 'en:action=query&prop=pageterms&titles=Test1&': { + "batchcomplete": "", + "query": { + "pages": { + "1": { + "pageid": 1, + "ns": 0, + "title": "Test1", + "terms": { + "alias": [ + "Test 1", + "Test one", + "Test ONE" + ], + "label": [ + "Test 1" + ], + "description": [ + "test" + ] + } + } + } + } + }, + 'en:action=query&prop=pageterms&titles=Non_Existent&': { + "batchcomplete": "", + "query": { + "pages": { + "-1": { + "ns": 0, + "title": "Non Existent", + "missing": "" + } + } + } + }, } diff --git a/tests/page_terms_test.py b/tests/page_terms_test.py new file mode 100644 index 0000000..9326350 --- /dev/null +++ b/tests/page_terms_test.py @@ -0,0 +1,34 @@ +import unittest + +import wikipediaapi +from tests.mock_data import wikipedia_api_request + + +class TestPageTerm(unittest.TestCase): + def setUp(self): + self.wiki = wikipediaapi.Wikipedia("en") + self.wiki._query = wikipedia_api_request + + def test_alias_good_parsing(self): + page = self.wiki.page('Test1') + self.assertEqual(page.alias, ["Test 1", "Test one", "Test ONE"]) + + def test_description_good_parsing(self): + page = self.wiki.page('Test1') + self.assertEqual(page.desc, ['test']) + + def test_label_good_parsing(self): + page = self.wiki.page('Test1') + self.assertEqual(page.label, ['Test 1']) + + def test_label_nonexistent_page(self): + page = self.wiki.page('Non_Existent') + self.assertEqual(page.label, []) + + def test_alias_nonexistent_page(self): + page = self.wiki.page('Non_Existent') + self.assertEqual(page.alias, []) + + def test_description_nonexistent_page(self): + page = self.wiki.page('Non_Existent') + self.assertEqual(page.desc, []) \ No newline at end of file diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index 267d8a8..070baa8 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -7,13 +7,13 @@ """ __version__ = (0, 5, 4) + import logging import re -from enum import IntEnum -from typing import Union - import requests +from enum import IntEnum from typing import Dict, Any, List, Optional +from typing import Union from urllib import parse log = logging.getLogger(__name__) @@ -563,6 +563,40 @@ def categorymembers( return self._build_categorymembers(v, page) + def pageterms(self, + page: 'WikipediaPage', + **kwargs, + ): + """ + https://www.mediawiki.org/w/api.php?action=query&prop=pageterms + https://www.wikidata.org/wiki/Help:Aliases + + :param page: :class:`WikipediaPage` + :param kwargs: parameters used in API call + :return: Alias,label and description of given page + """ + params = { + 'action': 'query', + 'prop': 'pageterms', + 'titles': page.title, + } + used_params = kwargs + used_params.update(params) + + raw = self._query( + page, + used_params + ) + self._common_attributes(raw['query'], page) + pages = raw['query']['pages'] + for k, v in pages.items(): + if k == '-1': + page._attributes['pageid'] = -1 + return page._pageterms + else: + return self._build_pageterms(v, page) + return page._pageterms + def _query( self, page: 'WikipediaPage', @@ -768,6 +802,17 @@ def _build_categorymembers( return page._categorymembers + def _build_pageterms( + self, + extract, + page + ) -> PagesDict: + + self._common_attributes(extract, page) + + page._pageterms = extract.get('terms', []) + return page._pageterms + def _common_attributes( self, extract, @@ -909,7 +954,10 @@ class WikipediaPage(object): "canonicalurl": ["info"], "readable": ["info"], "preload": ["info"], - "displaytitle": ["info"] + "displaytitle": ["info"], + "alias": ["pageterms"], + "label": ["pageterms"], + "description": ["pageterms"] } def __init__( @@ -929,6 +977,7 @@ def __init__( self._backlinks = {} # type: PagesDict self._categories = {} # type: PagesDict self._categorymembers = {} # type: PagesDict + self._pageterms = {'alias': [], 'label': [], 'description': []} # type : PagesDict self._called = { 'extracts': False, @@ -938,6 +987,7 @@ def __init__( 'backlinks': False, 'categories': False, 'categorymembers': False, + 'pageterms': False, } self._attributes = { @@ -1144,6 +1194,39 @@ def categorymembers(self) -> PagesDict: self._fetch('categorymembers') return self._categorymembers + @property + def alias(self) -> List[str]: + """ + Returns the list alias of the current page. + + :return: alias + """ + if not self._called['pageterms']: + self._fetch('pageterms') + return self._pageterms['alias'] + + @property + def label(self) -> List[str]: + """ + Returns the list of label of the current page. + + :return: label + """ + if not self._called['pageterms']: + self._fetch('pageterms') + return self._pageterms['label'] + + @property + def desc(self) -> List[str]: + """ + Returns the list of descriptive word of the current page. + + :return: description + """ + if not self._called['pageterms']: + self._fetch('pageterms') + return self._pageterms['description'] + def _fetch(self, call) -> 'WikipediaPage': getattr(self.wiki, call)(self) self._called[call] = True From 09350347d7be208402a45ea74dc4eb5b3f31075b Mon Sep 17 00:00:00 2001 From: pa1007 Date: Wed, 23 Feb 2022 15:24:05 +0100 Subject: [PATCH 2/3] Remove of unintentional refactor --- tests/mock_data.py | 108 ++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/mock_data.py b/tests/mock_data.py index b4985c6..59ac272 100644 --- a/tests/mock_data.py +++ b/tests/mock_data.py @@ -30,30 +30,30 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test 1", "extract": ( - "Summary text\n\n\n" + - "== Section 1 ==\n" + - "Text for section 1\n\n\n" + - "=== Section 1.1 ===\n" + - "Text for section 1.1\n\n\n" + - "=== Section 1.2 ===\n" + - "Text for section 1.2\n\n\n" + - "== Section 2 ==\n" + - "Text for section 2\n\n\n" + - "== Section 3 ==\n" + - "Text for section 3\n\n\n" + - "== Section 4 ==\n\n\n" + - "=== Section 4.1 ===\n" + - "Text for section 4.1\n\n\n" + - "=== Section 4.2 ===\n" + - "Text for section 4.2\n\n\n" + - "==== Section 4.2.1 ====\n" + - "Text for section 4.2.1\n\n\n" + - "==== Section 4.2.2 ====\n" + - "Text for section 4.2.2\n\n\n" + - "== Section 5 ==\n" + - "Text for section 5\n\n\n" + - "=== Section 5.1 ===\n" + - "Text for section 5.1\n" + "Summary text\n\n\n" + + "== Section 1 ==\n" + + "Text for section 1\n\n\n" + + "=== Section 1.1 ===\n" + + "Text for section 1.1\n\n\n" + + "=== Section 1.2 ===\n" + + "Text for section 1.2\n\n\n" + + "== Section 2 ==\n" + + "Text for section 2\n\n\n" + + "== Section 3 ==\n" + + "Text for section 3\n\n\n" + + "== Section 4 ==\n\n\n" + + "=== Section 4.1 ===\n" + + "Text for section 4.1\n\n\n" + + "=== Section 4.2 ===\n" + + "Text for section 4.2\n\n\n" + + "==== Section 4.2.1 ====\n" + + "Text for section 4.2.1\n\n\n" + + "==== Section 4.2.2 ====\n" + + "Text for section 4.2.2\n\n\n" + + "== Section 5 ==\n" + + "Text for section 5\n\n\n" + + "=== Section 5.1 ===\n" + + "Text for section 5.1\n" ) } } @@ -79,7 +79,7 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "No Sections", "extract": ( - "Summary text\n\n\n" + "Summary text\n\n\n" ) } } @@ -105,30 +105,30 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test 1", "extract": ( - "

Summary text\n\n

\n" + - "

Section 1

\n" + - "

Text for section 1

\n\n\n" + - "

Section 1.1

\n" + - "

Text for section 1.1\n\n\n

" + - "

Section 1.2

\n" + - "

Text for section 1.2\n\n\n

" + - "

Section 2

\n" + - "

Text for section 2\n\n\n

" + - "

Section 3

\n" + - "

Text for section 3\n\n\n

" + - "

Section 4

\n" + - "

Section 4.1

\n" + - "

Text for section 4.1\n\n\n

" + - "

Section 4.2

\n" + - "

Text for section 4.2\n\n\n

" + - "

Section 4.2.1

\n" + - "

Text for section 4.2.1\n\n\n

" + - "

Section 4.2.2

\n" + - "

Text for section 4.2.2\n\n\n

" + - "

Section 5

\n" + - "

Text for section 5\n\n\n

" + - "

Section 5.1

\n" + - "

Text for section 5.1\n\n\n

" + "

Summary text\n\n

\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n\n" + + "

Section 1.1

\n" + + "

Text for section 1.1\n\n\n

" + + "

Section 1.2

\n" + + "

Text for section 1.2\n\n\n

" + + "

Section 2

\n" + + "

Text for section 2\n\n\n

" + + "

Section 3

\n" + + "

Text for section 3\n\n\n

" + + "

Section 4

\n" + + "

Section 4.1

\n" + + "

Text for section 4.1\n\n\n

" + + "

Section 4.2

\n" + + "

Text for section 4.2\n\n\n

" + + "

Section 4.2.1

\n" + + "

Text for section 4.2.1\n\n\n

" + + "

Section 4.2.2

\n" + + "

Text for section 4.2.2\n\n\n

" + + "

Section 5

\n" + + "

Text for section 5\n\n\n

" + + "

Section 5.1

\n" + + "

Text for section 5.1\n\n\n

" ) } } @@ -154,11 +154,11 @@ def wikipedia_api_request(page, params): "ns": 0, "title": "Test Edit", "extract": ( - "

Summary text\n\n

\n" + - "

Section 1

\n" + - "

Text for section 1

\n\n\n" + - "

Section with EditEdit

\n" + - "

Text for section with edit\n\n\n

" + "

Summary text\n\n

\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n\n" + + "

Section with EditEdit

\n" + + "

Text for section with edit\n\n\n

" ) } } From 2dd78ffe77730a7b1824100265e6df06c52b2eb7 Mon Sep 17 00:00:00 2001 From: pa1007 Date: Thu, 24 Feb 2022 09:39:27 +0100 Subject: [PATCH 3/3] Fix keyError for missing data on specific pages --- wikipediaapi/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index 070baa8..09c914c 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -809,7 +809,6 @@ def _build_pageterms( ) -> PagesDict: self._common_attributes(extract, page) - page._pageterms = extract.get('terms', []) return page._pageterms @@ -977,7 +976,7 @@ def __init__( self._backlinks = {} # type: PagesDict self._categories = {} # type: PagesDict self._categorymembers = {} # type: PagesDict - self._pageterms = {'alias': [], 'label': [], 'description': []} # type : PagesDict + self._pageterms = {} # type : PagesDict self._called = { 'extracts': False, @@ -1203,7 +1202,7 @@ def alias(self) -> List[str]: """ if not self._called['pageterms']: self._fetch('pageterms') - return self._pageterms['alias'] + return self._pageterms.get('alias', []) @property def label(self) -> List[str]: @@ -1214,7 +1213,7 @@ def label(self) -> List[str]: """ if not self._called['pageterms']: self._fetch('pageterms') - return self._pageterms['label'] + return self._pageterms.get('label', []) @property def desc(self) -> List[str]: @@ -1225,7 +1224,7 @@ def desc(self) -> List[str]: """ if not self._called['pageterms']: self._fetch('pageterms') - return self._pageterms['description'] + return self._pageterms.get('description', []) def _fetch(self, call) -> 'WikipediaPage': getattr(self.wiki, call)(self)