In [1]:
import requests

## CRUD

| | SQL | RESTful API |
|:-:|:-:|:-:|
| create | `INSERT` | `POST` |
| read | `SELECT` | `GET` |
| update | `UPDATE` | `PUT` |
| delete | `DELETE` | `DELETE` |

![](http://interactive.blockdiag.com/image?compression=deflate&encoding=base64&src=eJxLyslPzk7JTExXqOZSUFAPcnV0UUgrys9VKM_MzixIBcqoK-jaKRSlFpfmlBSDmOqefsGuQSEKJfkKBfnFJelAKXVrrloAaBAXgg)

## Read results from wikipedia

In [2]:
response = requests.get('https://google.com')

In [3]:
type(response)

requests.models.Response

In [4]:
response.status_code

200

In [5]:
response.url

'https://www.google.com/'

In [6]:
response.text[:50]

'<!doctype html><html itemscope="" itemtype="http:/'

## Query Wikipedia

In [7]:
requests.get('https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q39246&format=json')

<Response [200]>

In [8]:
response = requests.get('https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q39246&format=json')

In [9]:
type(response)

requests.models.Response

In [10]:
response.status_code

200

In [11]:
response_json = response.json()

In [12]:
type(response_json)

dict

In [13]:
response_json.keys()

dict_keys(['entities', 'success'])

In [14]:
response_json['success']

1

In [15]:
response_json['entities'].keys()

dict_keys(['Q39246'])

In [16]:
!pip install wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz
Building wheels for collected packages: wikipedia
  Running setup.py bdist_wheel for wikipedia ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/bf/87/25/df698dd7b66a42c1c5f3bd36f8155d4518d210f5e2c128b440
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [17]:
import wikipedia
results = wikipedia.search(query='Machine Learning', results = 10)
page = wikipedia.WikipediaPage( results[0], preload = True)
page.content

'Machine learning is the subfield of computer science that, according to Arthur Samuel, gives "computers the ability to learn without being explicitly programmed." Samuel, an American pioneer in the field of computer gaming and artificial intelligence, coined the term "machine learning" in 1959 while at IBM. Evolved from the study of pattern recognition and computational learning theory in artificial intelligence, machine learning explores the study and construction of algorithms that can learn from and make predictions on data – such algorithms overcome following strictly static program instructions by making data-driven predictions or decisions, through building a model from sample inputs. Machine learning is employed in a range of computing tasks where designing and programming explicit algorithms with good performance is difficult or infeasible; example applications include email filtering, detection of network intruders or malicious insiders working towards a data breach, optical 

In [27]:
response_wiki = wikipedia.requests.get('https://www.wikidata.org/w/api.php?action=wbgetentities&ids=Q39246&format=json')

In [29]:
response_wiki.json()

{'entities': {'Q39246': {'aliases': {'am': [{'language': 'am',
      'value': 'ሪቻርድ ፌይንማን'}],
    'ar': [{'language': 'ar', 'value': 'ريتشارد فينمان'}],
    'be': [{'language': 'be', 'value': 'Рычард Фейнман'},
     {'language': 'be', 'value': 'Рычард Файнман'}],
    'bg': [{'language': 'bg', 'value': 'Ричард Филипс Фейнман'},
     {'language': 'bg', 'value': 'Ричард Фейнман'},
     {'language': 'bg', 'value': 'Фейнмън'},
     {'language': 'bg', 'value': 'Фейнман'},
     {'language': 'bg', 'value': 'Ричард Филипс Файнмън'},
     {'language': 'bg', 'value': 'Ричард Файнмън'},
     {'language': 'bg', 'value': 'Ричард Фейнмън'},
     {'language': 'bg', 'value': 'Файнмън'},
     {'language': 'bg', 'value': 'Ричард Филипс Фейнмън'}],
    'bn': [{'language': 'bn', 'value': 'আর পি ফাইনম্যান'},
     {'language': 'bn', 'value': 'রিচার্ড ফিলিপ্\u200cস ফাইনম্যান'}],
    'bs': [{'language': 'bs', 'value': 'Richard P. Feynman'}],
    'ca': [{'language': 'ca', 'value': 'Richard Phillips Feynman'},
 

## Using my old repo

In [18]:
base_url = "https://en.wikipedia.org/w/api.php"

In [19]:
action = "?action=mobileview"
parameters = "&format=json&prop=sections&sections=all"
page = "&page="

In [20]:
richard_feynman_url = base_url + action + parameters + page + 'Richard_Feynman'

In [21]:
response_rich_f = requests.get(richard_feynman_url)

In [22]:
response_rich_f.status_code

200

In [23]:
response_rich_f.json()

{'mobileview': {'sections': [{'id': 0},
   {'id': 1, 'line': 'Early life', 'toclevel': 1},
   {'id': 2, 'line': 'Education', 'toclevel': 1},
   {'id': 3, 'line': 'Manhattan Project', 'toclevel': 1},
   {'id': 4, 'line': 'Cornell', 'toclevel': 1},
   {'id': 5, 'line': 'Caltech years', 'toclevel': 1},
   {'id': 6, 'line': 'Personal and political life', 'toclevel': 2},
   {'id': 7, 'line': 'Physics', 'toclevel': 2},
   {'id': 8, 'line': 'Pedagogy', 'toclevel': 2},
   {'id': 9, 'line': "<i>Surely You're Joking Mr. Feynman</i>", 'toclevel': 2},
   {'id': 10, 'line': 'Challenger disaster', 'toclevel': 2},
   {'id': 11, 'line': 'Recognition and awards', 'toclevel': 2},
   {'id': 12, 'line': 'Death', 'toclevel': 1},
   {'id': 13, 'line': 'Popular legacy', 'toclevel': 1},
   {'id': 14, 'line': 'Bibliography', 'toclevel': 1},
   {'id': 15, 'line': 'Selected scientific works', 'toclevel': 2},
   {'id': 16, 'line': 'Textbooks and lecture notes', 'toclevel': 2},
   {'id': 17, 'line': 'Popular works

In [24]:
richard_f_page_headings = response_rich_f.json()

In [25]:
richard_f_page_headings['mobileview'].keys()

dict_keys(['sections'])

## Experimentation with Wikipedia

In [155]:
base_url = "https://en.wikipedia.org/w/api.php"

In [192]:
action = "?action=query"
# parameters = "&format=json&generator=links&gpllimit=500&redirects=true"
parameters = "&prop=info|revisions&list=backlinks|embeddedin|allimages&meta=userinfo"
page = "&page="

In [193]:
ml_url = base_url  + action + parameters + page + 'Category:Machine_learning'

In [194]:
ml_url

'https://en.wikipedia.org/w/api.php?action=query&prop=info|revisions&list=backlinks|embeddedin|allimages&meta=userinfo&page=Category:Machine_learning'

In [195]:
response_ml = requests.get(ml_url)

In [196]:
response_ml.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [197]:
ml_url = 'https://en.wikipedia.org/wiki/Category:Machine_learning'

In [198]:
response_ml = requests.get(ml_url)

In [204]:
from bs4 import BeautifulSoup

In [205]:
soup = BeautifulSoup(response_ml.text)
inner_ul = soup.find('ul', class_='innerUl')
inner_items = [li.text.strip() for li in inner_ul.ul.find_all('li')]

outer_ul_text = soup.ul.span.text.strip()
inner_ul_text = inner_ul.span.text.strip()

result = {outer_ul_text: {inner_ul_text: inner_items}}
print (result)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "html5lib")

  markup_type=markup_type))


AttributeError: 'NoneType' object has no attribute 'ul'