/
Day_08_freebase_intro.py
194 lines (143 loc) · 5.48 KB
/
Day_08_freebase_intro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <headingcell level=1>
# Goals
# <markdowncell>
# We will be working with [Freebase](http://dev.freebase.com) and [OpenRefine](http://openrefine.org/) throughout the semester. Today, I wanted to get us set up with using the API -- the goal today is simply for all of you to go get a Google API key and configure it for use from your IPython notebooks.
# <markdowncell>
# Follow instructions at
#
# http://wiki.freebase.com/wiki/Freebase_API#Getting_Started
#
# to get a key. You may need to go to the Services section at https://code.google.com/apis/console/b/0/ to make sure Freebase access is turned on:
#
# <img src="https://www.evernote.com/shard/s1/sh/0c4fb5bf-d3af-4ab6-a249-e6ee92d93ca6/5534f2603dd54ba2d0bbb5a3bbc36cc5/res/6c448c96-5821-4ef0-be80-1eab40825351/Google_APIs_Console-20130214-093024.jpg.jpg?resizeSmall&width=832" />
#
# <img src="https://www.evernote.com/shard/s1/sh/a19bdb08-69ae-4902-909e-d521627ca16f/f9b32ea595d0206a0a567afe2e19af23/res/f272e2ce-f8d2-41a5-8bad-21d8c3a14f3b/Google_APIs_Console-20130214-093415.jpg.jpg?resizeSmall&width=832" />
#
#
# Then go to the API Access screen and click the Create New Browser Key button at the bottom of the page to get a key.
#
# Make a CREDENTIALS.py in the same directory as your IPython notebooks to hold this key:
#
# FREEBASE_KEY = '[INSERT_YOUR_KEY]'
#
# You need to configure a key of the kind labelled "Key for browser apps (with referers)".
# <headingcell level=1>
# Sample Code to pull up list of planets
# <codecell>
# https://dev.freebase.com/astronomy/planet?instances
# http://wiki.freebase.com/wiki/Google_API_Client_Libraries#Python
# Pull the name of every Freebase topic typed /astronomy/planet and collect
# the names into the list `planets`.
from apiclient import discovery
from apiclient import model
import json

# CREDENTIALS.py is the local, per-user module that holds the API key.
from CREDENTIALS import FREEBASE_KEY

DEVELOPER_KEY = FREEBASE_KEY

# Blank out JsonModel's `alt` parameter before building the service.
# NOTE(review): presumably this stops the client from adding an `alt=...`
# query parameter that the Freebase endpoint does not accept -- confirm.
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# MQL query, serialized to JSON before being sent: ask for the id and name
# of everything whose type is /astronomy/planet.
query = [{'id': None, 'name': None, 'type': '/astronomy/planet'}]

# execute() hands back a JSON document, which is decoded here; the matches
# live under its 'result' key.
response = json.loads(freebase.mqlread(query=json.dumps(query)).execute())

planets = []
for planet in response['result']:
    # print(...) with a single argument behaves identically on Python 2
    # and Python 3, unlike the bare `print x` statement.
    print(planet['name'])
    planets.append(planet['name'])

# Sanity check against the expected answer.
# NOTE(review): this pins the order in which the service returned the
# planets when the notebook was written -- confirm that order is stable.
assert planets == [u'Earth',
                   u'Venus',
                   u'Mars',
                   u'Mercury',
                   u'Jupiter',
                   u'Neptune',
                   u'Saturn',
                   u'Uranus']
# <headingcell level=1>
# Pulling up all current US State Governors, their party affiliations, and Wikipedia page ids
# <codecell>
# http://wiki.freebase.com/wiki/Google_API_Client_Libraries#Python
# Build the DataFrame `governors`: one row per first-level division of the
# United States, with its FIPS code, its current governor, the governor's
# parties and the division's English Wikipedia keys.
from itertools import islice
from apiclient import discovery
from apiclient import model
import json

# CREDENTIALS.py is the local, per-user module that holds the API key.
from CREDENTIALS import FREEBASE_KEY
from pandas import DataFrame, Series

DEVELOPER_KEY = FREEBASE_KEY

# Blank out JsonModel's `alt` parameter before building the service.
# NOTE(review): presumably this stops the client from adding an `alt=...`
# query parameter that the Freebase endpoint does not accept -- confirm.
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# The MQL query is written as JSON text and parsed into Python objects below.
# The `"to": {"optional": "forbidden", ...}` clause is what restricts the
# match to office holders whose term has no end date, i.e. current ones.
query_json = """[{
"id": null,
"wiki_en:key": [{
"/type/key/namespace": "/wikipedia/en_id",
"value": null,
"optional": true
}],
"/location/administrative_division/fips_10_4_region_code": null,
"/location/administrative_division/first_level_division_of": "United States of America",
"type": "/government/governmental_jurisdiction",
"governing_officials": [{
"type": null,
"office_holder": {
"id": null,
"en:name": null,
"type": "/government/politician",
"party": [{
"party": null
}]
},
"basic_title": "Governor",
"from": null,
"to": {
"optional": "forbidden",
"value": null
}
}]
}]""".replace("\n", " ")

query = json.loads(query_json)
response = json.loads(freebase.mqlread(query=json.dumps(query)).execute())

results = list()
# islice(..., None) walks every row; swap None for an integer to look at
# only the first few rows.
for result in islice(response['result'], None):
    # Only the first matching governing official of each state is used.
    office_holder = result['governing_officials'][0]['office_holder']
    results.append({
        'fips': result['/location/administrative_division/fips_10_4_region_code'],
        'state': result['id'],
        'name': office_holder['en:name'],
        # A politician can have several party memberships; keep them all.
        'party': [p['party'] for p in office_holder['party']],
        'en_wikipedia_key': [k["value"] for k in result["wiki_en:key"]],
    })

governors = DataFrame(results)

# Last expression of the notebook cell: shows the first five rows.
governors[:5]
# <codecell>
# which ones are Republicans (or have been Republican)
# Each 'party' cell holds a list of party names, so test membership per row
# and use the resulting boolean Series to select the matching governors.
has_republican_party = governors["party"].apply(
    lambda parties: 'Republican Party' in parties
)
governors[has_republican_party]
# <codecell>
# state centroids
# http://tinyurl.com/cjuy6k3
# Build the DataFrame `states` (id, name, latitude, longitude, FIPS codes)
# for every first-level division of the United States, then scatter-plot
# longitude against latitude.
from itertools import islice
from apiclient import discovery
from apiclient import model
import json

# CREDENTIALS.py is the local, per-user module that holds the API key.
from CREDENTIALS import FREEBASE_KEY
from pandas import DataFrame, Series

DEVELOPER_KEY = FREEBASE_KEY

# Blank out JsonModel's `alt` parameter before building the service.
# NOTE(review): presumably this stops the client from adding an `alt=...`
# query parameter that the Freebase endpoint does not accept -- confirm.
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# The MQL query is written as JSON text and parsed into Python objects below.
query_json = """
[{
"id": null,
"name": null,
"/location/administrative_division/fips_10_4_region_code": [],
"/location/administrative_division/first_level_division_of": "United States of America",
"/location/location/geolocation": {
"latitude": null,
"longitude": null
}
}]""".replace("\n", " ")

query = json.loads(query_json)
response = json.loads(freebase.mqlread(query=json.dumps(query)).execute())

results = list()
# islice(..., None) walks every row; swap None for an integer to look at
# only the first few rows.
for result in islice(response['result'], None):
    geolocation = result['/location/location/geolocation']
    results.append({
        'id': result['id'],
        'name': result['name'],
        # Coerce the coordinates to float so they can be plotted.
        'latitude': float(geolocation['latitude']),
        'longitude': float(geolocation['longitude']),
        # Requested with [] in the query.
        # NOTE(review): presumably this comes back as a list of codes
        # rather than a single value -- confirm.
        'fips': result['/location/administrative_division/fips_10_4_region_code'],
    })

states = DataFrame(results)

# NOTE(review): `plt` is not imported anywhere in the visible code. The
# notebook presumably ran in pylab mode, which injects it; when running this
# as a plain script, `import matplotlib.pyplot as plt` is needed first.
plt.scatter(states["longitude"], states["latitude"])