/
ppoc.py
41 lines (32 loc) · 1.31 KB
/
ppoc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
import json
import os
import re
from datetime import datetime
from urlparse import *

import requests

from config import data_dir
# Timestamp taken once when the script starts; createfile() stamps it into
# every output filename.
today = datetime.now()

# Search endpoint used for the first page of each subject.
url = 'http://loc.gov/pictures/search/?'

# Query-string parameters shared by every search (getdata adds 'q').
query = {
    'c': 50,
    'sp': 1,        # page number of the first request
    'fo': 'json',   # responses are parsed as JSON
    'co!': 'hh',    # exclude HAER, HABS
    'fa': 'displayed:anywhere',
    'fi': 'subject',
}
def getdata(disasters):
    """Download every page of search results for each subject and save them.

    For each subject the first page is requested from ``url`` with the shared
    ``query`` parameters plus ``q=<subject>``.  Further pages are reached by
    following the ``pages.next`` link found in each JSON response.  The
    ``results`` entry of every page is handed to ``createfile`` together with
    the subject and the page number.

    Args:
        disasters: iterable of subject search strings.

    Raises:
        requests.HTTPError: if a request is answered with a 4xx/5xx status.
    """
    for subject in disasters:
        # Work on a copy so the module-level ``query`` defaults are not
        # modified as a side effect of running a search.
        search_params = dict(query, q=subject)
        r = requests.get(url, params=search_params)
        # Fail with a clear HTTP error instead of an opaque JSON/key error.
        r.raise_for_status()
        response = r.json()
        createfile(response['results'], subject, search_params['sp'])

        # get rest of pages; a missing or empty 'next' ends the pagination
        next_link = response['pages'].get('next')
        while next_link:
            # the 'next' link does not carry the JSON format flag, so add it
            r = requests.get(next_link + '&fo=json')
            r.raise_for_status()
            response = r.json()
            # creates dict out of params in url; 'sp' holds the page number
            page_params = parse_qs(urlparse(r.url).query)
            createfile(response['results'], subject, page_params['sp'][0])
            next_link = response['pages'].get('next')
def createfile(data, subject, page):
    """Write one page of results to a timestamped and pagestamped JSON file.

    The file is created inside ``data_dir`` and is named
    ``<subject>-<timestamp>-<page>-ppoc.json``, where the timestamp is the
    module-level ``today`` value in ISO format.

    Args:
        data: JSON-serialisable results of a single page.
        subject: search subject; it is cut at its first non-word character
            (for example 'forest fires' becomes 'forest').
        page: page number (int or str) the results belong to.
    """
    # Drop everything from the first non-word character onwards so the
    # subject is safe to use as a filename prefix.
    subject = re.sub(r'\W.*', '', subject)
    name = '{0}-{1}-{2}-ppoc.json'.format(subject, today.isoformat(), page)
    # os.path.join works whether or not data_dir ends with a path separator.
    filename = os.path.join(data_dir, name)
    with open(filename, 'w') as f:
        json.dump(data, f)
# Subjects to harvest; getdata() searches each one and saves it page by page.
disasters = [
    'earthquakes',
    'hurricanes',
    'floods',
    'forest fires',
]
getdata(disasters)