Skip to content

Commit

Permalink
changes based on feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
Cynthia Cobb committed May 31, 2018
1 parent aa050ea commit ea97a8a
Showing 1 changed file with 31 additions and 20 deletions.
51 changes: 31 additions & 20 deletions get-extracts/get-extracts.py
@@ -1,3 +1,4 @@
#!/usr/bin/env python

# Ookla
# Updated 5/30/18
Expand All @@ -11,7 +12,6 @@
import urllib.request as compatible_urllib
except ImportError: # Python 2
import urllib2 as compatible_urllib

import json
import os
import base64
Expand Down Expand Up @@ -39,7 +39,17 @@
opener.addheaders = [('Authorization', 'Basic %s' % base64string)]

# makes request for files
# makes request for files; translate known HTTP failures into readable messages
try:
    response = compatible_urllib.urlopen(extracts_url).read()
except compatible_urllib.HTTPError as error:
    if error.code == 401:
        # fixed typo: "secrete" -> "secret"
        print("Authentication Error\nPlease verify that the API key and secret are correct")
    elif error.code == 404:
        print("The account associated with this API key does not have any files attached to it.\nPlease contact your technical account manager to enable data extracts for this account.")
    elif error.code == 500:
        print("Server Error\nPlease contact your technical account manager")
    else:
        # previously unknown codes exited with no explanation at all
        print("HTTP Error %s\nPlease contact your technical account manager" % error.code)
    # exit non-zero so callers/shell scripts can detect the failure
    sys.exit(1)

try:
content = json.loads(response)
except ValueError as err:
Expand All @@ -48,34 +58,35 @@

#############################################################
# loop through contents, sort through files and directories
def sort_files_and_directories(contents, files=None):
    """Recursively walk the extract listing, keeping the newest file per dataset.

    contents -- decoded JSON directory listing (list of dicts with at least
                'type', 'name', 'url' keys, per the visible accesses below)
    files    -- accumulator dict mapping dataset prefix -> newest file info.
                Defaults to a fresh dict per top-level call; the original used
                a mutable default ({}), which is shared across calls and would
                leak state between invocations.
    Returns the accumulator dict.
    """
    if files is None:
        files = {}
    for entry in contents:
        # data files are named '<dataset>_20...'; skip header files entirely
        if entry['type'] == 'file' and entry['name'].find('headers') == -1 and '_20' in entry['name']:
            filter(entry, files)
        elif entry['type'] == 'dir':
            # recurse into the subdirectory, sharing the same accumulator
            subdir = extracts_url + entry['url']
            sub_files = json.loads(compatible_urllib.urlopen(subdir).read())
            sort_files_and_directories(sub_files, files)

    return files

# determine if file should be downloaded - check for new datasets and most current file for exisiting datasets
def filter(data_file, files):
    """Record data_file in files when it is the first or newest file for its dataset.

    NOTE(review): keeps the pre-existing module-level name even though it
    shadows the builtin filter(), since other code in this file calls it.
    """
    # the dataset prefix is everything before the '_20' date stamp
    dataset = data_file['name'][:data_file['name'].index('_20')]
    current = files.get(dataset)
    if current is None or data_file['mtime'] > current['age']:
        files[dataset] = {
            'name': data_file['name'],
            'url': data_file['url'],
            'age': data_file['mtime'],
        }

def download(files):
    """Download each selected extract into storageDir.

    files -- dict mapping dataset prefix -> {'name', 'url', 'age'}; may be empty.
    Prints a notice and returns early when there is nothing to download.
    """
    if not files:
        print("No data extract files found. If this is an error, please contact your technical account manager.")
        return

    # iterate values directly; the original unpacked .items() but never used the key
    for entry in files.values():
        response = compatible_urllib.urlopen(entry['url'])
        try:
            flocation = storageDir + '/' + entry['name']
            print(("Downloading: %s" % (entry['name'])))
            with open(flocation, 'wb') as out:
                out.write(response.read())
        finally:
            # urllib2 responses are not context managers on Python 2,
            # so close explicitly to avoid leaking the connection
            response.close()
#############################################################
# collect the newest file per dataset from the listing, then fetch them
extract_files = sort_files_and_directories(content)
download(extract_files)

0 comments on commit ea97a8a

Please sign in to comment.