Skip to content

Commit

Permalink
changes based on feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
Cynthia Cobb committed May 31, 2018
1 parent aa050ea commit ea97a8a
Showing 1 changed file with 31 additions and 20 deletions.
51 changes: 31 additions & 20 deletions get-extracts/get-extracts.py
@@ -1,3 +1,4 @@
#!/usr/bin/env python

# Ookla
# Updated 5/30/18
Expand All @@ -11,7 +12,6 @@
import urllib.request as compatible_urllib
except ImportError: # Python 2
import urllib2 as compatible_urllib

import json
import os
import base64
Expand Down Expand Up @@ -39,7 +39,17 @@
opener.addheaders = [('Authorization', 'Basic %s' % base64string)]

# makes request for files
# makes request for files; translate known HTTP failures into readable messages
try:
    response = compatible_urllib.urlopen(extracts_url).read()
except compatible_urllib.HTTPError as error:
    if error.code == 401:
        # fixed typo: "secrete" -> "secret"
        print("Authentication Error\nPlease verify that the API key and secret are correct")
    elif error.code == 404:
        print("The account associated with this API key does not have any files attached to it.\nPlease contact your technical account manager to enable data extracts for this account.")
    elif error.code == 500:
        print("Server Error\nPlease contact your technical account manager")
    else:
        # previously unknown codes exited with no explanation at all
        print("HTTP Error %s\nPlease contact your technical account manager" % error.code)
    # exit non-zero so callers/shell scripts can detect the failure
    sys.exit(1)

try:
content = json.loads(response)
except ValueError as err:
Expand All @@ -48,34 +58,35 @@

#############################################################
# loop through contents, sort through files and directories
def sort_files_and_directories(contents, files=None):
    """Recursively walk the extract listing, keeping the newest file per dataset.

    contents -- decoded JSON directory listing (list of dicts with at least
                'type', 'name', 'url' keys, per the visible accesses below)
    files    -- accumulator dict mapping dataset prefix -> newest file info.
                Defaults to a fresh dict per top-level call; the original used
                a mutable default ({}), which is shared across calls and would
                leak state between invocations.
    Returns the accumulator dict.
    """
    if files is None:
        files = {}
    for entry in contents:
        # data files are named '<dataset>_20...'; skip header files entirely
        if entry['type'] == 'file' and entry['name'].find('headers') == -1 and '_20' in entry['name']:
            filter(entry, files)
        elif entry['type'] == 'dir':
            # recurse into the subdirectory, sharing the same accumulator
            subdir = extracts_url + entry['url']
            sub_files = json.loads(compatible_urllib.urlopen(subdir).read())
            sort_files_and_directories(sub_files, files)

    return files

# determine if file should be downloaded - check for new datasets and most current file for exisiting datasets
def filter(data_file, files):
    """Record data_file in files when it is the first or newest file for its dataset.

    NOTE(review): keeps the pre-existing module-level name even though it
    shadows the builtin filter(), since other code in this file calls it.
    """
    # the dataset prefix is everything before the '_20' date stamp
    dataset = data_file['name'][:data_file['name'].index('_20')]
    current = files.get(dataset)
    if current is None or data_file['mtime'] > current['age']:
        files[dataset] = {
            'name': data_file['name'],
            'url': data_file['url'],
            'age': data_file['mtime'],
        }

def download(files):
    """Download each selected extract into storageDir.

    files -- dict mapping dataset prefix -> {'name', 'url', 'age'}; may be empty.
    Prints a notice and returns early when there is nothing to download.
    """
    if not files:
        print("No data extract files found. If this is an error, please contact your technical account manager.")
        return

    # iterate values directly; the original unpacked .items() but never used the key
    for entry in files.values():
        response = compatible_urllib.urlopen(entry['url'])
        try:
            flocation = storageDir + '/' + entry['name']
            print(("Downloading: %s" % (entry['name'])))
            with open(flocation, 'wb') as out:
                out.write(response.read())
        finally:
            # urllib2 responses are not context managers on Python 2,
            # so close explicitly to avoid leaking the connection
            response.close()
#############################################################
# collect the newest file per dataset from the listing, then fetch them
extract_files = sort_files_and_directories(content)
download(extract_files)

0 comments on commit ea97a8a

Please sign in to comment.