Skip to content

Commit

Permalink
Push to github, using code borrowed from @andylolz
Browse files Browse the repository at this point in the history
  • Loading branch information
markbrough committed May 19, 2017
1 parent 36672bb commit 349c570
Showing 1 changed file with 58 additions and 10 deletions.
68 changes: 58 additions & 10 deletions scraper.py
Expand Up @@ -3,6 +3,15 @@
import csv
import shutil
from glob import glob
from os import environ, remove
from os.path import join

import requests
import scraperwiki
import unicodecsv
from git import Repo
from lxml import html

# Working directory for the local git clone that gets pushed to GitHub;
# created/removed by init_git_repo() and push_to_github().
output_dir = "output"
# Directory the scraped CSV output is written to by run().
data_dir = "data"

def get_page():
r = requests.get(URL)
Expand All @@ -14,18 +23,57 @@ def clean_fy(value):
if value == "NA": return "Unknown"
fy_start, fy_end = value.split("-")
return fy_start


def save_csv(name, codelist, fieldnames):
    """Write *codelist* (an iterable of dicts) to ``data_dir/name`` as CSV.

    All fields are quoted and the column order follows *fieldnames*.
    The original body referenced an undefined ``writer`` and ignored both
    ``name`` and ``fieldnames``; it now creates the writer and header itself.
    unicodecsv encodes rows to UTF-8 bytes, so the file is opened in binary
    mode.
    """
    with open(join(data_dir, name), 'wb') as f:
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames,
                                       quoting=csv.QUOTE_ALL)
        writer.writeheader()
        for row in codelist:
            writer.writerow(row)

def init_git_repo():
    """Prepare a fresh local repo in ``output_dir`` on an ``update`` branch.

    Removes any stale ``output_dir``, initialises a git repo there with an
    authenticated ``origin`` remote, pulls the remote ``update`` branch
    (falling back to ``gh-pages`` when ``update`` does not exist yet),
    checks out a new ``update`` branch, and deletes previously generated
    CSVs so upstream deletions are reflected in the next commit.

    Requires the ``MORPH_GH_API_KEY`` environment variable.
    """
    shutil.rmtree(output_dir, ignore_errors=True)
    git = Repo.init(output_dir).git
    # The API token is embedded in the remote URL so later pushes need no
    # interactive credentials.
    git.remote('add', 'origin',
               'https://{}@github.com/markbrough/country-fiscal-years.git'.format(
                   environ.get('MORPH_GH_API_KEY')))
    try:
        git.pull('origin', 'update')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate. The `update` branch may simply not exist on the
        # remote yet - start from gh-pages instead.
        git.pull('origin', 'gh-pages')
    git.checkout(b='update')
    # NOTE(review): this clears CSVs in data_dir while the repo lives in
    # output_dir - confirm data_dir is meant to be (or be copied into) the
    # repo working tree before push_to_github() commits.
    for to_remove in glob(join(data_dir, '*.csv')):
        remove(to_remove)

def push_to_github():
    """Commit everything in ``output_dir`` to the ``update`` branch, push it,
    open a pull request against ``gh-pages``, then delete ``output_dir``.

    Reads credentials from the ``MORPH_GH_EMAIL``, ``MORPH_GH_USERNAME`` and
    ``MORPH_GH_API_KEY`` environment variables. Assumes init_git_repo() has
    already set up the repo and its authenticated ``origin`` remote.
    """
    url = 'https://api.github.com/repos/markbrough/country-fiscal-years/pulls'
    # Re-attach to the existing repo created by init_git_repo(); Repo.init
    # on an existing repo does not reinitialise its history.
    git = Repo.init(output_dir).git
    git.add('.')
    git.config('user.email', environ.get('MORPH_GH_EMAIL'))
    git.config('user.name', environ.get('MORPH_GH_USERNAME'))
    git.commit(m='Update')
    # Push authenticates via the token embedded in the origin URL.
    git.push('origin', 'update')
    payload = {
    'title': 'Merge in latest changes',
    'body': 'This is an auto- pull request.',
    'head': 'update',
    'base': 'gh-pages',
    }
    # NOTE(review): the response is never checked, so a failed PR creation
    # (e.g. a PR already open for this branch, HTTP 422) passes silently -
    # confirm this best-effort behaviour is intended.
    r = requests.post(url, json=payload, auth=(environ.get('MORPH_GH_USERNAME'), environ.get('MORPH_GH_API_KEY')))
    # Clean up the working clone regardless of the PR outcome.
    shutil.rmtree(output_dir, ignore_errors=True)

def run():
    """Scrape the fiscal-year table, saving each row to sqlite and to a CSV.

    Fetches the page via get_page(), walks the first table's data rows,
    upserts each country (keyed on ``code``) into the scraperwiki sqlite
    store, and mirrors the same rows into
    ``data_dir/countries_fiscal_years.csv`` with all fields quoted.

    Fixes over the previous version:
    - the table was iterated twice, upserting every row into sqlite twice;
      a single pass now does both the sqlite save and the CSV write.
    - the CSV file was opened in text mode (``'w'``, encoding='utf-8') even
      though unicodecsv writers emit UTF-8 *bytes*; it is now opened 'wb'.
    """
    page = get_page()
    table = page.xpath("//table")[0]
    with open(join(data_dir, 'countries_fiscal_years.csv'), 'wb') as f:
        writer = unicodecsv.DictWriter(f, fieldnames=["code", "name", "fy_start"],
                                       quoting=csv.QUOTE_ALL)
        writer.writeheader()
        # Skip the header row; each data row's id attribute is the country code.
        for row in table.xpath("//tr")[1:]:
            cols = row.xpath("td")
            data = {
                "code": row.get("id").upper(),
                "name": cols[0].find("a").text,
                "fy_start": clean_fy(cols[1].text),
            }
            scraperwiki.sqlite.save(unique_keys=['code'], data=data)
            writer.writerow(data)

# Only scrape when executed as a script, so the module can be imported
# (e.g. for testing) without side effects.
if __name__ == "__main__":
    run()

0 comments on commit 349c570

Please sign in to comment.