Skip to content

Commit

Permalink
Merge pull request #22 from ror-community/separate-address-update
Browse files Browse the repository at this point in the history
Separate address update
  • Loading branch information
adambuttrick committed Mar 23, 2023
2 parents 94793d3 + 49ac002 commit cbaf521
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 21 deletions.
4 changes: 1 addition & 3 deletions generate_relationships/generate_relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from csv import DictReader
import re
import sys
import update_address as ua

ERROR_LOG = "relationship_errors.log"
logging.basicConfig(filename=ERROR_LOG,level=logging.ERROR, filemode='w')
Expand Down Expand Up @@ -98,9 +97,8 @@ def get_record(id, filename):

try:
response = rsp.json()
updated_record = ua.update_geonames(response)
with open(UPDATED_RECORDS_PATH + filename, "w", encoding='utf8') as f:
json.dump(updated_record, f, ensure_ascii=False)
json.dump(response, f, ensure_ascii=False)
except Exception as e:
logging.error(f"Writing {filename}: {e}")

Expand Down
3 changes: 1 addition & 2 deletions generate_relationships/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
requests==2.27.1
update_address @ git+https://github.com/ror-community/update_address.git
requests==2.27.1
4 changes: 1 addition & 3 deletions remove_relationships/remove_relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import requests
import sys
import update_address as ua
from urllib.parse import urlparse

ERROR_LOG = "relationship_errors.log"
Expand Down Expand Up @@ -46,9 +45,8 @@ def get_record(id, filename, inactive_id):
if (response['status'] =='active') and len(response['relationships']) > 0:
inactive_relationships = [r for r in response['relationships'] if (r['id'] == inactive_id and r['type'] != 'Predecessor')]
if len(inactive_relationships) > 0:
updated_record = ua.update_geonames(response)
with open(UPDATED_RECORDS_PATH + filename, "w", encoding='utf8') as f:
json.dump(updated_record, f, ensure_ascii=False)
json.dump(response, f, ensure_ascii=False)
filepath = check_file(filename)
except Exception as e:
logging.error(f"Error writing {filename}: {e}")
Expand Down
1 change: 0 additions & 1 deletion remove_relationships/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
requests==2.27.1
update_address @ git+https://github.com/ror-community/update_address.git
3 changes: 0 additions & 3 deletions update_address_only/address_only_release_file.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import os
import sys
import json
import re
import glob
import update_address

Expand Down
6 changes: 1 addition & 5 deletions update_address_only/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,2 @@
certifi==2021.10.8
charset-normalizer==2.0.11
idna==3.3
requests==2.27.1
urllib3==1.26.8
update_address @ git+https://github.com/ror-community/update_address.git
update_address @ git+https://github.com/ror-community/update_address.git@add-caching
46 changes: 46 additions & 0 deletions update_address_only/update_addresses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os
import json
import logging
import sys
# NOTE(review): hard-coded absolute path to one developer's local checkout.
# This directory will not exist on other machines; the import below then
# relies on update_address being installed some other way (presumably via
# requirements.txt -- confirm) and this line should probably be removed.
sys.path.append('/Users/ekrznarich/git/update_address')
import update_address

# Root directory that is walked for record files (current working directory).
RECORDS_PATH = "."
# File that receives one line per record that failed to update.
ERROR_LOG = "address_update_errors.log"
# filemode='w' truncates any existing log when this module is loaded, so the
# log only ever reflects the current run.
logging.basicConfig(filename=ERROR_LOG,level=logging.ERROR, filemode='w')

def export_json(json_data, json_file):
    """Overwrite an open, seekable text file with ``json_data`` as JSON.

    The data is written from the start of the file with a 2-space indent and
    non-ASCII characters left unescaped; anything left over from the previous
    (longer) contents is cut off afterwards.

    Args:
        json_data: JSON-serializable object to write.
        json_file: File object opened for writing that supports ``seek``,
            ``write`` and ``truncate`` (e.g. opened with mode ``'r+'``).

    Raises:
        TypeError, ValueError: If ``json_data`` cannot be serialized. In that
            case the file is left untouched.
    """
    # Serialize before touching the file: streaming json.dump() straight into
    # the file would leave a half-overwritten, un-truncated record behind if
    # serialization failed partway through.
    serialized = json.dumps(json_data, ensure_ascii=False, indent=2)
    json_file.seek(0)
    json_file.write(serialized)
    json_file.truncate()

def get_files(top):
    """Return the paths of all files found under ``top``, recursively.

    Each path is the containing directory (as reported by ``os.walk``) joined
    with the file name. Directories themselves are not included.
    """
    return [
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(top, topdown=True)
        for name in names
    ]

def update_addresses(filepaths):
    """Run ``update_address.update_geonames`` over every ``.json`` file given.

    Each file whose extension is exactly ``.json`` is loaded, passed through
    ``update_address.update_geonames`` and rewritten in place with the
    returned value. Other paths are ignored.

    Failures on one file (read, parse, update or write) are logged through
    ``logging.error`` and do not stop processing of the remaining files.

    Args:
        filepaths: Iterable of file paths to consider.
    """
    for filepath in filepaths:
        if os.path.splitext(filepath)[1] != '.json':
            continue
        try:
            # Explicit UTF-8: export_json writes with ensure_ascii=False, so
            # relying on the platform default encoding could mis-decode or
            # fail on records containing non-ASCII text.
            with open(filepath, 'r+', encoding='utf8') as json_in:
                print("updating " + filepath)
                json_data = json.load(json_in)
                json_data = update_address.update_geonames(json_data)
                export_json(json_data, json_in)
        except Exception as e:
            # Deliberately broad: one bad record must not abort the batch;
            # the caller inspects the error log afterwards.
            logging.error(f"Writing {filepath}: {e}")

if __name__ == '__main__':
    # Update every record under RECORDS_PATH, then report based on whether
    # anything was written to the error log during the run.
    update_addresses(get_files(RECORDS_PATH))
    if os.path.getsize(ERROR_LOG):
        # Non-empty log: echo it and signal failure through the exit status.
        print("ERRORS RECORDED IN address_update_errors.log")
        with open(ERROR_LOG, 'r') as log_file:
            print(log_file.read())
        sys.exit(1)
    else:
        # Nothing was logged, so do not leave an empty log file behind.
        os.remove(ERROR_LOG)
3 changes: 1 addition & 2 deletions update_related_records/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
requests==2.27.1
update_address @ git+https://github.com/ror-community/update_address.git
requests==2.27.1
2 changes: 0 additions & 2 deletions update_related_records/update_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import urllib
import requests
import update_address

API_URL = "https://api.ror.org/organizations"
INACTIVE_STATUSES = ('inactive', 'withdrawn')
Expand Down Expand Up @@ -42,7 +41,6 @@ def check_update_production_file(ror_id, related_id, related_name):
print('Current name:', prod_record['relationships']
[index]['label'], '- Updated Name:', related_name)
prod_record['relationships'][index]['label'] = related_name
prod_record = update_address.update_geonames(prod_record)
json_file = short_id + '.json'
json_file_path = UPDATED_RECORDS_PATH + json_file
with open(json_file_path, 'w', encoding='utf8') as f_out:
Expand Down

0 comments on commit cbaf521

Please sign in to comment.