Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 2 commits
  • 5 files changed
  • 0 commit comments
  • 1 contributor
Showing with 90 additions and 49 deletions.
  1. +27 −7 wacart/create_cspace_records.py
  2. +12 −0 wacart/create_test.py
  3. +2 −2 wacart/csconstants.py
  4. +7 −3 wacart/read_test.py
  5. +42 −37 wacart/wacart.py
View
34 wacart/create_cspace_records.py
@@ -1,12 +1,15 @@
#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
"""
Reads the pickle files of records in CSpace and of records from the
WACArt FM export, and inserts appropriate things into CSpace.
"""
+import codecs
+import cPickle
import httplib2
-import pickle
+import json
from lxml import etree
from lxml.builder import E
@@ -160,9 +163,25 @@ def xml_from(record):
values += record[key]
cs_schema.append(CC.editionNumber("\n".join(values)))
+ # NOTE: inscriptionContent could also include signature, workshop
+ # number, signed/location, and printer's marks; there is only one
+ # field available to hold all of them.
if record.has_key('inscription_location'):
cs_schema.append(CC.inscriptionContent("\n".join(record['inscription_location'])))
+ if record.has_key('running_time'):
+ cs_schema.append(
+ CC.dimensions(
+ CC.dimensionList(
+ CC.dimensionGroup(
+ CC.value(record['running_time']),
+ CC.measurementUnit('minutes'),
+ CC.dimension('running-time')
+ )
+ )
+ )
+ )
+
# There's probably a class of variables that we can easily handle with
# just a fieldname mapping; let's set that up, and then let the
# exceptions be exceptions
@@ -198,6 +217,7 @@ def insert_into_cspace(record):
# ampersands coming our way, so we just do a replace.
# TODO expand to handle other potentially invalid characters, such as
# the FileMaker repeat character that may sneak in anywhere
+ # Maybe handle this in the parsing stage?
#
for k in record.keys():
if type(record[k]) == type('') and record[k] is not None:
@@ -260,14 +280,14 @@ def insert_into_cspace(record):
def load_cspace_objectids():
pickle_file = open(CS_OBJECT_FILE, 'rb')
- cobjects = pickle.load(pickle_file)
+ cobjects = cPickle.load(pickle_file)
pickle_file.close()
return cobjects
-def load_wacart_objectids():
- pickle_file = open(WAC_OBJECTS_FILE, 'rb')
- cobjects = pickle.load(pickle_file)
- pickle_file.close()
+def load_wacart_objects():
+ jfile = codecs.open(WAC_OBJECTS_FILE, 'r', 'utf-8')
+ cobjects = json.load(jfile)
+ jfile.close()
return cobjects
def prune_existing_records(objects, existing_objectids):
@@ -291,7 +311,7 @@ def split_records_by_artist_count(records):
if __name__ == "__main__":
existing_cspace_records = load_cspace_objectids()
print "existing records loaded"
- wacart_records = load_wacart_objectids()
+ wacart_records = load_wacart_objects()
print "records to insert loaded"
records_to_create = prune_existing_records(wacart_records, existing_cspace_records)
print "records pruned"
View
12 wacart/create_test.py
@@ -29,5 +29,17 @@ def testConditionProcessing(self):
self.assertTrue(some_xml.find('1984') > -1)
self.assertTrue(some_xml.find('2020.142') > -1)
+ def testDimensions(self):
+ simpleRecord = {
+ 'acc_no': '2020.142.2',
+ 'condition_date': ['sometime in 1984'],
+ 'running_time': '234'
+ }
+ some_xml = create_cspace_records.xml_from(simpleRecord)
+ self.assertTrue(some_xml.find('dimension') > -1)
+ self.assertTrue(some_xml.find('minutes') > -1)
+
+ # TODO: then try a combination, e.g. width and depth
+
if __name__ == "__main__":
unittest.main()
View
4 wacart/csconstants.py
@@ -1,5 +1,5 @@
CSPACE_URL = 'http://localhost:8180/cspace-services/'
-CSPACE_USER = 'admin@core.collectionspace.org'
+CSPACE_USER = 'admin@walkerart.org'
CSPACE_PASS = 'Administrator'
CS_OBJECT_FILE = 'collectionspace_objects.pickle'
-WAC_OBJECTS_FILE= 'wacart_objects.pickle'
+WAC_OBJECTS_FILE= 'wacart_objects.json'
View
10 wacart/read_test.py
@@ -18,14 +18,18 @@ def mockExport(fielddict):
class ObjectStuff(unittest.TestCase):
def testSomeFields(self):
- """should be able to find the object's title and author's birthdate"""
+ """should be able to find the object's title and author's
+ birthdate. Should not pass empty values."""
oneLineExport = mockExport(
- {'title':'foo', 'born':'1900', 'creator_text_inverted':'Bob, Jim'})
+ {'title':'foo', 'born':'1900',
+ 'creator_text_inverted':'Bob, Jim',
+ 'running_time': ''})
objekt, agents = wacart.parse_line(oneLineExport)
self.assertEqual(['foo'], objekt['title'])
self.assertEqual('1900', agents[0]['born'])
+ self.assertEqual(False, objekt.has_key('running_time'))
def testRepeats(self):
"""Some works have multiple titles, measurements, etc. And there can
@@ -193,6 +197,6 @@ def testGuessNameOrder(self):
namestring7 = "von Smith, Bob"
self.assertEqual("von Smith", wacart.guess_name_order(namestring7)['last_name'])
self.assertEqual("Bob", wacart.guess_name_order(namestring7)['first_name'])
-
+
if __name__ == "__main__":
unittest.main()
View
79 wacart/wacart.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
"""
Reads a FileMaker export of the WACART database, saves python data
@@ -6,8 +7,9 @@
"""
+import codecs
+import json
import re
-import pickle
from csconstants import *
NAME_DELIMITERS = [';', ' and ']
@@ -95,15 +97,16 @@ def parse_line(line):
#
# FileMaker gives us OS 9-era output.
#
- line = line.decode('mac-roman').encode('utf-8')
+ line = line.decode('mac-roman')
objekt = {}
fields = line.split("\t")
for i in range(len(COLUMNS)):
- objekt[COLUMNS[i]['name']] = fields[i]
- if COLUMNS[i].has_key('repeat'):
- break_out_multiple_objects(COLUMNS[i]['name'], objekt)
+ if re.match(r'.*\w.*', fields[i]):
+ objekt[COLUMNS[i]['name']] = fields[i]
+ if COLUMNS[i].has_key('repeat'):
+ break_out_multiple_objects(COLUMNS[i]['name'], objekt)
for field in objekt.keys():
trim_extra_spaces(field, objekt)
@@ -131,7 +134,7 @@ def break_out_multiple_objects(field, target):
# object in question could already be a list. Assume that a given
# field will only have one type of delimiters.
for delimiter in [" ", ""]:
- if type(target[field]) == type(""):
+ if type(target[field]) != type([]):
if target[field].find(delimiter) > -1:
all_values = target[field].split(delimiter)
return_values = []
@@ -241,8 +244,9 @@ def break_out_agents(agentdict):
if len(artists) > i:
artists[i][field] = agentdict[field][i]
else:
- print "This record has a birth place / sex that is perplexing: %s %s, from %s" \
+ errah = "This record has a birth place / sex that is perplexing: %s %s, from %s" \
% (field, agentdict[field], agentdict['creator_text_inverted'])
+ print errah.encode('utf-8')
# These fields rarely, if ever, repeat
for field in ['died', 'ethnicity', 'nationality']:
@@ -345,33 +349,35 @@ def note_oddities(objekt):
if agent.has_key('last_name') and agent['last_name'].find(prob) > -1:
weird_name = True
- if weird_name:
- AGENTS.write("for object %s, we parsed '%s' as:\n" % (objekt['object_id'], objekt['creator_text_inverted']))
- for agent in objekt['agents']:
- for field in ['first_name', 'middle_name', 'last_name']:
- if agent.has_key(field):
- AGENTS.write(" %s: %s\n" % (field, agent[field]))
- if field == 'last_name':
- AGENTS.write("\n")
+ #if weird_name:
+ # AGENTS.write("for object %s, we parsed '%s' as:\n" % (objekt['object_id'], objekt['creator_text_inverted']))
+ # for agent in objekt['agents']:
+ # for field in ['first_name', 'middle_name', 'last_name']:
+ # if agent.has_key(field):
+ # AGENTS.write(" %s: %s\n" % (field, agent[field]))
+ # if field == 'last_name':
+ # AGENTS.write("\n")
- if type(objekt['running_time']) == type(''):
+ if objekt.has_key('running_time') and type(objekt['running_time']) == type(''):
if objekt['running_time'] != '' and objekt['running_time'].find('inute') < 0:
RUNTIME.write("%s: %s\n" % (objekt['object_id'], objekt['running_time']))
- match = re.search(r'\d', objekt['ethnicity'])
- if match is not None:
- ETHNICITY.write("%s: %s\n" % (objekt['object_id'], objekt['ethnicity']))
+ if objekt.has_key('ethnicity'):
+ match = re.search(r'\d', objekt['ethnicity'])
+ if match is not None:
+ ETHNICITY.write("%s: %s\n" % (objekt['object_id'], objekt['ethnicity']))
understood_frames = ['Artist Specified Framing', 'Yes', 'yes', 'No',
'no', 'No Frame', 'Unique Frame', 'Frame', ['no', 'No Frame'],
['yes', 'Frame'], ['Yes', 'Frame'], ['No', 'No Frame'], ['N.A.',
'No Frame'], ['Frame', 'Artist Specified Framing']]
- if objekt['frame'] != '' and not objekt['frame'] in understood_frames:
+ if objekt.has_key('frame') and not objekt['frame'] in understood_frames:
FRAME.write("%s: %s\n" % (objekt['object_id'], objekt['frame']))
- match = re.search(r'the undersigned', objekt['editor'])
- if match is not None:
- EDITORS.write("%s: %s\n" % (objekt['object_id'], objekt['editor']))
+ if objekt.has_key('editor'):
+ match = re.search(r'the undersigned', objekt['editor'])
+ if match is not None:
+ EDITORS.write("%s: %s\n" % (objekt['object_id'], objekt['editor']))
if __name__ == "__main__":
TABFILE = open('wacart.tab')
@@ -380,31 +386,30 @@ def note_oddities(objekt):
objects = []
for line in TABFILE:
- try:
- objekt, agents = parse_line(line)
- except ValueError as err:
- BADLINES.write("%s: %s" % (err, line))
-
+ objekt, agents = parse_line(line)
print "--------------------"
for row in COLUMNS:
field = row['name']
- if type(objekt[field]) == type([]):
- for datum in objekt[field]:
- print "%s -- '%s'" % (field, datum)
- else:
- print "%s -- '%s'" % (field, objekt[field])
+ if objekt.has_key(field):
+ if type(objekt[field]) == type([]):
+ for datum in objekt[field]:
+ debug = "%s -- '%s'" % (field, datum)
+ print debug.encode('utf-8')
+ else:
+ debug = "%s -- '%s'" % (field, objekt[field])
+ print debug.encode('utf-8')
print "--------------------"
print "Agent details:"
for agent in agents:
for field in agent.keys():
- print "%s -- '%s'" % (field, agent[field])
+ debug = "%s -- '%s'" % (field, agent[field])
+ print debug.encode('utf-8')
objekt['agents'] = agents
objects.append(objekt)
-
note_oddities(objekt)
TABFILE.close()
- output = open(WAC_OBJECTS_FILE, 'wb')
- pickle.dump(objects, output)
+ output = codecs.open(WAC_OBJECTS_FILE, 'w', 'utf-8')
+ json.dump(objects, output, ensure_ascii=False)
output.close()

No commit comments for this range

Something went wrong with that request. Please try again.