Skip to content

Commit

Permalink
Merge remote-tracking branch 'miguel/mmo-adjust-bejune-import' into test
Browse files: browse the repository at this point in the history
  • Loading branch information
jma committed Sep 23, 2021
2 parents a268ce2 + 4843fcb commit c09b140
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
26 changes: 20 additions & 6 deletions sonar/modules/cli/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def hepbejune(data_file, pdf_directory):
if row[1] == 'SKIP':
continue

degree = row[14]
if degree == 'bachelor thesis':
degree = 'Mémoire de bachelor'
elif degree == 'master thesis':
degree = 'Mémoire de master'
else:
degree = 'Mémoire'

data = {
'title': [{
'type': 'bf:Title',
Expand All @@ -86,16 +94,22 @@ def hepbejune(data_file, pdf_directory):
'role': ['cre']
}],
'dissertation': {
'degree': row[12],
'grantingInstitution': row[10],
'degree': degree,
'grantingInstitution': 'Haute école pédagogique BEJUNE',
'date': date.strftime('%Y-%m-%d')
},
'provisionActivity': [{
'type': 'bf:Publication',
'startDate': row[11]
'startDate': date.strftime('%Y')
}],
'customField1': [
row[12]
],
'customField2': [
row[13]
],
'documentType':
DOCUMENT_TYPE_MAPPING.get(row[13], 'coar:c_1843'),
DOCUMENT_TYPE_MAPPING.get(row[14], 'coar:c_1843'),
'usageAndAccessPolicy': {
'license': 'CC BY-NC-ND'
},
Expand All @@ -106,10 +120,10 @@ def hepbejune(data_file, pdf_directory):
'harvested':
True,
'masked':
True
'masked_for_external_ips'
}

file_path = os.path.join(pdf_directory, row[15])
file_path = os.path.join(pdf_directory, row[16])
if os.path.isfile(file_path):
data['files'] = [{
'key': 'fulltext.pdf',
Expand Down
10 changes: 10 additions & 0 deletions sonar/modules/documents/dojson/rerodoc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,16 @@ def marc21_to_dissertation_field_502(self, key, value):
dissertation['degree'] = value.get('a')
self['dissertation'] = dissertation

# try to parse the thesis note more precisely
matches = re.match(r'^(?P<degree>[^:]+) : (?P<grantingInstitution>[^,]+) ?[,:] (?P<date>\d{4})( ; .*)?$', value.get('a'))
if matches:
if matches.group("degree"):
dissertation['degree'] = matches.group("degree")
if matches.group("grantingInstitution"):
dissertation['grantingInstitution'] = matches.group("grantingInstitution")
if matches.group("date"):
dissertation['date'] = matches.group("date")

# Try to get start date and store in provision activity
# 260$c and 269$c have priority to this date
record = overdo.blob_record
Expand Down

0 comments on commit c09b140

Please sign in to comment.