Skip to content

Commit

Permalink
change requested by maps accessioning team in prep script to allow fo…
Browse files Browse the repository at this point in the history
…r differing algorithms when searching for files
  • Loading branch information
peetucket committed Aug 17, 2018
1 parent ff55374 commit 738216a
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions devel/prepare_content.rb
Expand Up @@ -7,7 +7,7 @@
# see https://consul.stanford.edu/pages/viewpage.action?pageId=146704638 for more documentation
#
# Run with
# ROBOT_ENVIRONMENT=production ruby devel/prepare_content.rb INPUT_CSV_FILE.csv FULL_PATH_TO_CONTENT FULL_PATH_TO_STAGING_AREA [--no-object-folders] [--report] [--content-metadata] [--content-metadata-style map]
# ROBOT_ENVIRONMENT=production ruby devel/prepare_content.rb INPUT_CSV_FILE.csv FULL_PATH_TO_CONTENT FULL_PATH_TO_STAGING_AREA [--exact-match] [--no-object-folders] [--report] [--content-metadata] [--content-metadata-style map]
# e.g.
# ROBOT_ENVIRONMENT=production ruby devel/prepare_content.rb /maps/ThirdParty/Rumsey/Rumsey_Batch1.csv /maps/ThirdParty/Rumsey/content /maps/ThirdParty/Rumsey [--no-object-folders] [--report] [--content-metadata] [--content-metadata-style map]

Expand All @@ -18,6 +18,8 @@
# if you set the --report switch, it will only produce the output report, it will not symlink any files
# if you set the --content-metadata switch, it will only generate content metadata for each object using the log file for successfully found files, assuming you also have columns in your input CSV labeled "Druid", "Sequence" and "Label"
# if you set the --no-object-folders switch, then all symlinks will be flat in the staging directory (i.e. no object level folders) -- this requires all filenames to be unique across objects, if left off, then object folders will be created to store symlinks
# if you set the --exact-match switch, then only files which match exactly (but not case sensitive) will be found; the normal finding algorithm allows for also matching files that have any number of leading 0s
# file extensions do not matter when matching

require File.expand_path(File.dirname(__FILE__) + '/../config/boot')
require 'optparse'
Expand All @@ -28,6 +30,7 @@
# default values for the command-line switches
content_metadata = false # if set to true, will also generate content-metadata from values supplied in spreadsheet, can be set via switch
cm_style = 'map' # defaults to map type content metadata unless overridden
no_object_folders = false # if false, then each new object will be in a separately created folder, with symlinks contained inside it; if true, you will get a flat list
exact_match = false # if set to true, only files that match exactly (but case insensitive) will be found; any files with leading 0s will NOT match

# usage summary string; lists the --exact-match switch that is registered with OptionParser
# NOTE(review): the "Run with" header comment spells the switches --content-metadata / --content-metadata-style (hyphens), while this text uses underscores -- confirm which spelling the parser registers
help = "Usage:\n ruby prepare_content.rb INPUT_CSV_FILE BASE_CONTENT_FOLDER [STAGING_FOLDER] [--exact-match] [--no-object-folders] [--report] [--content_metadata] [--content_metadata_style STYLE]\n"
OptionParser.new do |opts|
Expand All @@ -44,6 +47,9 @@
opts.on("--no-object-folders") do |_ob|
no_object_folders = true
end
# --exact-match: only accept files whose name matches the spreadsheet value exactly (case insensitive), i.e. no leading 0s allowed
opts.on("--exact-match") do |_ob| # block argument is unused; underscore prefix matches the --no-object-folders handler
exact_match = true
end
end.parse!

if ARGV.size < 2
Expand Down Expand Up @@ -84,6 +90,7 @@
# echo the active options and the input/output locations to the console before processing
puts "Only producing report" if report
puts "Producing content metadata with style '#{cm_style}'" if content_metadata
puts "Creating object folders" unless no_object_folders
puts "Exact match algorithm" if exact_match
puts "Input CSV File: #{csv_in}"
puts "Logging to: #{csv_out}"
puts "Base Content Folder: #{base_content_folder}"
Expand Down Expand Up @@ -181,13 +188,16 @@

# now search for any file which ends with the filename (trying to catch cases where the filename has 0s at the beginning that were dropped from the spreadsheet)
puts "......#{Time.now}: looking for file '#{filename}', object '#{object}', label '#{label}'"
files_found = files_to_search.grep(/[0]*#{filename}\.\S+/)
# NOTE(review): `filename` is interpolated into the pattern unescaped, so regex metacharacters in a spreadsheet value (e.g. "+", "(", "[") alter what is matched; consider Regexp.escape(filename) -- confirm nothing relies on pattern syntax
# NOTE(review): the pattern is anchored with ^ at the start of each entry; File.basename is applied to the entries below, so if they carry directory components, files in subfolders would not be found -- confirm how files_to_search is built
files_found = files_to_search.grep(/^[0]*#{filename}\.\S+/i) # allow for leading zeros and case insensitivity; we will further refine later according to options set
files_found_basenames = files_found.map { |file| File.basename(file) }

# if found, symlink files that match or that end with the filename but have any number of leading zeros
# if found, symlink files that match
files_found.each do |input_file|
input_filename = File.basename(input_file)
if /^[0]*#{filename}\.\S+/.match(input_filename) # the found file matches the supplied filename with only leading 0s allowed, so it matches!
# match check, if exact match, do not look for leading zeros, but allow for case insensitivity
# if not exact match, allow leading 0s, keep case sensitivity
# the check runs against the basename only (input_filename), so directory components of input_file are ignored here
# NOTE(review): `filename` is interpolated unescaped into both patterns; consider Regexp.escape(filename) so regex metacharacters in the spreadsheet value are matched literally -- confirm
matched = exact_match ? /^#{filename}\.\S+/i.match(input_filename) : /^[0]*#{filename}\.\S+/.match(input_filename)
if matched # the found file matches the supplied filename
message = "found #{input_file}, symlink to object folder #{object_folder}"
output_file_full_path = no_object_folders ? File.join(staging_folder, input_filename) : (File.join(object_folder, input_filename))
input_file_full_path = Pathname.new(File.join(base_content_folder, input_file)).cleanpath(true).to_s
Expand Down

0 comments on commit 738216a

Please sign in to comment.