-
Notifications
You must be signed in to change notification settings - Fork 23
/
tika_file_characterization_service.rb
68 lines (60 loc) · 2.56 KB
/
tika_file_characterization_service.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# frozen_string_literal: true
# File characterization service backed by Apache Tika.
# Defines the Apache Tika based characterization service as a Valkyrie file characterization service.
# @since 0.1.0
class TikaFileCharacterizationService
  attr_reader :file_node, :persister

  # @param file_node [FileNode] the node whose attached file will be characterized
  # @param persister the object used to save the file_node; it must respond to
  #   `save(resource:)`
  def initialize(file_node:, persister:)
    @file_node = file_node
    @persister = persister
  end

  # Runs Tika against the file attached to the file_node and copies the
  # extracted width, height, mime type, checksum and size onto the node.
  # The service's file_node is replaced by the result of calling `new` on the
  # current node with the extracted attributes.
  # Default options are:
  #   save: true
  # @param save [Boolean] should the persister save the file_node after Characterization
  # @return [FileNode]
  # @example characterize a file and persist the changes by default
  #   Valkyrie::FileCharacterizationService.for(file_node, persister).characterize
  # @example characterize a file and do not persist the changes
  #   Valkyrie::FileCharacterizationService.for(file_node, persister).characterize(save: false)
  def characterize(save: true)
    # Only the last entry of the parsed Tika output is consulted.
    tika_metadata = JSON.parse(json_output).last
    @file_characterization_attributes = FileCharacterizationAttributes.new(
      width: tika_metadata['tiff:ImageWidth'],
      height: tika_metadata['tiff:ImageLength'],
      mime_type: tika_metadata['Content-Type'],
      checksum: checksum,
      size: tika_metadata['Content-Length']
    )
    @file_node = @file_node.new(@file_characterization_attributes.to_h)
    @persister.save(resource: @file_node) if save
    @file_node
  end

  # Computes the SHA256 hexdigest of the file on disk.
  # @return [String, nil] the hexdigest, or nil when no filename is available
  def checksum
    return unless filename

    Digest::SHA256.file(filename).hexdigest
  end

  # Builds a JSON array string from the raw Tika output: a comma is inserted
  # between every adjacent `}{` pair and the whole text is wrapped in brackets,
  # so back-to-back JSON objects parse as elements of one array.
  # NOTE(review): the substitution is purely textual, so a `}{` sequence inside
  # a string value would be rewritten as well.
  # @return [String]
  def json_output
    raw_json = RubyTikaApp.new(filename.to_s).to_json
    "[#{raw_json.gsub('}{', '},{')}]"
  end

  # Determines the location of the file on disk for the file_node
  # @return [Pathname] presumably a Pathname; callers in this class coerce it
  #   with `to_s` before handing it to Tika
  def filename
    file_object.disk_path
  end

  # Looks up (and memoizes) the stored file for the file_node's first file
  # identifier.
  # @return [Valkyrie::FileRepository::File] NOTE(review): return type carried
  #   over from the earlier documentation — confirm against the storage adapter
  def file_object
    @file_object ||= begin
      identifier = @file_node.file_identifiers[0]
      Valkyrie::StorageAdapter.find_by(id: identifier)
    end
  end

  # @return [Boolean] always true
  def valid?
    true
  end

  # Value object holding the characterization attributes that are applied to
  # the FileNode.
  class FileCharacterizationAttributes < Dry::Struct
    attribute :width, Valkyrie::Types::Int
    attribute :height, Valkyrie::Types::Int
    attribute :mime_type, Valkyrie::Types::String
    attribute :checksum, Valkyrie::Types::String
    attribute :size, Valkyrie::Types::Int
  end
end