Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2422 from mlibrary/2809_pdf_toc
Addresses HELIO-2809 Generate TOC from representative PDF's outline
- Loading branch information
Showing
14 changed files
with
379 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
|
||
# Presenter wrapping a PDF ebook object and exposing its id and intervals.
class PDFEbookPresenter < ApplicationPresenter
  # @param pdf_ebook [Object] wrapped ebook; the methods below call +id+ and +intervals+ on it
  def initialize(pdf_ebook)
    @pdf_ebook = pdf_ebook
  end

  # @return [Object] the id reported by the wrapped ebook
  def id
    @pdf_ebook.id
  end

  # @return [false] always false for this presenter
  def multi_rendition?
    false
  end

  # @return [Boolean] true when the wrapped ebook reports at least one interval
  def intervals?
    @pdf_ebook.intervals.count > 0
  end

  # @return [Array<EPubIntervalPresenter>] one presenter per interval of the wrapped ebook
  def intervals
    @pdf_ebook.intervals.map do |entry|
      EPubIntervalPresenter.new(entry)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# frozen_string_literal: true | ||
|
||
# Namespace for PDF ebook support. Holds a module-level logger and a
# "configured" flag that is set through PDFEbook.configure.
module PDFEbook
  #
  # Logger
  #
  require 'logger'
  @logger = Logger.new(STDOUT)

  #
  # Configure
  #
  @configured = false

  class << self
    # Module-wide logger; defaults to a Logger writing to STDOUT.
    attr_accessor :logger

    # spec helper: clears the configured flag.
    def reset_configured_flag
      @configured = false
    end

    # @return [Boolean] true once configure has been called
    def configured?
      @configured
    end

    # Marks the module as configured and, when a block is given, yields the
    # module to it.
    #
    # @yieldparam config [Module] the PDFEbook module itself
    # @return [Object, nil] the block's result, or nil when no block is given
    def configure
      @configured = true
      # Guarded so a blockless call does not raise LocalJumpError after the
      # flag has already been flipped.
      yield self if block_given?
    end
  end
end
|
||
# | ||
# Require Dependencies | ||
# | ||
require 'origami' | ||
|
||
# | ||
# Require Relative | ||
# | ||
require_relative './pdf_ebook/interval' | ||
require_relative './pdf_ebook/publication' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# frozen_string_literal: true | ||
|
||
module PDFEbook
  # One table-of-contents entry: a title, a nesting level and a cfi string.
  # Instances come from the class-level factories; +new+ is private.
  class Interval
    private_class_method :new

    class << self
      # Builds an interval, or the null object when +title+ or +cfi+ is not
      # exactly a String.
      #
      # @param title [String]
      # @param level [Object] stored as the interval's depth
      # @param cfi [String]
      # @return [Interval, IntervalNullObject]
      def from_title_level_cfi(title, level, cfi)
        if title.instance_of?(String) && cfi.instance_of?(String)
          new(title: title, depth: level, cfi: cfi)
        else
          null_object
        end
      end

      # @return [IntervalNullObject] an interval that answers with defaults only
      def null_object
        IntervalNullObject.send(:new)
      end
    end

    # @return [String] the title, or '' when none was stored
    def title
      stored(:title, '')
    end

    # @return [Object] the stored depth, or 0 when none was stored
    def level
      stored(:depth, 0)
    end

    # @return [String] the cfi, or '' when none was stored
    def cfi
      stored(:cfi, '')
    end

    # @return [false] always false
    def downloadable?
      false
    end

    # @return [Array] always a new empty array
    def pages
      []
    end

    # @return [Array] always a new empty array
    def downloadable_pages
      []
    end

    private

    # @param args [Hash] values keyed by :title, :depth and :cfi
    def initialize(args)
      @args = args
    end

    # Looks up +key+ and falls back when the stored value is nil or false.
    def stored(key, fallback)
      @args[key] || fallback
    end
  end

  # Interval carrying no data; every reader returns its default.
  class IntervalNullObject < Interval
    private_class_method :new

    private

    def initialize
      super({})
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# frozen_string_literal: true | ||
|
||
module PDFEbook | ||
class Publication | ||
private_class_method :new | ||
attr_reader :id | ||
|
||
# Class Methods | ||
def self.from_string_id(string, id) | ||
file = StringIO.new(string) | ||
new(file, id) | ||
rescue StandardError => e | ||
::PDFEbook.logger.info("Publication.from_string_id(#{string[0..30]}) raised #{e} #{e.backtrace}") | ||
nil | ||
end | ||
|
||
def self.from_path_id(path, id) | ||
file = File.new(path) | ||
new(file, id) | ||
rescue StandardError => e | ||
::PDFEbook.logger.info("Publication.from_path_id(#{path}) raised #{e} #{e.backtrace}") | ||
nil | ||
end | ||
|
||
# Public method | ||
def intervals | ||
@intervals ||= extract_intervals | ||
end | ||
|
||
private | ||
|
||
def initialize(file, id) | ||
@pdf = Origami::PDF.read(file, verbosity: Origami::Parser::VERBOSE_QUIET) | ||
@id = id | ||
@obj_to_page = {} | ||
end | ||
|
||
def extract_intervals | ||
# Map of PDF page object number to 0-based linear page number | ||
if @obj_to_page.empty? | ||
@pdf.pages.each_with_index do |p, i| | ||
@obj_to_page[p.no] = i | ||
end | ||
end | ||
iterate_outlines(@pdf.Catalog.Outlines[:First]&.solve, 1) | ||
end | ||
|
||
# Takes Origami::OutlineItem and 1-based depth | ||
def iterate_outlines(outline, depth) | ||
intervals = [] | ||
until outline.nil? | ||
page = nil | ||
page = outline&.[](:A)&.solve&.[](:D)&.[](0)&.solve # Origami::Page | ||
page ||= outline[:Dest]&.solve&.[](0)&.solve | ||
unless page.nil? | ||
page_number = @obj_to_page[page.no] || 0 | ||
intervals << PDFEbook::Interval.from_title_level_cfi(outline[:Title].to_utf8, depth, "page=#{page_number}") | ||
end | ||
unless outline[:First]&.solve.nil? # Child outline | ||
intervals += iterate_outlines(outline[:First].solve, depth + 1) | ||
end | ||
outline = outline[:Next]&.solve | ||
end | ||
intervals | ||
end | ||
end | ||
end |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# frozen_string_literal: true | ||
|
||
# Specs for PDFEbook::Interval and its null object.
RSpec.describe PDFEbook::Interval do
  describe '#new' do
    # The implicit subject calls described_class.new, which is private.
    it { expect { is_expected }.to raise_error(NoMethodError) }
  end

  describe '#null_object' do
    subject { described_class.null_object }

    it { is_expected.to be_an_instance_of(PDFEbook::IntervalNullObject) }
    it { expect(subject.title).to be_empty }
    it { expect(subject.level).to be_zero }
    it { expect(subject.cfi).to be_empty }
    it { expect(subject.downloadable?).to be false }
    it { expect(subject.pages).to be_empty }
    it { expect(subject.downloadable_pages).to be_empty }
  end

  describe '#from_title_level_cfi' do
    subject { described_class.from_title_level_cfi(title, level, cfi) }

    let(:title) { double('title') }
    let(:level) { double('level') }
    let(:cfi) { double('cfi') }

    # Plain doubles are not Strings, so the factory falls back to the null object.
    it { is_expected.to be_an_instance_of(PDFEbook::IntervalNullObject) }

    context 'Strings' do
      before do
        allow(cfi).to receive(:instance_of?).with(String).and_return(true)
        allow(title).to receive(:instance_of?).with(String).and_return(true)
      end

      it { is_expected.to be_an_instance_of(described_class) }
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# frozen_string_literal: true | ||
|
||
# Specs for PDFEbook::Publication, run against the fixture PDF.
RSpec.describe PDFEbook::Publication do
  describe '#new' do
    # The implicit subject calls described_class.new, which is private.
    it { expect { is_expected }.to raise_error(NoMethodError) }
  end

  describe "with a test PDF" do
    context "using #from_path_id" do
      let(:noid) { '99999999' }
      let(:file) { './spec/fixtures/fake_pdf01.pdf' }

      describe "#intervals" do
        subject { described_class.from_path_id(file, noid) }

        it { is_expected.to be_an_instance_of(described_class) }

        it "has 5 intervals" do
          expect(subject.intervals.count).to be 5
        end

        describe "interval 1" do
          subject { described_class.from_path_id(file, noid).intervals[0] }

          it "has title Front Cover" do
            expect(subject.title).to eq "Front Cover"
          end
          it "has level 1" do
            expect(subject.level).to eq 1
          end
          it "has the cfi page=0" do
            expect(subject.cfi).to eq 'page=0'
          end
        end

        describe "interval 4" do
          subject { described_class.from_path_id(file, noid).intervals[3] }

          it "has title Section 2.1" do
            expect(subject.title).to eq "Section 2.1"
          end
          it "has level 2" do
            expect(subject.level).to eq 2
          end
          it "has the cfi page=5" do
            expect(subject.cfi).to eq 'page=5'
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# frozen_string_literal: true | ||
|
||
# Specs for the PDFEbook module-level logger and configure hook.
RSpec.describe PDFEbook do
  describe '#logger' do
    # Restore the logger afterwards so the setter example cannot leave a nil
    # logger behind for later examples.
    around do |example|
      original_logger = described_class.logger
      begin
        example.run
      ensure
        described_class.logger = original_logger
      end
    end

    it 'attribute getter' do
      expect { described_class.logger }.not_to raise_error
    end
    it 'attribute setter' do
      expect { described_class.logger = nil }.not_to raise_error
    end
  end

  describe '#configure' do
    before { described_class.reset_configured_flag }

    it 'setup block yields subject' do
      setup_config = nil
      described_class.configure do |config|
        setup_config = config
      end
      is_expected.to eq setup_config
    end

    it 'subject is configured' do
      described_class.configure { |_config| }
      expect(subject.configured?).to be true
    end
  end
end
Oops, something went wrong.