Skip to content

Commit

Permalink
Support extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Jul 5, 2017
1 parent 3dbb310 commit 4925101
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 1 deletion.
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]

gem 'bootstrap', github: 'twbs/bootstrap-rubygem'
gem 'jquery-rails'

gem 'chupa-text', path: '../chupa-text'
# Optionally pull in extra gems from a Gemfile.local that sits next to this
# Gemfile. The file is only evaluated when it exists.
#
# eval_gemfile is Bundler's own DSL method for evaluating another Gemfile in
# the current context; it replaces a hand-rolled eval(File.read(...)) and lets
# Bundler report errors against the evaluated file.
local_gemfile = "#{__dir__}/Gemfile.local"
eval_gemfile(local_gemfile) if File.exist?(local_gemfile)
4 changes: 4 additions & 0 deletions Gemfile.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
gem 'chupa-text-decomposer-html'
gem 'chupa-text-decomposer-pdf'
gem 'chupa-text-decomposer-spreadsheet'
gem 'chupa-text-decomposer-libreoffice'
42 changes: 42 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ GIT
popper_js (>= 1.9.9, < 2.0)
sass (>= 3.4.19)

PATH
remote: ../chupa-text
specs:
chupa-text (1.0.6)

GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -55,6 +60,9 @@ GEM
bindex (0.5.0)
builder (3.2.3)
byebug (9.0.6)
cairo (1.15.9)
native-package-installer (>= 1.0.3)
pkg-config (>= 1.2.2)
capybara (2.14.4)
addressable
mime-types (>= 1.16)
Expand All @@ -64,6 +72,17 @@ GEM
xpath (~> 2.0)
childprocess (0.7.1)
ffi (~> 1.0, >= 1.0.11)
chupa-text-decomposer-html (1.0.1)
chupa-text
nokogiri
chupa-text-decomposer-libreoffice (1.0.0)
chupa-text-decomposer-pdf
chupa-text-decomposer-pdf (1.0.3)
chupa-text
poppler
chupa-text-decomposer-spreadsheet (1.0.0)
chupa-text
roo
coffee-rails (4.2.2)
coffee-script (>= 2.2.0)
railties (>= 4.0.0)
Expand All @@ -75,8 +94,18 @@ GEM
erubi (1.6.1)
execjs (2.7.0)
ffi (1.9.18)
gdk_pixbuf2 (3.1.6)
gio2 (= 3.1.6)
gio2 (3.1.6)
glib2 (= 3.1.6)
gobject-introspection (= 3.1.6)
glib2 (3.1.6)
native-package-installer (>= 1.0.3)
pkg-config (>= 1.2.2)
globalid (0.4.0)
activesupport (>= 4.2.0)
gobject-introspection (3.1.6)
glib2 (= 3.1.6)
i18n (0.8.4)
jbuilder (2.7.0)
activesupport (>= 4.2.0)
Expand All @@ -100,10 +129,15 @@ GEM
mini_portile2 (2.2.0)
minitest (5.10.2)
multi_json (1.12.1)
native-package-installer (1.0.4)
nio4r (2.1.0)
nokogiri (1.8.0)
mini_portile2 (~> 2.2.0)
pkg-config (1.2.3)
popper_js (1.9.9)
poppler (3.1.6)
cairo (>= 1.14.0)
gdk_pixbuf2 (= 3.1.6)
public_suffix (2.0.5)
puma (3.9.1)
rack (2.0.3)
Expand Down Expand Up @@ -136,6 +170,9 @@ GEM
rb-fsevent (0.10.2)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
roo (2.7.1)
nokogiri (~> 1)
rubyzip (~> 1.1, < 2.0.0)
ruby_dep (1.5.0)
rubyzip (1.2.1)
sass (3.4.24)
Expand Down Expand Up @@ -188,6 +225,11 @@ DEPENDENCIES
bootstrap!
byebug
capybara (~> 2.13)
chupa-text!
chupa-text-decomposer-html
chupa-text-decomposer-libreoffice
chupa-text-decomposer-pdf
chupa-text-decomposer-spreadsheet
coffee-rails (~> 4.2)
jbuilder (~> 2.5)
jquery-rails
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Base controller class; ExtractionsController inherits from it.
class ApplicationController < ActionController::Base
# NOTE(review): this file is shown as a diff with 1 addition & 1 deletion, so
# the line directly below is presumably the removed (pre-change) version and
# the line after it is its replacement -- confirm against the real file.
protect_from_forgery with: :exception
# CSRF verification raises on failure, but is skipped when the request format
# is JSON (see the `unless:` lambda).
# NOTE(review): skipping the check purely on request format weakens CSRF
# protection for any JSON endpoint that relies on cookies -- confirm intended.
protect_from_forgery with: :exception, unless: -> {request.format.json?}
end
28 changes: 28 additions & 0 deletions app/controllers/extractions_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Accepts a file upload and responds with the result of Extraction#extract,
# either as an HTML page or as JSON.
class ExtractionsController < ApplicationController
  # GET /extraction/new
  #
  # Renders the upload form backed by a blank Extraction.
  def new
    @extraction = Extraction.new
  end

  # POST /extraction
  # POST /extraction.json
  #
  # Builds an Extraction from the permitted parameters. A valid extraction is
  # rendered through the :create template (HTML) or as the JSON form of
  # Extraction#extract. An invalid one re-renders the form (HTML) or returns
  # the validation errors with status 422 (JSON).
  def create
    @extraction = Extraction.new(extraction_params)
    valid = @extraction.valid?

    respond_to do |format|
      format.html { render(valid ? :create : :new) }
      format.json do
        if valid
          render json: @extraction.extract
        else
          render json: @extraction.errors, status: :unprocessable_entity
        end
      end
    end
  end

  private

  # Only the top-level :input parameter (the uploaded file) is allowed through.
  def extraction_params
    params.permit(:input)
  end
end
2 changes: 2 additions & 0 deletions app/helpers/extractions_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Helper module for the extraction views; it currently defines no helpers.
module ExtractionsHelper
end
31 changes: 31 additions & 0 deletions app/models/extraction.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Non-persisted form model that wraps one uploaded file and runs it through
# ChupaText to extract its contents.
class Extraction
  include ActiveModel::Model

  # The uploaded file. #extract calls #original_filename and #to_io on it.
  attr_accessor :input
  validates :input, presence: true
  # Without this check a non-file value (for example a plain string parameter)
  # passes the presence validation and then fails inside #extract with
  # NoMethodError instead of producing a validation error.
  validate :input_must_be_an_upload

  # Always false: an extraction is never stored.
  def persisted?
    false
  end

  # Always nil: an extraction has no identity.
  def id
    nil
  end

  # Runs the ChupaText extractor over the uploaded file.
  #
  # Call only after the model has been validated; the method relies on
  # +input+ responding to #original_filename and #to_io.
  #
  # Returns the value of ChupaText::Formatters::Hash#format_finish, which the
  # controller and the create view serialize as JSON.
  def extract
    extractor = build_extractor

    data = ChupaText::VirtualFileData.new(Pathname(@input.original_filename),
                                          @input.to_io)
    formatter = ChupaText::Formatters::Hash.new
    formatter.format_start(data)
    extractor.extract(data) do |extracted|
      formatter.format_extracted(extracted)
    end
    formatter.format_finish(data)
  end

  private

  # Builds an extractor configured from config/chupa-text.rb.
  def build_extractor
    configuration = ChupaText::Configuration.new
    loader = ChupaText::ConfigurationLoader.new(configuration)
    loader.load(Rails.root + "config" + "chupa-text.rb")

    extractor = ChupaText::Extractor.new
    extractor.apply_configuration(configuration)
    extractor
  end

  # Adds an error unless +input+ offers the interface #extract needs.
  # Blank input is left to the presence validation so only one error is shown.
  def input_must_be_an_upload
    return if input.blank?
    return if input.respond_to?(:original_filename) && input.respond_to?(:to_io)

    errors.add(:input, "must be an uploaded file")
  end
end
22 changes: 22 additions & 0 deletions app/views/extractions/_form.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<%# Upload form partial rendered by the new and create views; expects an `extraction` local. %>
<%# The form always POSTs to extraction_path. %>
<%= form_with(model: extraction, local: true, url: extraction_path, method: :post) do |form| %>
<%# Validation errors, shown only when the model has any. %>
<% if extraction.errors.any? %>
<div id="error_explanation">
<h2><%= pluralize(extraction.errors.count, "error") %> prohibited this extraction from being saved:</h2>

<ul>
<% extraction.errors.full_messages.each do |message| %>
<li><%= message %></li>
<% end %>
</ul>
</div>
<% end %>

<div class="field">
<%= form.label :input %>
<%# The field name is overridden to "input" so the file is submitted as the top-level `input` parameter, which is what the controller permits. %>
<%= form.file_field :input, id: :extraction_input, name: :input %>
</div>

<div class="actions">
<%= form.submit "Extract" %>
</div>
<% end %>
5 changes: 5 additions & 0 deletions app/views/extractions/create.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<%# Result page: shows the upload form again, followed by the extraction result. %>
<h1>Extraction</h1>

<%= render 'form', extraction: @extraction %>

<%# The extraction itself runs here, during rendering, and is printed as pretty-printed JSON. %>
<pre><%= JSON.pretty_generate(@extraction.extract) %></pre>
3 changes: 3 additions & 0 deletions app/views/extractions/new.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<%# Upload page: renders the shared form partial for the Extraction set by the controller. %>
<h1>Extraction</h1>

<%= render 'form', extraction: @extraction %>
5 changes: 5 additions & 0 deletions config/chupa-text.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# ChupaText configuration, loaded by Extraction#extract through
# ChupaText::ConfigurationLoader.

# NOTE(review): "*" presumably matches every available decomposer -- confirm
# against the ChupaText configuration documentation.
decomposer.names = ["*"]

# Copy every extension => MIME type pair known to Rails into the ChupaText
# MIME type table, storing each type as a plain string.
Mime::EXTENSION_LOOKUP.each_pair do |ext, type|
  mime_types[ext] = type.to_s
end
1 change: 1 addition & 0 deletions config/initializers/chupa_text.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Rails initializer: load the ChupaText decomposers when the application boots.
# NOTE(review): presumably this discovers the chupa-text-decomposer-* gems
# listed in Gemfile.local -- confirm against the ChupaText documentation.
ChupaText::Decomposers.load
2 changes: 2 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Rails.application.routes.draw do
  # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html

  # Singular resource: only the upload form (new) and the extraction itself
  # (create) are routed, both to ExtractionsController.
  resource :extraction, only: %i[new create]
end

0 comments on commit 4925101

Please sign in to comment.