Skip to content

Commit

Permalink
Merge bcd107e into 2893fee
Browse files Browse the repository at this point in the history
  • Loading branch information
gbp committed Feb 9, 2024
2 parents 2893fee + bcd107e commit 50897e4
Show file tree
Hide file tree
Showing 16 changed files with 209 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ gem 'azure-storage', require: false
gem 'google-cloud-storage', '~> 1.47', require: false

# Storage content analyzers
gem 'excel_analyzer', path: 'gems/excel_analyzer'
gem 'excel_analyzer', path: 'gems/excel_analyzer', require: false

group :test do
gem 'fivemat', '~> 1.3.7'
Expand Down
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ PATH
specs:
excel_analyzer (0.0.1)
activestorage
mahoro
mail
rubyXL
rubyzip

Expand Down
7 changes: 7 additions & 0 deletions config/initializers/excel_analyzer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require "excel_analyzer"

# Callback handed to ExcelAnalyzer: given the blob of a raw email, look up the
# IncomingMessage whose raw email file is stored in that blob and, when one is
# found, call parse_raw_email! on it. Returns nil when no message matches.
ExcelAnalyzer.on_spreadsheet_received = lambda do |raw_email_blob|
  message = IncomingMessage.
    joins(raw_email: :file_blob).
    find_by(active_storage_blobs: { id: raw_email_blob })

  # find_by yields nil when the blob does not belong to any incoming message.
  message.parse_raw_email! if message
end
1 change: 1 addition & 0 deletions doc/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Highlighted Features

* Add automatic parsing of emails containing Excel spreadsheets (Graeme Porteous)
* Add admin list of all citations (Gareth Rees)
* Improve redirection flow after user account closure actions (Gareth Rees)
* Fix duplicated attachment masking jobs (Graeme Porteous)
Expand Down
2 changes: 2 additions & 0 deletions gems/excel_analyzer/excel_analyzer.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Gem::Specification.new do |spec|
spec.add_dependency "activestorage"
spec.add_dependency "rubyXL"
spec.add_dependency "rubyzip"
spec.add_dependency "mail"
spec.add_dependency "mahoro"

spec.add_development_dependency "bundler"
spec.add_development_dependency "pry"
Expand Down
27 changes: 27 additions & 0 deletions gems/excel_analyzer/lib/excel_analyzer.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
require "excel_analyzer/eml_analyzer"
require "excel_analyzer/xls_analyzer"
require "excel_analyzer/xlsx_analyzer"
require "excel_analyzer/railtie" if defined?(Rails)

##
# This module provides functionality to analyze Excel files, particularly to
# detect hidden data within spreadsheet attachments in emails. It supports .xls
# and .xlsx file formats.
module ExcelAnalyzer
  # Hook that is called when an analyzed email is found to carry a spreadsheet
  # attachment. Applications assign their own callable to react to such
  # emails; the default is a one-argument lambda that does nothing.
  #
  # @example Set a custom callable to handle received spreadsheets
  #   ExcelAnalyzer.on_spreadsheet_received = ->(blob) { process(blob) }
  #
  # @!attribute [rw] on_spreadsheet_received
  #   @return [Proc] the callable to run for spreadsheet attachments
  mattr_accessor :on_spreadsheet_received, default: lambda { |_blob| }

  # Content types the gem inspects for hidden data: the CONTENT_TYPE constant
  # of the XLS analyzer followed by that of the XLSX analyzer. A new array is
  # built on every call.
  #
  # @return [Array<String>] the list of supported spreadsheet content types
  def self.content_types
    [XlsAnalyzer, XlsxAnalyzer].map { |analyzer| analyzer::CONTENT_TYPE }
  end
end
39 changes: 39 additions & 0 deletions gems/excel_analyzer/lib/excel_analyzer/eml_analyzer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
require "mail"

require "active_storage"
require "active_storage/analyzer"

require "mail_handler"

module ExcelAnalyzer
  ##
  # Active Storage analyzer for EML (message/rfc822) blobs. It contributes no
  # metadata of its own; instead it reads the email, checks whether any
  # attachment has one of the spreadsheet content types listed by
  # ExcelAnalyzer.content_types and, if so, invokes the configured
  # ExcelAnalyzer.on_spreadsheet_received callback with the blob.
  class EmlAnalyzer < ActiveStorage::Analyzer
    CONTENT_TYPE = "message/rfc822"

    # @param blob [#content_type] the blob being considered for analysis
    # @return [Boolean] true only when the blob's content type is
    #   message/rfc822
    def self.accept?(blob)
      blob.content_type == CONTENT_TYPE
    end

    # Downloads the blob to a tempfile, parses it as an email and fires the
    # spreadsheet callback when a spreadsheet attachment is present.
    #
    # Errors raised while reading or parsing the email propagate to the
    # caller; only errors raised by the callback itself are contained.
    #
    # @return [Hash] always an empty hash - this analyzer adds no metadata
    def metadata
      download_blob_to_tempfile do |file|
        mail = Mail.read(file.path)
        notify_spreadsheet_received if spreadsheet_attached?(mail)
      end

      {}
    end

    private

    # True when at least one attachment of +mail+ has a content type listed in
    # ExcelAnalyzer.content_types.
    def spreadsheet_attached?(mail)
      MailHandler.get_attachment_attributes(mail).any? do |attributes|
        ExcelAnalyzer.content_types.include?(attributes[:content_type])
      end
    end

    # Runs the configured callback on a best-effort basis: a failing callback
    # must not abort analysis of the blob, but the failure is logged rather
    # than silently discarded so that it can be diagnosed.
    def notify_spreadsheet_received
      ExcelAnalyzer.on_spreadsheet_received.call(blob)
    rescue StandardError => e
      # ActiveStorage.logger may be unset (e.g. outside a Rails app), hence
      # the safe navigation.
      ActiveStorage.logger&.error(
        "ExcelAnalyzer.on_spreadsheet_received failed: " \
        "#{e.class}: #{e.message}"
      )
      nil
    end
  end
end
1 change: 1 addition & 0 deletions gems/excel_analyzer/lib/excel_analyzer/railtie.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module ExcelAnalyzer
# Analyzers.
#
class Railtie < Rails::Railtie
# Each analyzer is added with `prepend`, one call per analyzer. Assuming the
# analyzers collection is an Array (TODO confirm), every call inserts at the
# front, so the resulting order is XlsAnalyzer, XlsxAnalyzer, EmlAnalyzer,
# followed by whatever analyzers were configured beforehand.
config.active_storage.analyzers.prepend ExcelAnalyzer::EmlAnalyzer
config.active_storage.analyzers.prepend ExcelAnalyzer::XlsxAnalyzer
config.active_storage.analyzers.prepend ExcelAnalyzer::XlsAnalyzer
end
Expand Down
85 changes: 85 additions & 0 deletions gems/excel_analyzer/spec/excel_analyzer/eml_analyzer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# frozen_string_literal: true

require "spec_helper"
require_relative "../support/helpers"

# Specs for ExcelAnalyzer::EmlAnalyzer: which blobs it accepts, and when
# #metadata triggers the ExcelAnalyzer.on_spreadsheet_received callback.
RSpec.describe ExcelAnalyzer::EmlAnalyzer do
describe ".accept?" do
subject { ExcelAnalyzer::EmlAnalyzer.accept?(blob) }

context "when the blob is an email" do
let(:blob) { fake_blob(content_type: "message/rfc822") }
it { is_expected.to eq true }
end

context "when the blob is not an email" do
let(:blob) { fake_blob(content_type: "text/plain") }
it { is_expected.to eq false }
end
end

describe "#metadata" do
# Install a fresh no-op callback for the duration of each example and restore
# the previous one afterwards, so that the message expectations below are set
# on a throwaway lambda rather than on whatever callback was configured.
around do |example|
original_callback = ExcelAnalyzer.on_spreadsheet_received
ExcelAnalyzer.on_spreadsheet_received = ->(blob) {}
example.call
ExcelAnalyzer.on_spreadsheet_received = original_callback
end

# Default email: a single plain-text attachment, i.e. no spreadsheet.
let(:mail) do
Mail.new { add_file File.join(__dir__, "../fixtures/plain.txt") }
end

# Stand-in for the file object the blob yields; only its path is needed.
let(:io) { double(:File, path: "blob/path") }
let(:blob) { fake_blob(io: io, content_type: "message/rfc822") }

subject(:metadata) { ExcelAnalyzer::EmlAnalyzer.new(blob).metadata }

# Mail.read is stubbed for the fake path, so nothing is read from disk and the
# `mail` defined by the current context is what gets inspected.
before { allow(Mail).to receive(:read).with("blob/path").and_return(mail) }

# The returned metadata is an empty hash in every context below.
it { is_expected.to eq({}) }

context "when mail contains XLS attachment" do
let(:mail) do
Mail.new { add_file File.join(__dir__, "../fixtures/data.xls") }
end

it { is_expected.to eq({}) }

it "calls on_spreadsheet_received callback" do
expect(ExcelAnalyzer.on_spreadsheet_received).
to receive(:call).with(blob)
metadata
end
end

context "when mail contains XLSX attachment" do
let(:mail) do
Mail.new { add_file File.join(__dir__, "../fixtures/data.xlsx") }
end

it { is_expected.to eq({}) }
it "calls on_spreadsheet_received callback" do
expect(ExcelAnalyzer.on_spreadsheet_received).
to receive(:call).with(blob)
metadata
end
end

# Two spreadsheet attachments must still result in a single callback call.
context "when mail contains XLS and XLSX attachment" do
let(:mail) do
Mail.new do
add_file File.join(__dir__, "../fixtures/data.xls")
add_file File.join(__dir__, "../fixtures/data.xlsx")
end
end

it { is_expected.to eq({}) }
it "calls on_spreadsheet_received callback once only" do
expect(ExcelAnalyzer.on_spreadsheet_received).
to receive(:call).with(blob).once
metadata
end
end
end
end
14 changes: 2 additions & 12 deletions gems/excel_analyzer/spec/excel_analyzer/xls_analyzer_spec.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
# frozen_string_literal: true

require "spec_helper"
require_relative "../support/helpers"

RSpec.describe ExcelAnalyzer::XlsAnalyzer do
describe ".accept?" do
subject { ExcelAnalyzer::XlsAnalyzer.accept?(blob) }

context "when the blob is an Excel file" do
let(:blob) do
fake_blob(content_type: ExcelAnalyzer::XlsAnalyzer::CONTENT_TYPE)
end

let(:blob) { fake_blob(content_type: "application/vnd.ms-excel") }
it { is_expected.to eq true }
end

Expand Down Expand Up @@ -88,12 +86,4 @@
end
end
end

private

def fake_blob(io: nil, content_type:)
dbl = double(content_type: content_type)
allow(dbl).to receive(:open).and_yield(io)
dbl
end
end
14 changes: 4 additions & 10 deletions gems/excel_analyzer/spec/excel_analyzer/xlsx_analyzer_spec.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
# frozen_string_literal: true

require "spec_helper"
require_relative "../support/helpers"

RSpec.describe ExcelAnalyzer::XlsxAnalyzer do
describe ".accept?" do
subject { ExcelAnalyzer::XlsxAnalyzer.accept?(blob) }

context "when the blob is an Excel file" do
let(:blob) do
fake_blob(content_type: ExcelAnalyzer::XlsxAnalyzer::CONTENT_TYPE)
let(:content_type) do
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
end

let(:blob) { fake_blob(content_type: content_type) }
it { is_expected.to eq true }
end

Expand Down Expand Up @@ -102,12 +104,4 @@
end
end
end

private

def fake_blob(io: nil, content_type:)
dbl = double(io: io, content_type: content_type)
allow(dbl).to receive(:open).and_yield(io)
dbl
end
end
9 changes: 7 additions & 2 deletions gems/excel_analyzer/spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# frozen_string_literal: true

require "bundler/setup"
require "excel_analyzer/xls_analyzer"
require "excel_analyzer/xlsx_analyzer"

RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
Expand All @@ -14,4 +12,11 @@
config.expect_with :rspec do |c|
c.syntax = :expect
end

config.libs = [
File.expand_path("../../../../lib/", __FILE__),
File.expand_path("../../../../app/helpers/", __FILE__)
]
end

require "excel_analyzer"
5 changes: 5 additions & 0 deletions gems/excel_analyzer/spec/support/helpers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Builds a test double standing in for a blob.
#
# The double answers +io+ and +content_type+ with the given values, and its
# +open+ method yields +io+ to the supplied block.
#
# @param io [Object, nil] the object yielded by +open+ (defaults to nil)
# @param content_type [String] the content type the double reports
# @return [Object] the configured double
def fake_blob(io: nil, content_type:)
  double(io: io, content_type: content_type).tap do |blob|
    allow(blob).to receive(:open).and_yield(io)
  end
end
2 changes: 2 additions & 0 deletions lib/alaveteli_file_types.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require "mahoro"

class AlaveteliFileTypes
# To add an image, create a file with appropriate name corresponding to the
# mime type in app/assets/images/content_type/ e.g. icon_image_tiff_large.png
Expand Down
2 changes: 2 additions & 0 deletions lib/mail_handler/backends/mail_backend.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
require 'mapi/msg'
require 'mapi/convert'
require 'config_helper'
require 'alaveteli_file_types'
require 'normalize_string'

module Mail
class Message
Expand Down
22 changes: 22 additions & 0 deletions spec/integration/incoming_mail_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,26 @@
expect(page.response_headers['Content-Type']).to eq("application/octet-stream; charset=utf-8")
expect(page).to have_content "an unusual sort of file"
end

# After a plain email has been received and the queued jobs have run, the
# first incoming message of the request has no foi_attachments.
it "does not automatically extract attachments after receiving email" do
  receive_incoming_mail(
    'incoming-request-plain.email',
    email_to: info_request.incoming_email
  )
  perform_enqueued_jobs

  received_message = info_request.incoming_messages.first
  expect(received_message.foi_attachments).to be_empty
end

# An email carrying the XLSX fixture is delivered to the request's incoming
# address; once the queued jobs have run, the first incoming message has
# foi_attachments.
it "extract attachments when inbound email contains an Excel spreadsheet" do
  spreadsheet_mail = Mail.new(to: info_request.incoming_email) do
    body 'My excel spreadsheet'
    add_file 'gems/excel_analyzer/spec/fixtures/data.xlsx'
  end
  receive_incoming_mail(spreadsheet_mail.to_s)
  perform_enqueued_jobs

  received_message = info_request.incoming_messages.first
  expect(received_message.foi_attachments).not_to be_empty
end
end

0 comments on commit 50897e4

Please sign in to comment.