Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FeedVersion: Attach feedvalidator.py output #872

Merged
merged 11 commits into from
Dec 7, 2016
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ GEM
vcr (3.0.3)
warden (1.2.6)
rack (>= 1.0)
webmock (2.3.0)
webmock (2.3.1)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff
Expand Down
9 changes: 9 additions & 0 deletions app/models/feed_version.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# file_raw :string
# sha1_raw :string
# md5_raw :string
# file_feedvalidator :string
#
# Indexes
#
Expand All @@ -42,6 +43,7 @@ class FeedVersion < ActiveRecord::Base

mount_uploader :file, FeedVersionUploader
mount_uploader :file_raw, FeedVersionUploaderRaw
mount_uploader :file_feedvalidator, FeedVersionUploaderFeedvalidator

validates :sha1, presence: true, uniqueness: true
validates :feed, presence: true
Expand Down Expand Up @@ -120,6 +122,13 @@ def download_url
end
end

# URL of the stored feedvalidator report, with any query string removed.
#
# @return [String, nil] the report URL up to (not including) the first '?',
#   or nil when no feedvalidator report URL is available.
def feedvalidator_url
  # Guard on the feedvalidator uploader itself rather than on :file. A feed
  # version can have an archive but no validation report, and calling split
  # on a missing URL would raise NoMethodError.
  report_url = self.try(:file_feedvalidator).try(:url)
  return unless report_url
  # we don't want to include any query parameters
  report_url.split('?').first
end

private

def compute_and_set_hashes
Expand Down
4 changes: 3 additions & 1 deletion app/serializers/feed_version_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# file_raw :string
# sha1_raw :string
# md5_raw :string
# file_feedvalidator :string
#
# Indexes
#
Expand All @@ -44,7 +45,8 @@ class FeedVersionSerializer < ApplicationSerializer
:import_level,
:is_active_feed_version,
:changesets_imported_from_this_feed_version,
:download_url
:download_url,
:feedvalidator_url

def feed_version_imports
object.feed_version_imports.map(&:id)
Expand Down
27 changes: 27 additions & 0 deletions app/services/feed_fetcher_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ class FeedFetcherService

REFETCH_WAIT = 24.hours
SPLIT_REFETCH_INTO_GROUPS = 48 # and only refetch the first group
FEEDVALIDATOR_PATH = './virtualenv/bin/feedvalidator.py'

def self.fetch_this_feed_now(feed)
sync_fetch_and_return_feed_versions([feed])
Expand Down Expand Up @@ -81,6 +82,28 @@ def self.url_fragment(url)
(url || "").partition("#").last.presence
end

# Run feedvalidator.py against a GTFS archive and return its HTML report.
#
# The validator only runs when Figaro.env.run_google_feedvalidator is
# exactly 'true'; otherwise nothing is executed.
#
# @param filename [String] path of the GTFS archive to validate
# @return [File, nil] a read handle on the HTML report (its path is already
#   unlinked by the time this returns), or nil when validation is disabled
def self.run_google_feedvalidator(filename)
  # Validate
  return unless Figaro.env.run_google_feedvalidator == 'true'
  # Reserve an output path. The Tempfile stays referenced until the validator
  # has finished, so its finalizer cannot remove the path mid-run.
  report = Tempfile.new(['feedvalidator', '.html'])
  report.close
  outfile = report.path
  begin
    # Run feedvalidator. The block form drains the console output (which is
    # not used) and closes the pipe, waiting for the child process.
    IO.popen([FEEDVALIDATOR_PATH, '-n', '-o', outfile, filename], &:read)
    # Open the report before the path is removed; the handle stays readable.
    File.open(outfile)
  ensure
    # Unlink temporary file, even if the validator could not be started.
    report.unlink
  end
end

def self.fetch_and_normalize_feed_version(feed)
gtfs = GTFS::Source.build(
feed.url,
Expand All @@ -105,6 +128,9 @@ def self.fetch_and_normalize_feed_version(feed)
gtfs_file = File.open(gtfs.archive)
sha1 = Digest::SHA1.file(gtfs_file).hexdigest
end

file_feedvalidator = run_google_feedvalidator(gtfs_file.path)

# Create a new FeedVersion
feed_version = FeedVersion.find_by(sha1: sha1)
if !feed_version
Expand All @@ -113,6 +139,7 @@ def self.fetch_and_normalize_feed_version(feed)
url: feed.url,
file: gtfs_file,
file_raw: gtfs_file_raw,
file_feedvalidator: file_feedvalidator,
fetched_at: DateTime.now
}
data = data.merge!(read_gtfs_info(gtfs))
Expand Down
10 changes: 10 additions & 0 deletions app/uploaders/feed_version_uploader_feedvalidator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Uploader for the feedvalidator HTML report attached to a feed version.
class FeedVersionUploaderFeedvalidator < FeedVersionUploader
  # Only files with an "html" extension are accepted.
  def extension_white_list
    ['html']
  end

  # Name of the stored file:
  # "<feed onestop_id>-<feed version sha1>-feedvalidator.<extension>".
  # Returns nil when there is no file.
  def filename
    if file
      feed_onestop_id = model.feed.onestop_id
      "#{feed_onestop_id}-#{model.sha1}-feedvalidator.#{file.extension}"
    end
  end
end
17 changes: 0 additions & 17 deletions app/workers/feed_eater_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ class FeedEaterWorker
queue: :feed_eater,
retry: false

FEEDVALIDATOR_PATH = './virtualenv/bin/feedvalidator.py'

def perform(feed_onestop_id, feed_version_sha1=nil, import_level=0)
feed = Feed.find_by!(onestop_id: feed_onestop_id)

Expand All @@ -23,21 +21,6 @@ def perform(feed_onestop_id, feed_version_sha1=nil, import_level=0)
import_level: import_level
)

# Validate
# make sure to have local copy of file
feed_file_path = feed_version.file.local_path_copying_locally_if_needed
unless Figaro.env.run_google_feedvalidator.present? &&
Figaro.env.run_google_feedvalidator == 'false'
logger.info "FeedEaterWorker #{feed_onestop_id}: Validating feed"
validation_report = IO.popen([
FEEDVALIDATOR_PATH,
'-n',
'--output=CONSOLE',
feed_file_path
]).read
feed_version_import.update(validation_report: validation_report)
end

# Import feed
graph = nil
begin
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds the file_feedvalidator string column to the feed_versions table.
class AddFeedvalidatorOutputToFeedVersion < ActiveRecord::Migration
  def change
    change_table :feed_versions do |t|
      t.string :file_feedvalidator
    end
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20161129205145) do
ActiveRecord::Schema.define(version: 20161207070207) do

# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
Expand Down Expand Up @@ -364,6 +364,7 @@
t.string "file_raw"
t.string "sha1_raw"
t.string "md5_raw"
t.string "file_feedvalidator"
end

add_index "feed_versions", ["earliest_calendar_date"], name: "index_feed_versions_on_earliest_calendar_date", using: :btree
Expand Down