diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e30e8f0642..7b1b2d8f7f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ * `Style/ConditionalAssignment` doesn't crash if it finds a `case` with an empty branch. ([@lumeet][]) * [#2506](https://github.com/bbatsov/rubocop/issues/2506): `Lint/FormatParameterMismatch` understands %{} and %<> interpolations. ([@alexdowad][]) * [#2145](https://github.com/bbatsov/rubocop/issues/2145): `Lint/ParenthesesAsGroupedExpression` ignores calls with multiple arguments, since they are not ambiguous. ([@alexdowad][]) +* [#2484](https://github.com/bbatsov/rubocop/issues/2484): Remove two vulnerabilities in cache handling. ([@jonas054][]) ### Changes diff --git a/lib/rubocop.rb b/lib/rubocop.rb index 810285207af..5e3368fc4b2 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -352,6 +352,7 @@ require 'rubocop/formatter/offense_count_formatter' require 'rubocop/formatter/formatter_set' +require 'rubocop/cached_data' require 'rubocop/config' require 'rubocop/config_loader' require 'rubocop/config_store' diff --git a/lib/rubocop/cached_data.rb b/lib/rubocop/cached_data.rb new file mode 100644 index 00000000000..945de06cd92 --- /dev/null +++ b/lib/rubocop/cached_data.rb @@ -0,0 +1,86 @@ +# encoding: utf-8 + +module RuboCop + # Converts RuboCop objects to and from the serialization format JSON. + class CachedData + def initialize(filename) + @filename = filename + end + + def from_json(text) + offenses, disabled_line_ranges, comments = JSON.load(text) + deserialize_offenses(offenses) + deserialize_disabled_line_ranges(disabled_line_ranges) + deserialize_comments(comments) + [offenses, disabled_line_ranges, comments] + end + + def to_json(offenses, disabled_line_ranges, comments) + comments ||= [] + JSON.dump([offenses.map { |o| serialize_offense(o) }, + disabled_line_ranges, + comments.map { |c| serialize_comment(c) }]) + end + + private + + # Return a representation of a comment suitable for storing in JSON format. + def serialize_comment(comment) + expr = comment.loc.expression + expr.begin_pos...expr.end_pos + end + + def serialize_offense(offense) + { + severity: offense.severity, + location: { + begin_pos: offense.location.begin_pos, + end_pos: offense.location.end_pos + }, + message: offense.message, + cop_name: offense.cop_name, + status: offense.status + } + end + + # Restore an offense object loaded from a JSON file. + def deserialize_offenses(offenses) + offenses.map! do |o| + source_buffer = Parser::Source::Buffer.new(@filename) + source_buffer.read + location = Parser::Source::Range.new(source_buffer, + o['location']['begin_pos'], + o['location']['end_pos']) + Cop::Offense.new(o['severity'], location, o['message'], o['cop_name'], + o['status'].to_sym) + end + end + + def deserialize_disabled_line_ranges(disabled_line_ranges) + disabled_line_ranges.each do |cop_name, line_ranges| + disabled_line_ranges[cop_name] = line_ranges.map do |line_range| + case line_range + when /(\d+)\.\.(\d+)/ + Regexp.last_match(1).to_i..Regexp.last_match(2).to_i + when /(\d+)\.\.Infinity/ + Regexp.last_match(1).to_i..Float::INFINITY + else + fail "Unknown range: #{line_range}" + end + end + end + end + + def deserialize_comments(comments) + comments.map! do |c| + source_buffer = Parser::Source::Buffer.new(@filename) + source_buffer.read + c =~ /(\d+)\.\.\.(\d+)/ + range = Parser::Source::Range.new(source_buffer, + Regexp.last_match(1).to_i, + Regexp.last_match(2).to_i) + Parser::Source::Comment.new(range) + end + end + end +end diff --git a/lib/rubocop/cop/lint/syntax.rb b/lib/rubocop/cop/lint/syntax.rb index 5fe92e024e3..be2336abc8a 100644 --- a/lib/rubocop/cop/lint/syntax.rb +++ b/lib/rubocop/cop/lint/syntax.rb @@ -6,10 +6,11 @@ module Lint # This is actually not a cop and inspects nothing. It just provides # methods to repack Parser's diagnostics/errors into RuboCop's offenses. module Syntax - PseudoSourceRange = Struct.new(:line, :column, :source_line) + PseudoSourceRange = Struct.new(:line, :column, :source_line, :begin_pos, + :end_pos) COP_NAME = 'Syntax'.freeze - ERROR_SOURCE_RANGE = PseudoSourceRange.new(1, 0, '').freeze + ERROR_SOURCE_RANGE = PseudoSourceRange.new(1, 0, '', 0, 1).freeze def self.offenses_from_processed_source(processed_source) offenses = [] diff --git a/lib/rubocop/cop/offense.rb b/lib/rubocop/cop/offense.rb index 2b7bfafccf8..acf673e15cd 100644 --- a/lib/rubocop/cop/offense.rb +++ b/lib/rubocop/cop/offense.rb @@ -89,6 +89,9 @@ def to_s severity.code, line, real_column, message) end + # @api private + attr_reader :status + # @api private def line location.line diff --git a/lib/rubocop/result_cache.rb b/lib/rubocop/result_cache.rb index 7317d2db85d..3d000debf99 100644 --- a/lib/rubocop/result_cache.rb +++ b/lib/rubocop/result_cache.rb @@ -15,6 +15,7 @@ def initialize(file, options, config_store, cache_root = nil) @path = File.join(cache_root, rubocop_checksum, RUBY_VERSION, relevant_options(options), file_checksum(file, config_store)) + @cached_data = CachedData.new(file) end def valid? @@ -22,18 +23,20 @@ def valid? end def load - Marshal.load(IO.binread(@path)) + @cached_data.from_json(IO.binread(@path)) end def save(offenses, disabled_line_ranges, comments) - FileUtils.mkdir_p(File.dirname(@path)) + dir = File.dirname(@path) + FileUtils.mkdir_p(dir) preliminary_path = "#{@path}_#{rand(1_000_000_000)}" + # RuboCop must be in control of where its cached data is stored. A + # symbolic link anywhere in the cache directory tree is an indication + # that a symlink attack is being waged. + return if any_symlink?(dir) + File.open(preliminary_path, 'wb') do |f| - # The Hash[x.sort] call is a trick that converts a Hash with a default - # block to a Hash without a default block. Thus making it possible to - # dump. - f.write(Marshal.dump([offenses, Hash[disabled_line_ranges.sort], - comments])) + f.write(@cached_data.to_json(offenses, disabled_line_ranges, comments)) end # The preliminary path is used so that if there are multiple RuboCop # processes trying to save data for the same inspected file @@ -77,6 +80,17 @@ def self.cleanup(config_store, verbose, cache_root = nil) private + def any_symlink?(path) + while path != File.dirname(path) + if File.symlink?(path) + warn "Warning: #{path} is a symlink, which is not allowed." + return true + end + path = File.dirname(path) + end + false + end + def self.cache_root(config_store) root = config_store.for('.')['AllCops']['CacheRootDirectory'] root = File.join(Dir.tmpdir, Process.uid.to_s) if root == '/tmp' diff --git a/spec/rubocop/result_cache_spec.rb b/spec/rubocop/result_cache_spec.rb index 21fb7eeb195..84c2dbcab28 100644 --- a/spec/rubocop/result_cache_spec.rb +++ b/spec/rubocop/result_cache_spec.rb @@ -3,8 +3,6 @@ require 'spec_helper' describe RuboCop::ResultCache, :isolated_environment do - Loc = Struct.new(:column, :line) - include FileHelper subject(:cache) do @@ -19,15 +17,31 @@ 'Lint/UselessAssignment')] end let(:disabled_lines) { { 'Metrics/LineLength' => [4..5] } } - let(:comments) { ['# Hello'] } - let(:location) { Loc.new(3, 2) } + let(:encoding_comment) { '' } + let(:comment_text) { '# Hello' } + let(:comments) do + source_buffer = Parser::Source::Buffer.new(file) + source_buffer.source = encoding_comment + comment_text + range = Parser::Source::Range.new(source_buffer, + 0, + source_buffer.source.length) + [ + Parser::Source::Comment.new(range) + ] + end + let(:location) do + source_buffer = Parser::Source::Buffer.new(file) + source_buffer.read + Parser::Source::Range.new(source_buffer, 0, 2) + end def abs(path) File.expand_path(path) end before do - create_file('example.rb', ['x = 1']) + create_file('example.rb', ['# Hello', + 'x = 1']) allow(config_store).to receive(:for).with('example.rb').and_return({}) end @@ -40,7 +54,18 @@ def abs(path) saved_offenses, saved_disabled_lines, saved_comments = cache2.load expect(saved_offenses).to eq(offenses) expect(saved_disabled_lines).to eq(disabled_lines) - expect(saved_comments).to eq(comments) + # The new Comment objects that have been created from cached data are + # equivalent to the saved ones, but comparing them with the == operator + # still gives false. That's because they refer to locations in + # different source buffers. So we compare them attribute by attribute. + expect(saved_comments.size).to eq(comments.size) + comments.each_index do |ix| + c1 = comments[ix] + c2 = saved_comments[ix] + expect(c2.text).to eq(c1.text) + expect(c2.loc.expression.begin_pos).to eq(c1.loc.expression.begin_pos) + expect(c2.loc.expression.end_pos).to eq(c1.loc.expression.end_pos) + end end end @@ -56,6 +81,29 @@ def abs(path) expect(cache2.valid?).to eq(false) end end + + context 'when a symlink attack is made' do + before(:each) do + cache.save(offenses, disabled_lines, comments) + Find.find(cache_root) do |path| + next unless File.basename(path) == '_' + FileUtils.rm_rf(path) + FileUtils.ln_s('/bin', path) + end + $stderr = StringIO.new + end + after(:each) { $stderr = STDERR } + + it 'is stopped' do + cache2 = described_class.new(file, options, config_store, cache_root) + cache2.save(offenses, disabled_lines, comments) + # The cache file has not been created because there was a symlink in + # its path. + expect(cache2.valid?).to eq(false) + expect($stderr.string) + .to match(/Warning: .* is a symlink, which is not allowed.\n/) + end + end end context 'when --format is given' do @@ -84,7 +132,8 @@ def abs(path) describe '#save' do context 'when the default internal encoding is UTF-8' do - let(:comments) { ["# Hello \xF0"] } + let(:encoding_comment) { "# encoding: iso-8859-1\n" } + let(:comment_text) { "# Hello \xF0" } before(:each) { Encoding.default_internal = Encoding::UTF_8 } after(:each) { Encoding.default_internal = nil }