Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

executable file 242 lines (217 sloc) 7.501 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
#!/usr/bin/env ruby

# '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" $request_time';

require 'rubygems'
require 'thor'
require 'fastercsv'
require 'zlib'
require 'pp'

module RunCombinedParser

  FORMAT_KEYS = [
                 :remote_addr,
                 :remote_user,
                 :time_local,
                 :request,
                 :status,
                 :body_bytes_sent,
                 :http_referer,
                 :http_user_agent,
                 :request_time
                 ]

  MIN_KEYS = [
              :request_time,
              :time_local,
              :status,
              :body_bytes_sent
              ]

  SORTABLE_KEYS = [
                 :request_time,
                 :time_local,
                 :status,
                 :body_bytes_sent,
                 :remote_addr,
                 :http_referer,
                 :http_user_agent,
                 :request,
                 :remote_user
                 ]
                 
  KEY_LIST_NAMES = [:FORMAT_KEYS, :MIN_KEYS, :SORTABLE_KEYS]
  KEY_LIST = [FORMAT_KEYS, MIN_KEYS, SORTABLE_KEYS]

  class Cli <Thor
    class_option :files,
    :type => :array,
    :desc => "One or more nginx log files to parse",
    :aliases => "-f"

    desc "parse", "Parse the specified log file and output as tab or comma delimited"
    method_option :no_headers,
    :type => :boolean,
    :default => false,
    :desc => "Will not display headers"
    method_option :select_keys,
    :type => :string,
    :desc => "Select which keys to use for display. #{KEY_LIST_NAMES.collect {|n| n.to_s}.join(",")}",
    :default => "SORTABLE_KEYS"
    method_option :status,
    :type => :boolean,
    :desc => "Send line number and other status to STDERR to show progress during the parsing of files",
    :default => false
    method_option :tab_separated,
    :type => :boolean,
    :desc => "Use tabs to separate the field of the output",
    :aliases => "-t"
    method_option :comma_separated,
    :type => :boolean,
    :desc => "Use commas to separate the field of the output",
    :aliases => "-c"
    def parse
      unless (selected_keys = key_by_string(options.select_keys))
        STDERR.puts "Invalid select_keys: options.select_keys.inspect"
        exit(-1)
      end
      
      case
      when options[:tab_separated]
        col_sep = "\t"
      when options[:comma_separated]
        col_sep = ","
      else
        col_sep = "\t"
      end
      
      opts = options.merge({:selected_keys => selected_keys, :col_sep => col_sep})
      opts.files.each do |file|
        Process.parse_file file, opts
      end
    end
    
    desc "histogram", "Generate a histogram of a tsv file generated by parse. Must have headers or supply the keys to the file"
    def histogram
      options[:files].each do |file|
        Process.histogram(file, options)
      end
    end

    no_tasks do
      def key_by_string(str)
        case str
        when "MIN_KEYS"
          MIN_KEYS
        when "FORMAT_KEYS"
          FORMAT_KEYS
        when "SORTABLE_KEYS"
          SORTABLE_KEYS
        else
          nil
        end
      end
    end
    
    desc "show_keys", "Show the Keys available"
    def show_keys
      KEY_LIST.each_with_index {|keys, i| puts "#{KEY_LIST_NAMES[i]}: #{keys.join(', ')}"}
    end
  end

  class Process
    ##
    # Basic parse of file into an array of hashes
    # @param [String] file_name
    #
    def self.parse_file(file_name, opts)
      pat = /(\S+)\s+-\s+(\S+)\s+\[(.+)\]\s+"(.*?)"\s+(\d+)\s+(\d+)\s+"(.*?)"\s+"(.*?)"\s+([\d.]+)/
      
      selected_keys = opts.selected_keys
      col_sep = opts.col_sep
      puts selected_keys.collect { |k| k.to_s}.join(col_sep) unless opts[:no_headers]
      indices = selected_keys.collect {|key| FORMAT_KEYS.find_index(key) }
      
      line_num = 0
      io = gzipped?(file_name) ? Zlib::GzipReader : File
      
      io.open(file_name) do |f|
        f.each_line do |line|
          STDERR.print "\r#{line_num}" if opts.status && line_num % 100
          
          if (m = pat.match(line))
            row = ""
            indices.each { |i| row += m[i+1] + col_sep }
            puts row.chop
          else
            STDERR.puts "Invalid line ##{line_num}: #{line}"
          end
          line_num += 1
        end
      end
    end
    
    ##
    # Generate a histogram of tsv data
    # @param [String] file_name Name of the tsv input file
    # @param [String] prefix Prefix to prepend to output files. It can be a path But path must exist
    # @param [DateTime] Used to generate timestamp. Defaults to Now
    # @param [Hash] opts
    #
    def self.histogram(file_name, opts)
      integer_hist = Hash.new(0)
      fractional_hist = Hash.new(0)
      line_num = 0
      io = gzipped?(file_name) ? Zlib::GzipReader : File
      
      io.open(file_name) do |f|
        unless opts[:keys]
          header = f.gets
          keys = header.split.collect {|k| k.to_sym}
        end
        f.each_line do |line|
          i = 0
          row = line.split.inject({}) do |result, item|
            key = keys[i]
            result[key] = (key == :request_time) ? item.to_f : item
            i += 1
            result
          end
          
          if row[:request_time] >= 0.0
            case row[:request_time].to_i
            when 0..1 then (integer_hist['0_1'] += 1)
            when 1..3 then (integer_hist['1_3'] += 1)
            when 4..9 then (integer_hist['4_9'] += 1)
            when 10..19 then (integer_hist['10_19'] += 1)
            when 20..29 then (integer_hist['20_29'] += 1)
            when 30..39 then (integer_hist['30_39'] += 1)
            when 40..49 then (integer_hist['40_49'] += 1)
            when 50..59 then (integer_hist['50_59'] += 1)
            when 60..69 then (integer_hist['60_69'] += 1)
            else
              integer_hist['unknown'] += 1
              STDERR.puts "Row with nknown Integer request_time: #{row.inspect}"
            end
          elsif row[:request_time] < 0.0
            case (row[:request_time] * 1000.0).to_i
            when 0..1 then (fractional_hist['000_010'] += 1)
            when 1..9 then (fractional_hist['001_009'] += 1)
            when 10..99 then (fractional_hist['010_099'] += 1)
            when 100..199 then (fractional_hist['010_199'] += 1)
            when 200..999 then (fractional_hist['100_999'] += 1)
            else
              fractional_hist['unknown'] += 1
              STDERR.puts "Row with nknown Fractional request_time: #{row.inspect}"
            end
          else
            STDERR.puts "Invalid row[:request_time]: #{row[:request_time].inspect}"
          end
        end
      end
      fkeys = fractional_hist.keys.sort
      ikeys = integer_hist.keys.sort
      fkeys.each do |k|
        puts "#{k}:\t#{fractional_hist[k]}"
      end
      ikeys.each do |k|
        puts "#{k}:\t#{integer_hist[k]}"
      end
    end
    
    ##
    # Helper to test if a file_name is for a gzipped file
    # @param [String] file_name Name of file to test
    # @retunr [Boolean] true if the file is gzipped
    #
    def self.gzipped?(file_name)
      begin
        z = Zlib::GzipReader.open(file_name)
      rescue Zlib::GzipFile::Error => e
        return false if e.class == Zlib::GzipFile::Error && e.message =~ /not in gzip format/
      end
      return true
    end
  end
end

# Start the command-line interface only when this file is executed directly,
# not when it is loaded with require.
RunCombinedParser::Cli.start if __FILE__ == $PROGRAM_NAME
Something went wrong with that request. Please try again.