Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

file 61 lines (48 sloc) 1.354 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
#!/usr/bin/env ruby

require 'json'
require 'date'

# Extracts the fields of a single post from one line of text that carries
# XML-style attributes (Id="...", OwnerUserId="...", Title="...", Body="...",
# Tags="...", CreationDate="...", PostTypeId="...").
# NOTE(review): the attribute names look like a Stack Exchange posts dump --
# confirm against the actual input feed.
#
# Attribute values are returned exactly as they appear between the double
# quotes; XML entities such as &lt; are NOT decoded in :title or :body.
#
# @param line [String] one input line
# @return [Hash, nil] nil when the line has no whitespace-preceded Id="..."
#   attribute; otherwise a hash with the keys
#   :id           [String] value of Id
#   :title        [String] value of Title, "" when absent
#   :body         [String] value of Body, "" when absent
#   :post_type    [String] "1" when is_question?(line) is truthy, else "2"
#   :owner_id     [String] value of OwnerUserId, "" when absent
#   :tags         [String] lower-cased tag names joined with ",", "" when none
#   :created_date [Time]   parsed CreationDate, or Time.now when absent
#
# DateTime.parse raises on an unparseable CreationDate value; that error is
# not rescued here.
def extract_post_info(line)
  # The leading \s keeps attributes whose names merely end in "Id"
  # (e.g. OwnerUserId) from being mistaken for the post's own Id.
  post_id = line[/\sId="(.*?)"/, 1]
  return nil unless post_id

  # Read the owner from its own attribute (not from the Id match).
  owner_id = line[/OwnerUserId="(.*?)"/, 1] || ""

  # Capture only what is between the quotes so no stray '"' ends up in the
  # emitted values.
  title = line[/Title="(.*?)"/, 1] || ""
  body = line[/Body="(.*?)"/, 1] || ""

  # Tags are stored as a run of bracketed names, e.g. "&lt;ruby&gt;&lt;rails&gt;".
  # Accept both the entity-escaped and the literal angle-bracket spelling and
  # pull out each name; an empty or missing attribute yields no tags.
  raw_tags = line[/Tags="(.*?)"/, 1] || ""
  tags = raw_tags.downcase.scan(/(?:&lt;|<)(.+?)(?:&gt;|>)/).flatten

  creation_string = line[/\sCreationDate="(.*?)"/, 1]
  created_date =
    if creation_string
      # DateTime.parse treats a timestamp without a zone as UTC.
      DateTime.parse(creation_string).to_time
    else
      Time.now
    end

  post_type = is_question?(line) ? "1" : "2"

  {
    :id => post_id,
    :title => title,
    :body => body,
    :post_type => post_type,
    :owner_id => owner_id,
    :tags => tags.join(","),
    :created_date => created_date
  }
end

# Tells whether the given text contains the literal attribute PostTypeId="1".
#
# @param string [String] text to search
# @return [Integer, nil] offset of the first occurrence (truthy, even when 0),
#   or nil when the attribute is not present
def is_question?(string)
  question_marker = 'PostTypeId="1"'
  string.index(question_marker)
end

# Driver: read standard input line by line, convert each line with
# extract_post_info, and print one tab-separated record per recognised post.
# Lines for which extract_post_info returns nil are skipped.
STDIN.each_line do |raw_line|
  record = extract_post_info(raw_line)
  next unless record

  # Random integer 0..9 used as the first column.
  # NOTE(review): presumably a partition key for a downstream consumer -- confirm.
  bucket = rand(10)
  puts "#{bucket}\t#{record.to_json}"
end
Something went wrong with that request. Please try again.