Skip to content

Commit

Permalink
Include Canada Mexico spreadsheet
Browse files Browse the repository at this point in the history
  • Loading branch information
tmhammer committed Jul 18, 2016
1 parent 9490028 commit 6e95733
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 24 deletions.
42 changes: 42 additions & 0 deletions canada_mexico_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
require 'roo'
require 'roo-xls'
require 'open-uri'

# Turns the Canada/Mexico workbook into a flat list of monthly records.
#
# The workbook is expected to contain one sheet named 'Canada' and one named
# 'Mexico'. Column headers (years) are read from row 4 of the workbook's
# default sheet and month rows are looked for in rows 5..19 of each country
# sheet. Each record is a Hash of the form:
#
#   { date: 'YYYY-MM', i94_code: Integer, country: String, amount: <cell value> }
class CanadaMexicoParser
  # Row (1-based) that holds the year headers.
  HEADER_ROW = 4
  # Rows (1-based) scanned for month data on every country sheet.
  DATA_ROWS = (5..19).freeze

  # Opens the workbook at +path+ and returns the records for both countries,
  # Canada first, then Mexico.
  #
  # The opened workbook is kept in the class-level @spreadsheet because
  # transform_rows reads it from there.
  #
  # @param path [String] location of the workbook, handed to Roo::Spreadsheet.open
  # @return [Array<Hash>] one record per (month row, header column) per country
  def self.parse(path)
    @path = path
    @spreadsheet = Roo::Spreadsheet.open(@path)
    # Return value is unused; the call is kept for any cleaning Roo applies to
    # the sheet's cells. NOTE(review): confirm this pass is still required.
    @spreadsheet.parse(clean: true)

    # Map every non-empty header cell to its column index.
    headers = {}
    @spreadsheet.row(HEADER_ROW).each_with_index do |header, column|
      headers[header] = column unless header.nil?
    end

    transform_rows(headers, 'Canada', 574) + transform_rows(headers, 'Mexico', 582)
  end

  # Builds the records for a single country sheet of the workbook stored in
  # @spreadsheet (set by parse).
  #
  # Rows whose first cell is not an English month name -- including rows whose
  # first cell is empty -- are skipped.
  #
  # @param headers [Hash] header value (year) => column index
  # @param country [String] sheet name; also stored in each record
  # @param code [Integer] value stored under :i94_code in each record
  # @return [Array<Hash>] records in row order, then header order
  def self.transform_rows(headers, country, code)
    # Amounts must come from the requested country's sheet, not a fixed one.
    sheet = @spreadsheet.sheet(country)

    DATA_ROWS.each_with_object([]) do |row_num, records|
      cells = sheet.row(row_num) # fetch the row once instead of once per header
      month_number = month_number_for(cells[0])
      next unless month_number

      headers.each do |year, column|
        # to_i accepts the header whether it was read as a number or a string.
        date_str = Date.new(year.to_i, month_number, 1).strftime('%Y-%m')
        records.push({ date: date_str, i94_code: code, country: country, amount: cells[column] })
      end
    end
  end

  # Returns the month number (1..12) for an English month name, or nil when
  # +label+ is not one. Date::MONTHNAMES has nil at index 0, so an empty cell
  # would otherwise be treated as "month 0" and make Date.new raise.
  def self.month_number_for(label)
    index = Date::MONTHNAMES.index(label)
    index if index && index > 0
  end
  private_class_method :month_number_for
end
2 changes: 0 additions & 2 deletions excel_parser.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
require 'roo'
require 'roo-xls'
require 'open-uri'
require 'csv'
require 'yaml'

class ExcelParser

Expand Down
25 changes: 3 additions & 22 deletions scraper.rb
Original file line number Diff line number Diff line change
@@ -1,39 +1,20 @@
require './dropbox_client'
require './excel_parser'
require './canada_mexico_parser'
require 'scraperwiki'

# Scraper entry point: asks the Dropbox client wrapper for a list of file
# paths, downloads and parses each file, and saves the combined rows to
# SQLite through ScraperWiki.
#
# NOTE(review): this listing looks like a rendered diff hunk ("3 additions &
# 22 deletions") in which removed and added lines are shown together without
# +/- markers -- new_data is assigned twice and save_sqlite is called twice.
# Confirm against the actual file which of these lines are still present.
client = DropboxClientWrapper.new
file_paths = client.get_file_paths

#File.open('countries.yaml', 'w'){|f| }
# Per-file lookup of i94 code => country name, keyed by the file name's stem.
# Only the commented-out YAML dump below reads it.
countries = {}

# Accumulates the parsed rows from every file.
data = []
file_paths.each do |path|
client.download_file(path)
# Strips every '/' in place (this mutates the string held in file_paths) so
# that path is a bare file name from here on.
# NOTE(review): presumably matches the local name download_file wrote to --
# confirm against DropboxClientWrapper.
path.gsub!('/', '')

# Text before the first '.' of the file name; used as the key into countries.
year = path.split('.')[0]
countries[year] = {}

new_data = ExcelParser.parse(path)
new_data.each do |row|
# Code is normalised via to_i.to_s before being used as a string key.
countries[year][row[:i94_code].to_i.to_s] = row[:country]
end
# The Canada/Mexico workbook has its own parser; every other file goes
# through ExcelParser. The comparison is against the slash-stripped name.
new_data = path == 'canada_mexico.xlsx' ? CanadaMexicoParser.parse(path) : ExcelParser.parse(path)

data.concat new_data
end

#File.open('countries.yaml', 'a'){ |f| f.write(countries.to_yaml) }

# # Write out to the sqlite database using scraperwiki library
# NOTE(review): the first argument is presumably the list of unique-key
# columns for the saved rows -- confirm against the scraperwiki gem docs.
ScraperWiki.save_sqlite([:date, :i94_code, :country], data)
#
# # An arbitrary query against the database
# ScraperWiki.select("* from data where 'name'='peter'")

# You don't have to do things with the Mechanize or ScraperWiki libraries.
# You can use whatever gems you want: https://morph.io/documentation/ruby
# All that matters is that your final data is written to an SQLite database
# called "data.sqlite" in the current working directory which has at least a table
# called "data".
ScraperWiki.save_sqlite([:date, :i94_code, :country], data)

0 comments on commit 6e95733

Please sign in to comment.