Skip to content

Commit

Permalink
Add helper to index companies with symbol
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinsawicki committed May 19, 2013
1 parent 7d0cde2 commit 69e9761
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 1 deletion.
4 changes: 3 additions & 1 deletion package.json
Expand Up @@ -51,6 +51,8 @@
"ftp": "~0.3.1",
"async": "~0.2.8",
"request": "~2.21.0",
"humanize-plus": "~1.1.0"
"humanize-plus": "~1.1.0",
"underscore": "~1.4.4",
"season": "~0.9.0"
}
}
46 changes: 46 additions & 0 deletions src/companies.coffee
@@ -0,0 +1,46 @@
async = require 'async'

# Download the gzipped master index for one quarter.
#
# connection - object exposing getGzip(path, callback)
# year       - value interpolated into the year segment of the index path
# quarter    - value interpolated after "QTR" in the index path
# callback   - called with (error) when getGzip reports an error, otherwise
#              with (null, data, year, quarter)
readMasterIndex = (connection, year, quarter, callback) ->
  indexPath = "/edgar/full-index/#{year}/QTR#{quarter}/master.gz"
  connection.getGzip indexPath, (error, data) ->
    return callback(error) if error?
    callback(null, data, year, quarter)

# Extract the companies that filed a given report type from master index text.
#
# quarters   - array of master index contents, one string per quarter. Only
#              lines with exactly five '|' separated fields are considered;
#              the first field is read as the CIK, the second as the company
#              name and the third as the report type.
# reportName - report type to keep (the caller in this file passes '10-K')
#
# Returns an array of {cik, name} objects in order of first appearance, with
# at most one entry per CIK.
parseReportingCompanies = (quarters, reportName) ->
  companies = []
  # CIKs already emitted. Tracked separately from the result array: looking a
  # CIK up on the array itself would treat it as a numeric index, which both
  # misses real duplicates and drops unrelated companies whose CIK happens to
  # be smaller than the number of companies collected so far.
  seenCiks = Object.create(null)
  for quarter in quarters
    for line in quarter.split('\n')
      segments = line.split('|')
      continue unless segments.length is 5
      [cik, name, report] = segments
      continue unless report is reportName
      continue unless /^\d+$/.test(cik) # CIK must be all digits
      continue if seenCiks[cik] # Ignore companies already parsed
      seenCiks[cik] = true
      companies.push({cik, name: name.trim()})
  companies

# Public API of this module.
#
# fetch(connection, callback) downloads the four quarterly master indices of
# the previous calendar year one after another and reports the companies that
# filed a 10-K. callback receives (error) on the first failed download,
# otherwise (null, companies) where companies is the array produced by
# parseReportingCompanies.
module.exports =
  fetch: (connection, callback) ->
    previousYear = String(new Date().getFullYear() - 1)
    indicesByQuarter = []

    # Build one task per quarter; each stores its download at index quarter - 1.
    downloadQuarter = (quarterNumber) -> (next) ->
      readMasterIndex connection, previousYear, quarterNumber, (error, contents) ->
        return next(error) if error?
        indicesByQuarter[quarterNumber - 1] = contents
        next()

    steps = (downloadQuarter(quarterNumber) for quarterNumber in [1..4])
    async.waterfall steps, (error) ->
      return callback(error) if error?
      # Put the fourth quarter first so it is parsed before the earlier ones.
      indicesByQuarter.reverse()
      callback(null, parseReportingCompanies(indicesByQuarter, '10-K'))
70 changes: 70 additions & 0 deletions src/create-index.coffee
@@ -0,0 +1,70 @@
path = require 'path'
fs = require 'fs'
async = require 'async'
_ = require 'underscore'
CSON = require 'season'
{Connection, Companies} = require './filings'

# Normalize a company name so the same company can be matched across sources.
#
# Lower-cases the name, removes commas and periods, removes the words
# " inc", " incorporated", " corp" and " corporation", and trims the result.
# The suffixes are only removed when they end at a word boundary, so names
# such as "Realty Income" or "First Corporate" keep their full words.
#
# name - company name string
#
# Returns the simplified name string.
simplifyCompanyName = (name) ->
  name.toLowerCase().replace(/[,.]| inc(?:orporated)?\b| corp(?:oration)?\b/g, '').trim()

# Parse one row of an exchange listing file into a company record.
#
# line - a single row whose fields are wrapped in double quotes. The first
#        field is read as the ticker symbol, the second as the company name
#        and the fourth as the market cap.
#
# Returns {name, symbol, cap}, or undefined when the row has no symbol, has no
# name, or the symbol contains '/' or '^'. cap is -1 when the fourth field does
# not parse to a non-zero number.
parseSymbolLine = (line) ->
  # Splitting on the quote character leaves the quoted field contents at the
  # odd indices and the separators between fields at the even indices. Reading
  # fields by position keeps the columns aligned even when a field is empty.
  fields = (segment.trim() for segment, index in line.split('"') when index % 2 is 1)
  symbol = fields[0]
  return unless symbol
  return if symbol.indexOf('/') isnt -1
  return if symbol.indexOf('^') isnt -1
  name = fields[1]
  cap = parseFloat(fields[3]) or -1
  return {name, symbol, cap} if name

# Read the amex, nasdaq and nyse CSV files located one directory above this
# file and collect every row that parseSymbolLine turns into a company.
#
# callback - called with the array of parsed companies once the queue drains
#
# A read error is logged with console.error and handed to the queue's task
# callback; it is not passed to `callback`.
buildSymbolIndex = (callback) ->
  parsedCompanies = []

  readExchangeFile = (exchange, done) ->
    csvPath = path.resolve(__dirname, '..', "#{exchange}.csv")
    fs.readFile csvPath, 'utf8', (error, contents) ->
      if error?
        console.error(error)
        return done(error)

      # First line contains information about fields
      rows = contents.split('\n').slice(1)
      for row in rows
        parsed = parseSymbolLine(row)
        parsedCompanies.push(parsed) if parsed
      done()

  queue = async.queue(readExchangeFile)
  for exchange in ['amex', 'nasdaq', 'nyse']
    queue.push(exchange)
  queue.drain = -> callback(parsedCompanies)

# Script entry point: fetch the companies that filed a 10-K, match them to
# exchange listings by simplified company name, print summary counts and write
# the matched companies, sorted by symbol, to companies.json in the current
# working directory.
Connection.open (connection) ->
  Companies.fetch connection, (error, companies) ->
    connection.close()
    if error?
      console.error(error)
    else
      companies = _.uniq companies, (company) -> company.cik
      buildSymbolIndex (indexCompanies) ->
        # Index the listings by simplified name once instead of rescanning
        # every listing for every filer. When several listings share a name
        # the last one wins.
        listingsByName = Object.create(null)
        for indexCompany in indexCompanies
          listingsByName[simplifyCompanyName(indexCompany.name)] = indexCompany

        # Each filer is added at most once, even when its name matches more
        # than one listing.
        companiesWithSymbols = []
        for company in companies
          listing = listingsByName[simplifyCompanyName(company.name)]
          continue unless listing?
          company.symbol = listing.symbol
          company.cap = listing.cap
          companiesWithSymbols.push(company)

        console.log 'Companies that filed a 10-K:', companies.length
        console.log 'Companies on the NASDAQ, NYSE, and AMEX:', indexCompanies.length
        console.log 'Companies matched to their symbol:', companiesWithSymbols.length
        companiesWithSymbols.sort (company1, company2) ->
          return -1 if company1.symbol < company2.symbol
          return 1 if company1.symbol > company2.symbol
          0
        CSON.writeFile(path.join(process.cwd(), 'companies.json'), companiesWithSymbols)

0 comments on commit 69e9761

Please sign in to comment.