Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add helper to index companies with symbol
- Loading branch information
1 parent
7d0cde2
commit 69e9761
Showing
3 changed files
with
119 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
async = require 'async' | ||
|
||
# Download the gzipped master index for one quarter of one year.
#
# connection - Object exposing getGzip(path, callback).
# year       - Year interpolated into the index path.
# quarter    - Quarter number interpolated into the index path.
# callback   - Called with (error) on failure, or with
#              (null, data, year, quarter) on success so the caller can tell
#              which quarter the data belongs to.
readMasterIndex = (connection, year, quarter, callback) ->
  indexPath = "/edgar/full-index/#{year}/QTR#{quarter}/master.gz"
  connection.getGzip indexPath, (error, data) ->
    return callback(error) if error?
    callback(null, data, year, quarter)
|
||
# Extract the companies that filed a given report type from master indexes.
#
# quarters   - Array of master index contents (strings). Only lines with
#              exactly five `|`-separated fields are considered; field 0 is
#              read as the CIK, field 1 as the company name and field 2 as the
#              form type.
# reportName - Form type to keep (e.g. '10-K').
#
# Returns an Array of {cik, name} objects, one per distinct CIK, in the order
# each CIK is first encountered.
parseReportingCompanies = (quarters, reportName) ->
  companies = []
  # Track parsed CIKs in a separate prototype-less lookup. Indexing the result
  # array by CIK would treat the CIK as an array position: it never detects
  # real duplicates and wrongly drops any CIK smaller than the array length.
  seenCiks = Object.create(null)
  for quarter in quarters
    for line in quarter.split('\n')
      segments = line.split('|')
      continue unless segments.length is 5
      continue unless segments[2] is reportName

      cik = segments[0]
      continue unless /^\d+$/.test(cik) # CIK must be all digits
      continue if seenCiks[cik] # Ignore companies already parsed

      seenCiks[cik] = true
      name = segments[1].trim()
      companies.push({cik, name})
  companies
|
||
module.exports =
  # Fetch the companies that filed a 10-K during the previous calendar year.
  #
  # connection - Connection handed through to readMasterIndex.
  # callback   - Called with (error) if any quarter fails to download, or with
  #              (null, companies) where companies is the result of
  #              parseReportingCompanies over the four quarterly indexes.
  fetch: (connection, callback) ->
    previousYear = "#{new Date().getFullYear() - 1}"
    indexesByQuarter = []

    # Download one quarter and store it at position quarter - 1.
    downloadQuarter = (quarterNumber, done) ->
      readMasterIndex connection, previousYear, quarterNumber, (error, data, indexYear, indexQuarter) ->
        return done(error) if error?
        indexesByQuarter[indexQuarter - 1] = data
        done()

    # One task per quarter, run one after another in order 1..4.
    tasks = [1..4].map (quarterNumber) ->
      (done) -> downloadQuarter(quarterNumber, done)

    async.waterfall tasks, (error) ->
      return callback(error) if error?
      # Put the latest quarter first so parsing visits it before older ones.
      indexesByQuarter.reverse()
      callback(null, parseReportingCompanies(indexesByQuarter, '10-K'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
path = require 'path' | ||
fs = require 'fs' | ||
async = require 'async' | ||
_ = require 'underscore' | ||
CSON = require 'season' | ||
{Connection, Companies} = require './filings' | ||
|
||
# Normalize a company name so the same company can be matched across sources.
#
# Lowercases the name, removes commas and periods, and removes the words
# " inc", " incorporated", " corp" and " corporation". The word boundary keeps
# longer words intact: without it " Income" would lose its " inc" prefix and
# "American Income Fund" would turn into "americanome fund".
#
# name - The company name String.
#
# Returns the simplified, trimmed String.
simplifyCompanyName = (name) ->
  # The input is lowercased first, so the pattern needs no case-insensitive flag.
  name.toLowerCase().replace(/[,.]| inc(?:orporated)?\b| corp(?:oration)?\b/g, '').trim()
|
||
# Parse one line of a quoted, comma-separated symbol listing.
#
# The line is split on double quotes; pieces that are blank or a lone comma
# are dropped. The first remaining piece is read as the symbol, the second as
# the company name and the fourth as the market cap.
#
# line - One line String from the listing file.
#
# Returns {name, symbol, cap}, or undefined when the symbol or name is missing
# or the symbol contains '/' or '^'. cap is -1 when the fourth piece does not
# parse to a non-zero number.
parseSymbolLine = (line) ->
  fields = line.split('"').filter (piece) ->
    trimmed = piece.trim()
    trimmed and trimmed isnt ',' and trimmed isnt '"'

  symbol = fields[0]?.trim()
  return unless symbol
  return if /[\/^]/.test(symbol)

  name = fields[1]?.trim()
  return unless name

  cap = parseFloat(fields[3])
  cap = -1 unless cap
  {name, symbol, cap}
|
||
# Read the amex, nasdaq and nyse CSV files from the parent directory of this
# script and collect every line that parseSymbolLine accepts.
#
# callback - Called with the Array of parsed listings once the queue drains.
#            Read errors are logged and handed to the queue's task callback;
#            they are not forwarded to this callback.
buildSymbolIndex = (callback) ->
  indexCompanies = []

  # Queue worker: load one exchange file and append its parsed rows.
  readExchange = (name, done) ->
    indexPath = path.resolve(__dirname, '..', "#{name}.csv")
    fs.readFile indexPath, 'utf8', (error, contents) ->
      if error?
        console.error(error)
        return done(error)

      # First line contains information about fields, so skip it
      rows = contents.split('\n').slice(1)
      for row in rows
        company = parseSymbolLine(row)
        indexCompanies.push(company) if company
      done()

  queue = async.queue readExchange
  for exchange in ['amex', 'nasdaq', 'nyse']
    queue.push(exchange)
  queue.drain = -> callback(indexCompanies)
|
||
# Script entry point: take the companies returned by Companies.fetch, match
# each one to an exchange listing by simplified company name, and write the
# matched companies (sorted by symbol) to companies.json in the current
# working directory.
Connection.open (connection) ->
  Companies.fetch connection, (error, companies) ->
    connection.close()
    return console.error(error) if error?

    companies = _.uniq companies, (company) -> company.cik
    buildSymbolIndex (indexCompanies) ->
      # Simplify every listing name once and key the listings by it, instead of
      # re-simplifying both names for every company/listing pair. Later
      # listings overwrite earlier ones, so the last listing with a given
      # simplified name supplies the symbol and cap.
      listingsByName = Object.create(null)
      for indexCompany in indexCompanies
        listingsByName[simplifyCompanyName(indexCompany.name)] = indexCompany

      # Each company is added at most once, even when several listings share
      # its simplified name.
      companiesWithSymbols = []
      for company in companies
        listing = listingsByName[simplifyCompanyName(company.name)]
        continue unless listing?
        company.symbol = listing.symbol
        company.cap = listing.cap
        companiesWithSymbols.push(company)

      console.log 'Companies that filed a 10-K:', companies.length
      console.log 'Companies on the NASDAQ, NYSE, and AMEX:', indexCompanies.length
      console.log 'Companies matched to their symbol:', companiesWithSymbols.length
      companiesWithSymbols.sort (company1, company2) ->
        return -1 if company1.symbol < company2.symbol
        return 1 if company1.symbol > company2.symbol
        0
      CSON.writeFile(path.join(process.cwd(), 'companies.json'), companiesWithSymbols)