Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 9acfda44169cb235d289de61d175101158b5656a @petewarden committed Aug 24, 2010
Showing with 185,776 additions and 0 deletions.
  1. +9 −0 README
  2. +180 −0 cliargs.php
  3. +28,938 −0 companydata.csv
  4. +46,304 −0 companydata.txt
  5. +46,514 −0 companyurls.txt
  6. +12 −0 companyurls_test.txt
  7. +107 −0 gathercompanies.php
  8. +96 −0 gathercompanyurls.php
  9. +157 −0 parallelcurl.php
  10. +25,375 −0 places2k.txt
  11. +203 −0 scratchpad.py
  12. +33,233 −0 zcta5.txt
  13. +1,396 −0 zips_by_amount.csv
  14. +3,252 −0 zips_by_numbers.csv
@@ -0,0 +1,9 @@
+Crunchcrawl
+~~~~~~~~~~~~~~~
+
+This module lets you index and download the company information held in Crunchbase.
+*Before using, double-check http://www.crunchbase.com/robots.txt and the API conditions to ensure you're obeying the terms-of-service*
+
+It contains various scripts to index and pull down the latest data about the company, as well as a snapshot of the data as it was on Monday August 23rd 2010. This data is CC-BY, see http://www.crunchbase.com/help/licensing-policy for more information.
+
+By Pete Warden <pete@petewarden.com>, freely reusable, see http://petewarden.typepad.com for more
@@ -0,0 +1,180 @@
+<?php
+
+// A set of utility functions to make handling command line arguments in PHP easier
+// To use them, pass in an array describing the expected arguments, in the form
+//
+// array(
+// '<long name of argument>' => array(
+// 'short' => '<single letter version of argument>',
+// 'type' => <'switch' | 'optional' | 'required'>,
+// 'description' => '<help text for the argument>',
+// 'default' => '<value if this is an optional argument and it isn't specified>',
+// ),
+// ...
+// );
+//
+// If the type is switch, then the result is a boolean that will be false if it's
+// not present, or true if it is
+//
+// If the type is optional, then the result will be the default if it's not present
+//
+// If the type is required, then the script will print out the usage and exit if it's
+// not found
+//
+// To use, call cliargs_get_options() with the array of argument descriptions
+// The result will be an array with the argument names as keys to the found values
+//
+
+/*
+OpenHeatMap processing
+Copyright (C) 2010 Pete Warden <pete@petewarden.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+
+// Prints a "Usage:" header followed by one line per entry in $cliargs, then
+// terminates the script. Each line has the form
+//   -<short>/--<long> [<value> ]: <description>[ (required)]
+// where the "<value> " placeholder is only shown for 'optional' and 'required'
+// arguments, and the " (required)" suffix only for 'required' ones.
+function cliargs_print_usage_and_exit($cliargs)
+{
+    print "Usage:\n";
+
+    foreach ($cliargs as $long => $arginfo)
+    {
+        $short = $arginfo['short'];
+        $type = $arginfo['type'];
+        $description = $arginfo['description'];
+
+        $is_required = ($type=='required');
+        $takes_value = ($is_required || $type=='optional');
+
+        // Assemble the complete line for this argument, then emit it in one go
+        $line = "-$short/--$long ";
+        if ($takes_value)
+            $line .= "<value> ";
+        $line .= ": $description";
+        if ($is_required)
+            $line .= " (required)";
+        $line .= "\n";
+
+        print $line;
+    }
+
+    exit();
+}
+
+// Returns true when $source begins with $prefix, false otherwise.
+// An empty $prefix matches every string, since zero characters are compared.
+function cliargs_strstartswith($source, $prefix)
+{
+    $prefix_length = strlen($prefix);
+    $comparison = strncmp($source, $prefix, $prefix_length);
+
+    return ($comparison == 0);
+}
+
+// Parses the script's command line (the global $argv/$argc) against the
+// argument descriptions in $cliargs and returns the values that were found.
+//
+// Accepted forms are "--long", "-s", "--long=value", "-s=value", "--long value"
+// and "-s value". Argument names are lower-cased before being looked up in
+// $cliargs; values are returned exactly as they were typed. Anything that does
+// not start with '-' is collected, in order, under the 'unnamed' key.
+//
+// The result is an array keyed by long argument name:
+//  - 'switch' arguments are true when present, false otherwise
+//  - 'optional' arguments hold the supplied value, or their 'default'
+//  - 'required' arguments hold the supplied value
+//
+// Prints the usage text and exits on an unknown argument, on a value-taking
+// argument with no value, or on a missing required argument. Dies if an
+// optional argument is absent and its description has no 'default' entry.
+function cliargs_get_options($cliargs)
+{
+    global $argv;
+    global $argc;
+
+    $options = array('unnamed' => array());
+    for ($index=1; $index<$argc; $index+=1)
+    {
+        $rawarg = $argv[$index];
+
+        // Limit of 2 so a value that itself contains '=' is kept in one piece
+        // (explode replaces split(), which no longer exists in PHP 7+)
+        $argparts = explode('=', $rawarg, 2);
+
+        // Only the name is lower-cased; the value keeps its original case
+        $namepart = strtolower($argparts[0]);
+
+        if (cliargs_strstartswith($namepart, '--'))
+        {
+            $longname = substr($namepart, 2);
+        }
+        else if (cliargs_strstartswith($namepart, '-'))
+        {
+            // Translate the short form to its long name; if nothing matches,
+            // the short name falls through and is reported as unknown below
+            $shortname = substr($namepart, 1);
+            $longname = $shortname;
+            foreach ($cliargs as $name => $info)
+            {
+                if ($shortname===$info['short'])
+                {
+                    $longname = $name;
+                    break;
+                }
+            }
+        }
+        else
+        {
+            $longname = 'unnamed';
+        }
+
+        if ($longname==='unnamed')
+        {
+            // Positional arguments (file names and the like) are stored
+            // untouched: no lower-casing and no truncation at '='
+            $options['unnamed'][] = $rawarg;
+            continue;
+        }
+
+        if (empty($cliargs[$longname]))
+        {
+            print "Unknown argument '$longname'\n";
+            cliargs_print_usage_and_exit($cliargs);
+        }
+
+        $arginfo = $cliargs[$longname];
+        $argtype = $arginfo['type'];
+        if ($argtype==='switch')
+        {
+            $value = true;
+        }
+        else if (isset($argparts[1]))
+        {
+            // "--name=value" form
+            $value = $argparts[1];
+        }
+        else if (($index+1)<$argc)
+        {
+            // "--name value" form: consume the following argument as the value
+            $value = $argv[$index+1];
+            $index += 1;
+        }
+        else
+        {
+            print "Missing value after '$longname'\n";
+            cliargs_print_usage_and_exit($cliargs);
+        }
+
+        $options[$longname] = $value;
+    }
+
+    // Fill in everything that was not given on the command line
+    foreach ($cliargs as $longname => $arginfo)
+    {
+        if (isset($options[$longname]))
+            continue;
+
+        $type = $arginfo['type'];
+        if ($type=='required')
+        {
+            print("Missing required value for '$longname'\n");
+            cliargs_print_usage_and_exit($cliargs);
+        }
+        else if ($type=='optional')
+        {
+            if (!isset($arginfo['default']))
+                die('Missing default value for '.$longname);
+
+            $options[$longname] = $arginfo['default'];
+        }
+        else if ($type=='switch')
+        {
+            $options[$longname] = false;
+        }
+    }
+
+    return $options;
+}
+
+?>
Oops, something went wrong.

0 comments on commit 9acfda4

Please sign in to comment.