Permalink
Browse files

Added Ruby 1.8.7 support by using FasterCSV on Ruby versions prior to 1.9

  • Loading branch information...
jstray committed Apr 6, 2012
1 parent 50e465a commit a2a95d21f71edb57832305103d109c5503c2b80f
View
@@ -14,9 +14,9 @@ ruby -I $RUBYDIR $RUBYDIR/docs-to-terms.rb $1.csv $1-bigrams.csv $1-terms.csv
# Also writes out a list of indices for each term. These are used later to create feature names for Snappy.
ruby -I $RUBYDIR $RUBYDIR/terms-to-vec.rb $1-terms.csv $1-terms.vec $1-termlist.csv
-# Process termlist to create feature names for Snappy
+# Process termlist to create feature names for Overview
ruby -I $RUBYDIR $RUBYDIR/make-featurenames.rb $1-termlist.csv $1-featurenames.csv
-# Finally, extract URLs for Snappy
+# Finally, extract URLs/document text for the Overview doc viewer window
ruby -I $RUBYDIR $RUBYDIR/make-urls.rb $1.csv $1-urls.csv
@@ -3,7 +3,14 @@
# Looks for text in "text" column, and optional unique ID in "id" column.
# Assigns sequential UIDs if missing
-require 'csv'
+if RUBY_VERSION < "1.9"
+ require "rubygems"
+ require "faster_csv"
+ CSV = FCSV
+else
+ require "csv"
+end
+
#require 'stemmer'
require 'tf-idf_csv.rb'
require 'lex.rb'
@@ -37,7 +44,6 @@
# Read each row of the input file, parse the text field into terms, add the doc to the TFIDF database
tfidf = Tf_Idf_CSV.new
-csv_out = CSV.open(ARGV[1],"w")
CSV.foreach(ARGV[0], :headers=>true) do |row|
@@ -8,8 +8,14 @@
# - split on spaces
# - strip punctuation
+if RUBY_VERSION < "1.9"
+ require "rubygems"
+ require "faster_csv"
+ CSV = FCSV
+else
+ require "csv"
+end
-require 'csv'
require 'lex.rb'
# algorithm constants
View
@@ -1,5 +1,4 @@
require 'set'
-require 'csv'
#require 'stemmer'
#class String
@@ -1,6 +1,12 @@
# simple little file that sorts on first col, then outputs only second
-require 'csv'
+if RUBY_VERSION < "1.9"
+ require "rubygems"
+ require "faster_csv"
+ CSV = FCSV
+else
+ require "csv"
+end
# Usage. Can specify files (and a limit on rows) but not
if ARGV.length < 2
@@ -5,7 +5,13 @@
# Overview prototype
# Jonathan Stray, Feb 2012
-require 'csv'
+if RUBY_VERSION < "1.9"
+ require "rubygems"
+ require "faster_csv"
+ CSV = FCSV
+else
+ require "csv"
+end
# cheap and effective check for HTML formatting
def IsHTML(text)
@@ -4,7 +4,13 @@
# Jonathan Stray, December 2010 - December 2011
-require 'csv'
+if RUBY_VERSION < "1.9"
+ require "rubygems"
+ require "faster_csv"
+ CSV = FCSV
+else
+ require "csv"
+end
# Write document vector
# vector is a hash of term ID -> tfidf pairs, these need to be sorted, normalized, and formatted to file
@@ -1,4 +1,3 @@
-require 'csv'
require 'logger'
def count_if_gte(a,b)

0 comments on commit a2a95d2

Please sign in to comment.