Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

syntactically correct JSON format

  • Loading branch information...
commit 048d957bd1cf31abab721d7077217426655e5e69 1 parent 972bd2e
@pawelkl authored
Showing with 103 additions and 27 deletions.
  1. +5 −4 AccessDb.rb
  2. +11 −7 Downloader.rbw
  3. +14 −16 Etl.rb
  4. +73 −0 MetaImport.rb
View
9 AccessDb.rb
@@ -1,6 +1,7 @@
# require 'rubygems'
require 'mongo'
require 'json'
+require 'active_support'
class AccessDb
def initialize dbname, collection #, username, password
@@ -16,8 +17,8 @@ def upsert_by_id id, json
end
def upsert_by_meta json
- # print json
- @coll.update({ "hash_md5" => json[:hash_md5] }, json, :upsert => true)
+ print json
+ @coll.update({ :hash_md5 => json["hash_md5"] }, json, :upsert => true)
end
def remove json
@@ -70,7 +71,7 @@ def teardown
def test_read
json = {:hash_md5 => :sara}
- id = @coll.upsert_by_meta json
+ @coll.upsert_by_meta json
# puts id
find = @coll.find json
# assert_equal(json[:hash_md5], find.except!("_id")["hash_md5"])
@@ -81,7 +82,7 @@ def test_read
def test_read_with_hash
json = {:hash_md5 => :lolcats}
- id = @coll.upsert_by_meta json
+ @coll.upsert_by_meta json
find = @coll.find json
# json[:_id] = id
assert_equal json, find, "retrieved json file isn't exactly the same as input json"
View
18 Downloader.rbw
@@ -2,7 +2,8 @@
# require 'rubygems'
require 'curb'
require 'yaml'
-require 'json'
+# require 'json'
+require 'json/pure'
require 'digest/md5'
load 'D:\Dropbox\#code\PracaInz-Program\MovieHasher.rb' # load 'moviehasher.rb'
load 'D:\Dropbox\#code\PracaInz-Program\AccessDb.rb'
@@ -10,6 +11,7 @@ load 'D:\Dropbox\#code\PracaInz-Program\AccessDb.rb'
# include Wx
require 'optparse'
require 'pp'
+# require 'active_support'
STDOUT.sync = true; exit_requested = false; Kernel.trap( "INT" ) { exit_requested = true }
@@ -82,7 +84,7 @@ if __FILE__ == $0; pp "Options:", options; pp "ARGV:", ARGV end
=end
class Downloader
- def initialize(directory)
+ def initialize(directory,db,collection)
@PASS=nil
@COOKIE=nil
@filename=nil
@@ -91,7 +93,7 @@ class Downloader
File.exists? @target_dir # File.directory? @target_dir
@c = Curl::Easy.new
curl_setup
- @mongo = AccessDb.new "meta","meta"
+ @mongo = AccessDb.new db, collection
end
def curl_setup
@@ -117,6 +119,7 @@ class Downloader
def parse_link_info(url)
json = {}
+ # json = ActiveSupport::HashWithIndifferentAccess.new
json[:link_requested] = url
if @c.last_effective_url != url
@@ -140,7 +143,7 @@ class Downloader
json[:hash_md5] = Digest::MD5.hexdigest(File.read(@save_location))
# puts JSON.pretty_generate(json)
- json
+ JSON.generate json
end
def add_links(url_array,cred=nil,ref=nil,cookie=nil)
@@ -155,10 +158,11 @@ class Downloader
json = parse_link_info single_url
# puts json
- id = @mongo.upsert_by_meta json
+ # id =
+ @mongo.upsert_by_meta json
# puts id
# json["_id"] = id
- puts @save_location
+ # puts @save_location
File.open(@save_location + :":meta.json".to_s,"w").write json
end
end
@@ -166,7 +170,7 @@ end
if __FILE__ == $0; options[:destination] = 'c:/temp' end
-manager = Downloader.new options[:destination]
+manager = Downloader.new options[:destination],"meta","meta"
if ARGV.nil?
manager.add_link(options[:url])
View
30 Etl.rb
@@ -28,22 +28,20 @@
pp "ARGV:", ARGV
end
-# search_dir = options[:directory].to_s + '\**\*'
-# puts search_dir
-
-# Dir[options[:directory]].each{|file|
-# # do something
-# # sprawdzenie czy plik posiada metadane
-# if
-# then
- # end
-# }
-
-Find.find(options[:directory]) do |f|
- target = f + :":meta.json".to_s
- if File.exists? target; pp target end
- # puts target
- puts File.open(target,"r").read
+class MetaImport
+ attr_reader
+ def initalize
+ end
+ def import
+ Find.find(options[:directory]) do |f|
+ target = f + :":meta.json".to_s
+ if File.exists? target; pp target end
+ # puts target
+ json = File.open(target).read
+ puts JSON.pretty_generate(json)
+ json
+ end
+ end
end
=begin
View
73 MetaImport.rb
@@ -0,0 +1,73 @@
+require 'optparse'
+require 'pp'
+require 'find'
+require 'json'
+
+STDOUT.sync = true; exit_requested = false; Kernel.trap( "INT" ) { exit_requested = true }
+
+options = {}
+
+optparse = OptionParser.new do|opts|
+ opts.banner = "Usage: Downloader.rbw [options] url1 url2 ..."
+
+ opts.on( '-h', '--help', 'Display this screen' ) do
+ puts opts
+ exit
+ end
+
+ options[:directory] = []
+ opts.on( '-d', '--directory dir', "List of urls" ) do |u|
+ options[:directory] = u
+ end
+end
+
+optparse.parse!
+
+
+
+class MetaImport
+ attr_reader :path
+ def initialize(path)
+ @path = path
+ end
+ # def ==()
+ # end
+ def import
+ Find.find(@path) do |f|
+ target = f + :":meta.json".to_s
+ if File.exists? target; pp target end
+ # puts target
+ data = File.open(target).read
+ puts JSON.parse(data)
+ # puts JSON.pretty_generate(json)
+ # puts json[:link_filename_requested]
+ json
+ end
+ end
+end
+
+if __FILE__ == $0
+ options[:directory] = 'c:/temp'
+ pp "Options:", options
+ pp "ARGV:", ARGV
+end
+
+MetaImport.new(options[:directory]).import
+
+=begin
+if __FILE__ == $0
+ require 'test/unit'
+ require 'active_support/core_ext/hash'
+
+ class EtlTest < Test::Unit::TestCase
+ def setup
+ @coll = AccessDb.new "meta","meta"
+ end
+
+ def teardown
+ @coll.remove({:hash_md5 => [:sara,:ania]})
+ end
+
+ end
+end
+=end
Please sign in to comment.
Something went wrong with that request. Please try again.