Commit

Merge branch 'proxy'
mgleon08 committed Dec 18, 2018
2 parents f1ad0e8 + a4b928a commit 06dee45
Showing 10 changed files with 34 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
instagram-crawler (0.1.1)
instagram-crawler (0.2.0)
colorize (~> 0.8)
http (~> 4.0)
nokogiri (~> 1.8)
10 changes: 10 additions & 0 deletions README.md
@@ -56,6 +56,14 @@ instagram-crawler -u <user_name> -d -a 20181120
instagram-crawler -u <user_name> -l
```

### Proxy

`-P || --proxyname ` `-p || --port`

```ruby
instagram-crawler -u <user_name> -P http://example.com -p 1234
```

### Help

`instagram-crawler -h | --help`
@@ -70,6 +78,8 @@ options:
-d, --download Download files
-a, --after DATE Download files after this date (YYYYMMDD)
-l, --log Generate a log file in the current directory
-P, --proxyname PROXYNAME Specify proxyname of your proxy server
-p, --port PORT Specify port of your proxy server (default port: 8080)
-v, --version Show the instagram-crawler version
-h, --help Show this message
```
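Purely as an illustration of how the new flags compose with the existing ones (this command is not part of the README diff):

```
instagram-crawler -u <user_name> -d -a 20181120 -P http://example.com -p 1234
```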
3 changes: 1 addition & 2 deletions bin/instagram-crawler
@@ -1,9 +1,8 @@
#!/usr/bin/env ruby
require_relative '../lib/instagram_crawler'

begin
raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
args = InstagramCrawler::Parser::Args.new(ARGV)
raise InstagramCrawler::Errors::EnvError if ENV["sessionid"].nil?
InstagramCrawler::Logger.setting(args.log)
InstagramCrawler::Main.run
rescue => e
7 changes: 6 additions & 1 deletion lib/instagram_crawler/config.rb
@@ -4,7 +4,8 @@ class Config
class << self
attr_reader :default_url, :user_name, :base_url, :base_path,
:log_path, :after_date, :parse_date
attr_accessor :download
attr_accessor :download, :proxyname
attr_writer :port

def user_name=(user_name)
@user_name = user_name
@@ -17,6 +18,10 @@ def after_date=(after_date)
@after_date = after_date
@parse_date = Time.parse(after_date).to_i
end

def port
@port ? @port.to_i : 8080
end
end
end
end
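A side note on the accessor split above (not part of the diff): `attr_writer :port` stores the raw value handed over by OptionParser, and the custom `port` reader both applies the 8080 default and coerces the stored string to an Integer. Assuming the gem's entry point is loaded, the intended behavior is roughly:

```ruby
require "instagram_crawler"

InstagramCrawler::Config.port           # => 8080  (no -p/--port given)

# OptionParser passes the flag's value as a String;
# the reader converts it on access.
InstagramCrawler::Config.port = "1234"
InstagramCrawler::Config.port           # => 1234
```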
3 changes: 2 additions & 1 deletion lib/instagram_crawler/file.rb
@@ -25,7 +25,8 @@ def download(url, dir_name, file_name)
private

def get_binary_data(url)
res = HTTP.get(url)
res = Config.proxyname ?
HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
res.to_s
end
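The ternary above relies on http.rb's built-in proxy support: `HTTP.via(host, port)` returns a client whose requests are routed through the given HTTP proxy, while plain `HTTP.get` keeps the direct connection. The same pattern is applied to `get_html` in `lib/instagram_crawler/parser/html.rb` below. A minimal stand-alone sketch (the proxy host here is a placeholder, not something from the commit):

```ruby
require "http"

proxy_host = "proxy.example.com" # placeholder proxy hostname
proxy_port = 8080

# With .via the request goes through the proxy; without it, it goes direct.
res = HTTP.via(proxy_host, proxy_port).get("https://www.instagram.com/")
puts "#{res.code} #{res.reason}"
```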
2 changes: 2 additions & 0 deletions lib/instagram_crawler/parser/args.rb
@@ -20,6 +20,8 @@ def parse_args
opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
opts.on('-v', '--version', 'Show the instagram-crawler version') { puts("instagram-crawler #{InstagramCrawler::VERSION}"); exit }
opts.on('-h', '--help', 'Show this message') { puts(opts); exit }
opts.parse!(@args)
3 changes: 2 additions & 1 deletion lib/instagram_crawler/parser/html.rb
@@ -70,7 +70,8 @@ def loop_edges(edges)
end

def get_html(url)
res = HTTP.get(url)
res = Config.proxyname ?
HTTP.via(Config.proxyname, Config.port).get(url) : HTTP.get(url)
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
res.to_s
end
4 changes: 3 additions & 1 deletion lib/instagram_crawler/parser/json.rb
@@ -47,7 +47,9 @@ def loop_edges(edges)
end

def get_json(url)
res = HTTP.cookies(sessionid: ENV["sessionid"]).get(url)
http = HTTP.cookies(sessionid: ENV["sessionid"])
res = Config.proxyname ?
http.via(Config.proxyname, Config.port).get(url) : http.get(url)
raise Errors::HttpError, "#{res.code} #{res.reason}" if res.code != 200
res.to_s
end
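Because http.rb clients are chainable, the session cookie and the proxy can be stacked on the same client, which is what this change does for the JSON requests. A rough equivalent with placeholder values (session id, proxy host, and URL are illustrative only):

```ruby
require "http"

http = HTTP.cookies(sessionid: "your-session-id")  # placeholder session cookie
http = http.via("proxy.example.com", 8080)         # placeholder proxy
res  = http.get("https://www.instagram.com/")      # illustrative URL
puts res.code
```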
2 changes: 1 addition & 1 deletion lib/instagram_crawler/version.rb
@@ -1,3 +1,3 @@
module InstagramCrawler
VERSION = "0.1.1".freeze
VERSION = "0.2.0".freeze
end
8 changes: 6 additions & 2 deletions spec/instagram_crawler/parser/args_spec.rb
@@ -4,22 +4,26 @@

context 'when type arguments' do
it 'should get right with short args' do
parsms = ['-u', 'marvel', '-a', '20180101', '-d', '-l']
parsms = ['-u', 'marvel', '-a', '20180101', '-d', '-l', '-P', 'http://example.com', '-p', '1234']

arg = args.new(parsms)
expect(config.user_name).to eq 'marvel'
expect(config.download).to eq true
expect(config.after_date).to eq '20180101'
expect(arg.log).to eq true
expect(config.proxyname).to eq 'http://example.com'
expect(config.port).to eq 1234
end

it 'should get right with long args' do
parsms = ['--username', 'marvel', '--after', '20180101', '--download', '--log']
parsms = ['--username', 'marvel', '--after', '20180101', '--download', '--log', '--proxyname', 'http://example.com', '--port', '1234' ]
arg = args.new(parsms)
expect(config.user_name).to eq 'marvel'
expect(config.download).to eq true
expect(config.after_date).to eq '20180101'
expect(arg.log).to eq true
expect(config.proxyname).to eq 'http://example.com'
expect(config.port).to eq 1234
end
end
end
