
Commit

Merge 079aab9 into 3f01dd1
ninoseki committed Oct 21, 2018
2 parents 3f01dd1 + 079aab9 commit caa849a
Showing 5 changed files with 142 additions and 126 deletions.
55 changes: 1 addition & 54 deletions lib/miteru/cli.rb
@@ -17,64 +17,11 @@ class CLI < Thor
     method_option :verbose, type: :boolean, default: true
     desc "execute", "Execute the crawler"
     def execute
-      websites = Crawler.execute(
-        directory_traveling: options[:directory_traveling],
-        size: options[:size],
-        threads: options[:threads],
-        verbose: options[:verbose]
-      )
-      websites.each do |website|
-        next unless website.has_kit?
-
-        message = "#{website.url}: it might contain phishing kit(s) (#{website.compressed_files.join(', ')})."
-        puts message.colorize(:light_red)
-        post_to_slack(website.message) if options[:post_to_slack] && valid_slack_setting?
-        download_compressed_files(website.url, website.compressed_files, options[:download_to]) if options[:auto_download]
-      end
+      Crawler.execute options.map { |k, v| [k.to_sym, v] }.to_h
     end
 
-    no_commands do
-      def download_compressed_files(url, compressed_files, base_dir)
-        compressed_files.each do |path|
-          target_url = "#{url}/#{path}"
-          begin
-            download_file_path = HTTPClient.download(target_url, base_dir)
-            if duplicated?(download_file_path, base_dir)
-              puts "Do not download #{target_url} because there is a same hash file in the directory (SHA256: #{sha256(download_file_path)})."
-              FileUtils.rm download_file_path
-            else
-              puts "Download #{target_url} as #{download_file_path}"
-            end
-          rescue Down::Error => e
-            puts "Failed to download: #{target_url} (#{e})"
-          end
-        end
-      end
-
-      def sha256(path)
-        digest = Digest::SHA256.file(path)
-        digest.hexdigest
-      end
-
-      def duplicated?(file_path, base_dir)
-        base = sha256(file_path)
-        sha256s = Dir.glob("#{base_dir}/*.zip").map { |path| sha256(path) }
-        sha256s.select { |sha256| sha256 == base }.length > 1
-      end
-
-      def valid_slack_setting?
-        ENV["SLACK_WEBHOOK_URL"] != nil
-      end
-
-      def post_to_slack(message)
-        webhook_url = ENV["SLACK_WEBHOOK_URL"]
-        raise ArgumentError, "Please set the Slack webhook URL via SLACK_WEBHOOK_URL env" unless webhook_url
-
-        channel = ENV["SLACK_CHANNEL"] || "#general"
-
-        payload = { text: message, channel: channel }
-        HTTP.post(webhook_url, json: payload)
-      end
-    end
   end
 end
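
Note on the one-line body of execute above: Thor exposes options as a string-keyed hash, while Crawler.execute takes keyword arguments, so the keys must be symbolized before the hash is handed over. A minimal sketch of the idiom with hypothetical option values (not part of this commit); on Ruby 2.5+, options.transform_keys(&:to_sym) does the same job:

    # Sketch: turn a Thor-style string-keyed options hash into keyword arguments.
    options = { "size" => 100, "threads" => 10, "verbose" => true } # hypothetical CLI input
    symbolized = options.map { |k, v| [k.to_sym, v] }.to_h
    # => { size: 100, threads: 10, verbose: true }

    def run(size: 100, threads: 10, verbose: false)
      puts "size=#{size} threads=#{threads} verbose=#{verbose}"
    end

    run(**symbolized) # explicit keyword splat; Ruby 2.x also converts a symbol-keyed hash implicitly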
93 changes: 78 additions & 15 deletions lib/miteru/crawler.rb
@@ -3,12 +3,14 @@
 require "csv"
 require "http"
 require "json"
-require "thread/pool"
+require "parallel"
 require "uri"
 
 module Miteru
   class Crawler
+    attr_reader :auto_download
     attr_reader :directory_traveling
+    attr_reader :download_to
     attr_reader :size
     attr_reader :threads
     attr_reader :verbose
@@ -17,8 +19,11 @@ class Crawler
     OPENPHISH_ENDPOINT = "https://openphish.com"
     PHISHTANK_ENDPOINT = "http://data.phishtank.com"
 
-    def initialize(directory_traveling: false, size: 100, threads: 10, verbose: false)
+    def initialize(auto_download: false, directory_traveling: false, download_to: "/tmp", post_to_slack: false, size: 100, threads: 10, verbose: false)
+      @auto_download = auto_download
       @directory_traveling = directory_traveling
+      @download_to = download_to
+      @post_to_slack = post_to_slack
       @size = size
       @threads = threads
       @verbose = verbose
@@ -69,39 +74,97 @@ def breakdown(url)
 
     def suspicious_urls
       @suspicious_urls ||= [].tap do |arr|
-        urls = (urlscan_feed + openphish_feed + phishtank_feed)
+        urls = (urlscan_feed + openphish_feed + phishtank_feed).select { |url| url.start_with?("http://", "https://") }
         urls.map { |url| breakdown(url) }.flatten.uniq.sort.each { |url| arr << url }
       end
     end
 
     def execute
       puts "Loaded #{suspicious_urls.length} URLs to crawl." if verbose
 
-      pool = Thread.pool(threads)
-      websites = []
-
-      suspicious_urls.each do |url|
-        pool.process do
-          website = Website.new(url)
-          if website.has_kit?
-            websites << website
-          else
-            puts "#{website.url}: it doesn't contain a phishing kit." if verbose
-          end
-          website.unbuild
-        end
-      end
-      pool.shutdown
-
-      websites
+      Parallel.each(suspicious_urls, in_threads: threads) do |url|
+        website = Website.new(url)
+
+        if website.has_kit?
+          message = "#{website.url}: it might contain phishing kit(s) (#{website.compressed_files.join(', ')})."
+          puts message.colorize(:light_red)
+          post_to_slack(website.message) if post_to_slack? && valid_slack_setting?
+          download_compressed_files(website.url, website.compressed_files, download_to) if auto_download?
+        else
+          puts "#{website.url}: it doesn't contain a phishing kit." if verbose
+        end
+        website.unbuild
+      rescue StandardError => e
+        puts "Failed to load #{url} (#{e})" if verbose
+      end
     end
 
-    def self.execute(directory_traveling: false, size: 100, threads: 10, verbose: false)
-      new(directory_traveling: directory_traveling, size: size, threads: threads, verbose: verbose).execute
+    def self.execute(auto_download: false, directory_traveling: false, download_to: "/tmp", post_to_slack: false, size: 100, threads: 10, verbose: false)
+      new(
+        auto_download: auto_download,
+        directory_traveling: directory_traveling,
+        download_to: download_to,
+        post_to_slack: post_to_slack,
+        size: size,
+        threads: threads,
+        verbose: verbose
+      ).execute
     end
 
+    def download_compressed_files(url, compressed_files, base_dir)
+      compressed_files.each do |path|
+        target_url = "#{url}/#{path}"
+        begin
+          download_file_path = HTTPClient.download(target_url, base_dir)
+          if duplicated?(download_file_path, base_dir)
+            puts "Do not download #{target_url} because there is a same hash file in the directory (SHA256: #{sha256(download_file_path)})."
+            FileUtils.rm download_file_path
+          else
+            puts "Download #{target_url} as #{download_file_path}"
+          end
+        rescue Down::Error => e
+          puts "Failed to download: #{target_url} (#{e})"
+        end
+      end
+    end
+
+    def post_to_slack(message)
+      webhook_url = ENV["SLACK_WEBHOOK_URL"]
+      raise ArgumentError, "Please set the Slack webhook URL via SLACK_WEBHOOK_URL env" unless webhook_url
+
+      channel = ENV["SLACK_CHANNEL"] || "#general"
+
+      payload = { text: message, channel: channel }
+      HTTP.post(webhook_url, json: payload)
+    end
+
+    def post_to_slack?
+      @post_to_slack
+    end
+
+    def auto_download?
+      @auto_download
+    end
+
+    def valid_slack_setting?
+      ENV["SLACK_WEBHOOK_URL"] != nil
+    end
+
     private
 
+    def sha256(path)
+      digest = Digest::SHA256.file(path)
+      digest.hexdigest
+    end
+
+    def duplicated?(file_path, base_dir)
+      base = sha256(file_path)
+      sha256s = Dir.glob("#{base_dir}/*.zip").map { |path| sha256(path) }
+      sha256s.select { |sha256| sha256 == base }.length > 1
+    end
+
     def get(url)
       res = HTTP.follow(max_hops: 3).get(url)
       raise HTTPResponseError if res.code != 200
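
A note on duplicated? above: the file that was just downloaded is itself matched by the Dir.glob pattern, so its digest always occurs at least once; only a count above 1 means another copy with the same hash already existed, hence the check is > 1 rather than >= 1. A self-contained sketch of the same idea with hypothetical file names (not part of this commit):

    require "digest"
    require "fileutils"
    require "tmpdir"

    # Sketch: detect whether a just-downloaded file duplicates an existing one
    # by comparing SHA256 digests, mirroring Crawler#duplicated? above.
    def duplicate_in_dir?(file_path, base_dir)
      base = Digest::SHA256.file(file_path).hexdigest
      digests = Dir.glob("#{base_dir}/*.zip").map { |path| Digest::SHA256.file(path).hexdigest }
      # file_path itself lives in base_dir, so its digest occurs at least once;
      # a count above 1 means a second copy was already there.
      digests.count(base) > 1
    end

    Dir.mktmpdir do |dir|
      a = File.join(dir, "a.zip")
      b = File.join(dir, "b.zip")
      File.write(a, "same bytes")
      puts duplicate_in_dir?(a, dir) # => false: only one copy exists
      FileUtils.cp(a, b)
      puts duplicate_in_dir?(b, dir) # => true: a.zip carries the same hash
    end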
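On the dependency swap in this file: the thread gem's explicit pool (Thread.pool(threads) / pool.process / pool.shutdown) is replaced by a single Parallel.each(..., in_threads: threads) call, which spawns, feeds, and joins the worker threads itself. A minimal sketch of the replacement pattern with a hypothetical workload (not part of this commit); note that stopping a parallel loop early is done by raising Parallel::Break, not with a bare break:

    require "parallel"

    urls = %w(http://example.com/a http://example.com/b http://example.com/c)

    # One call replaces pool creation, task submission, and shutdown.
    Parallel.each(urls, in_threads: 10) do |url|
      puts "crawling #{url}"
    rescue StandardError => e
      # Block-level rescue (Ruby >= 2.5) keeps one bad URL from aborting the run.
      puts "failed: #{url} (#{e})"
    end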
2 changes: 1 addition & 1 deletion miteru.gemspec
@@ -36,6 +36,6 @@ Gem::Specification.new do |spec|
   spec.add_dependency "down", "~> 4.5"
   spec.add_dependency "http", "~> 3.3"
   spec.add_dependency "oga", "~> 2.15"
+  spec.add_dependency "parallel", "~> 1.12"
   spec.add_dependency "thor", "~> 0.19"
-  spec.add_dependency "thread", "~> 0.2.2"
 end
52 changes: 0 additions & 52 deletions spec/cli_spec.rb
@@ -1,10 +1,7 @@
 # frozen_string_literal: true
 
 RSpec.describe Miteru::CLI do
-  include_context "http_server"
-  include_context "download_compressed_files"
   subject { Miteru::CLI.new }
-  before(:each) { ENV.delete "SLACK_WEBHOOK_URL" }
 
   describe "#execute" do
     before do
@@ -14,53 +11,4 @@
       capture(:stdout) { Miteru::CLI.start %w(execute) }
     end
   end
-
-  describe "#download_compressed_files" do
-    before { WebMock.disable! }
-    after { WebMock.enable! }
-    context "when it runs once" do
-      it "should download a file" do
-        url = "http://#{host}:#{port}/has_kit"
-        compressed_files = ["test.zip"]
-        expect(Dir.glob("#{base_dir}/*.zip").empty?).to be(true)
-        capture(:stdout) { subject.download_compressed_files(url, compressed_files, base_dir) }
-        download_files = Dir.glob("#{base_dir}/*.zip")
-        expect(download_files.empty?).to be(false)
-        expect(download_files.length).to eq(1)
-      end
-    end
-    context "when it runs multiple times" do
-      it "should remove duplicated files" do
-        url = "http://#{host}:#{port}/has_kit"
-        compressed_files = ["test.zip"]
-        expect(Dir.glob("#{base_dir}/*.zip").empty?).to be(true)
-        capture(:stdout) { subject.download_compressed_files(url, compressed_files, base_dir) }
-        capture(:stdout) { subject.download_compressed_files(url, compressed_files, base_dir) }
-        capture(:stdout) { subject.download_compressed_files(url, compressed_files, base_dir) }
-        download_files = Dir.glob("#{base_dir}/*.zip")
-        expect(download_files.empty?).to be(false)
-        expect(download_files.length).to eq(1)
-      end
-    end
-  end
-  describe "#valid_slack_setting?" do
-    context "when set ENV['SLACK_WEBHOOK_URL']" do
-      before { ENV["SLACK_WEBHOOK_URL"] = "test" }
-      it "should return true" do
-        expect(subject.valid_slack_setting?).to be(true)
-      end
-    end
-    context "when not set ENV['SLACK_WEBHOOK_URL']" do
-      it "should return false" do
-        expect(subject.valid_slack_setting?).to be(false)
-      end
-    end
-  end
-  describe "#post_to_slack" do
-    context "when not set ENV['SLACK_WEBHOOK_URL']" do
-      it "should return false" do
-        expect { subject.post_to_slack("test") }.to raise_error(ArgumentError)
-      end
-    end
-  end
 end
66 changes: 62 additions & 4 deletions spec/crawler_spec.rb
@@ -2,6 +2,10 @@
 
 RSpec.describe Miteru::Crawler, :vcr do
   include_context "http_server"
+  include_context "download_compressed_files"
+
+  before(:each) { ENV.delete "SLACK_WEBHOOK_URL" }
+
   subject { Miteru::Crawler }
 
   describe "#breakdown" do
@@ -31,6 +35,7 @@
       end
     end
   end
+
   describe "#urlscan_feed" do
     context "without 'size' option" do
       it "should return an Array" do
@@ -54,33 +59,86 @@
       end
     end
   end
+
   describe "#openphish_feed" do
     it "should return an Array" do
       results = subject.new.openphish_feed
       expect(results).to be_an(Array)
     end
   end
+
   describe "#phishtank_feed" do
     it "should return an Array" do
       results = subject.new.phishtank_feed
       expect(results).to be_an(Array)
     end
   end
+
   describe "#suspicious_urls" do
     it "should return an Array" do
       results = subject.new.suspicious_urls
       expect(results).to be_an(Array)
       expect(results.length).to eq(results.uniq.length)
     end
   end
+
+  describe "#download_compressed_files" do
+    before { WebMock.disable! }
+    after { WebMock.enable! }
+    context "when it runs once" do
+      it "should download a file" do
+        url = "http://#{host}:#{port}/has_kit"
+        compressed_files = ["test.zip"]
+        expect(Dir.glob("#{base_dir}/*.zip").empty?).to be(true)
+        capture(:stdout) { subject.new.download_compressed_files(url, compressed_files, base_dir) }
+        download_files = Dir.glob("#{base_dir}/*.zip")
+        expect(download_files.empty?).to be(false)
+        expect(download_files.length).to eq(1)
+      end
+    end
+    context "when it runs multiple times" do
+      it "should remove duplicated files" do
+        url = "http://#{host}:#{port}/has_kit"
+        compressed_files = ["test.zip"]
+        expect(Dir.glob("#{base_dir}/*.zip").empty?).to be(true)
+        capture(:stdout) { subject.new.download_compressed_files(url, compressed_files, base_dir) }
+        capture(:stdout) { subject.new.download_compressed_files(url, compressed_files, base_dir) }
+        capture(:stdout) { subject.new.download_compressed_files(url, compressed_files, base_dir) }
+        download_files = Dir.glob("#{base_dir}/*.zip")
+        expect(download_files.empty?).to be(false)
+        expect(download_files.length).to eq(1)
+      end
+    end
+  end
+
+  describe "#valid_slack_setting?" do
+    context "when set ENV['SLACK_WEBHOOK_URL']" do
+      before { ENV["SLACK_WEBHOOK_URL"] = "test" }
+      it "should return true" do
+        expect(subject.new.valid_slack_setting?).to be(true)
+      end
+    end
+    context "when not set ENV['SLACK_WEBHOOK_URL']" do
+      it "should return false" do
+        expect(subject.new.valid_slack_setting?).to be(false)
+      end
+    end
+  end
+
+  describe "#post_to_slack" do
+    context "when not set ENV['SLACK_WEBHOOK_URL']" do
+      it "should raise an ArgumentError" do
+        expect { subject.new.post_to_slack("test") }.to raise_error(ArgumentError)
+      end
+    end
+  end
+
   describe ".execute" do
     before do
       allow_any_instance_of(Miteru::Crawler).to receive(:suspicious_urls).and_return(["http://#{host}:#{port}/has_kit"])
     end
-    it "should return an Array" do
-      results = subject.execute
-      expect(results).to be_an(Array)
-      expect(results.length).to eq(1)
+    it "should not raise any error" do
+      capture(:stdout) { expect { subject.execute }.to_not raise_error }
     end
   end
 end
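
Taken together, the diff moves downloading and Slack notification out of the CLI and into the crawler, so the crawler can be driven directly. A minimal usage sketch of the reworked class-level API, assuming the gem is installed and the feed endpoints are reachable (argument values are illustrative, not part of this commit):

    require "miteru"

    # SLACK_WEBHOOK_URL (and optionally SLACK_CHANNEL) must be set in the
    # environment for the post_to_slack path to fire.
    Miteru::Crawler.execute(
      auto_download: true,
      directory_traveling: false,
      download_to: "/tmp/miteru",
      post_to_slack: true,
      size: 100,
      threads: 10,
      verbose: true
    )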
