Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

can change proxy types, reads from a single file, pep 8 #1

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
# Python proxy checker
## Description
Simple multithreaded proxy checker. Takes several text files as inputs.
Simple multithreaded proxy checker. Takes a single text file as input.

## Usage
The script looks for all .txt files in some directory named "in_directory", takes all the proxies out, checks them and then puts the result to the file named "out_filename".
The input format is "ip:port" (e.g., "127.0.0.1:8080").

So, to change the input directory and the output file, you have to alter the following lines:
The script looks for all proxies in a file named "proxylist.txt", takes all the proxies out, checks them and then puts the result in a file named "checkedproxylist.txt".
The input format is "ip:port" separated by line breaks, like:
```
127.0.0.1:8080
127.0.0.2:80
127.0.0.3:1080
```
So, to change the input file and the output file, you have to alter the following lines:
```
in_directory = './input/'
out_filename = 'output/out_filtered.txt'
filetocheck = 'proxylist.txt'
out_filename = 'checkedproxylist.txt'
```


Expand All @@ -21,10 +25,8 @@ python-proxy-checker
|-- README.md
|
|-- input
| |-- first.txt
| |-- second.txt
| |-- third.txt
| |-- proxylist.txt
|
|-- output
|-- out_filtered.txt
|-- checkedproxylist.txt
```
82 changes: 27 additions & 55 deletions proxy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Network
import urllib.request, urllib.parse, urllib.error
import urllib.request
import urllib.parse
import urllib.error
import http.cookiejar

# Concurrency
Expand All @@ -9,26 +11,26 @@

# Etc
import time
from colorama import Fore, Back, Style

# Global variables
#in_filename = 'input/3.txt'
in_directory = './input/filtered'
out_filename = 'output/out_filtered2.txt'
# in_filename = 'input/3.txt'
filetocheck = 'proxylist.txt'
out_filename = 'checkedproxylist.txt'
proxy_type = input("Enter proxy type: ")
test_url = 'http://www.google.com/humans.txt'
thread_number = 100
timeout_value = 10

ok_msg = Fore.GREEN + "OK! " + Fore.RESET
fail_msg = Fore.RED + "FAIL " + Fore.RESET
ok_msg = "OK! "
fail_msg = "FAIL "

# Stats
good_proxy_num = itertools.count()
start_time = time.time()
end_time = time.time()

# Safe print()
mylock = threading.Lock()


def sprint(*args, **kwargs):
    """Thread-safe wrapper around print().

    Forwards all positional and keyword arguments to the built-in
    print() while holding the module-level lock ``mylock``, so only one
    thread prints at a time.
    """
    mylock.acquire()
    try:
        print(*args, **kwargs)
    finally:
        # Always release, even if print() raises.
        mylock.release()
Expand Down Expand Up @@ -58,21 +60,20 @@ def terminate(self):
self.shutdown = True



#
# Processor
#
class ProcessThread(threading.Thread):
def __init__(self, id, task_queue, out_queue):
threading.Thread.__init__(self)
self.task_queue = task_queue
self.out_queue = out_queue
self.out_queue = out_queue
self.id = id

# ...
def run(self):
while True:
task = self.task_queue.get()
task = self.task_queue.get()
result = self.process(task)

if result is not None:
Expand All @@ -81,17 +82,16 @@ def run(self):

self.task_queue.task_done()


# Do the processing job here
def process(self, task):
proxy = task
log_msg = str("Thread #%3d. Trying HTTP proxy %21s \t\t" % (self.id, proxy))
log_msg = str("Thread #%3d. Trying proxy %21s \t\t" % (self.id, proxy))

cj = http.cookiejar.CookieJar()
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
urllib.request.HTTPCookieProcessor(cj),
urllib.request.HTTPRedirectHandler(),
urllib.request.ProxyHandler({ 'http' : proxy })
urllib.request.HTTPCookieProcessor(cj),
urllib.request.HTTPRedirectHandler(),
urllib.request.ProxyHandler({proxy_type: proxy})
)

try:
Expand All @@ -103,22 +103,22 @@ def process(self, task):
sprint(log_msg)
return None

log_msg += ok_msg + " Response time: %d, length=%s" % ( int((t2-t1)*1000), str(len(response)) )
log_msg += ok_msg + " Response time: %d, length=%s" % (int((t2 - t1) * 1000), str(len(response)))
sprint(log_msg)
return proxy

def terminate(self):
None
#print("Thread #%d is down..." % (self.id))
# print("Thread #%d is down..." % (self.id))


#
# Main starts here
#
# Init some stuff
input_queue = queue.Queue()
input_queue = queue.Queue()
result_queue = queue.Queue()


# Spawn worker threads
workers = []
for i in range(0, thread_number):
Expand All @@ -135,15 +135,9 @@ def terminate(self):
# Add some stuff to the input queue
start_time = time.time()

proxy_list = []
import os
for root, dirs, files in os.walk(in_directory):
for file in files:
if file.endswith(".txt"):
# read all lines from file
file_line_list = [line.rstrip('\n') for line in open(os.path.join(root, file), 'r')]
# append to proxy_list
proxy_list.extend(file_line_list)
# Read the proxy list: one "ip:port" entry per line.
# Iterating the file (instead of read().split("\n")) avoids the empty
# string produced by a trailing newline; blank lines are skipped so no empty
# "proxy" is queued for checking. The with-block closes the file on exit,
# so no explicit close() is needed.
with open(filetocheck) as proxyfile:
    proxy_list = [line.strip() for line in proxyfile if line.strip()]

for proxy in proxy_list:
input_queue.put(proxy)
Expand All @@ -158,8 +152,7 @@ def terminate(self):
input_queue.join()
result_queue.join()


#while (not input_queue.empty()):
# while (not input_queue.empty()):
# time.sleep(1)


Expand All @@ -171,38 +164,17 @@ def terminate(self):

# Print some info
good_proxy_num = float(next(good_proxy_num))
print("In: %d. Good: %d, that's %.2f%%" % (total_proxy_num, good_proxy_num, 100.0 * good_proxy_num/total_proxy_num))
print("In: %d. Good: %d, that's %.2f%%" % (total_proxy_num, good_proxy_num, 100.0 * good_proxy_num / total_proxy_num))

end_time = time.time()
print("Time elapsed: %.1f seconds." % (end_time - start_time))
print("Bye-bye!")











#############












# Read file, convert it to list of proxies.
# Add proxies to queue
# Launch N (10) threads
# When writing to the file, use lock
# When the queue is empty, flush results and shut down