From 3f2cc07cf9c7ecd8a1407619ed62461b5ec45d11 Mon Sep 17 00:00:00 2001 From: Alex Orzheshkovsky Date: Fri, 2 Dec 2016 12:21:13 +0200 Subject: [PATCH 1/9] Implement PEP8 fixes --- .idea/hide_my_python.iml | 12 ++ .idea/misc.xml | 29 ++++ .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .idea/workspace.xml | 271 +++++++++++++++++++++++++++++++ arguments.py | 337 ++++++++++++++++++++------------------- connect.py | 9 +- database.py | 54 +++---- hide_my_python.py | 51 +++--- parser.py | 12 +- regex.py | 9 +- 11 files changed, 563 insertions(+), 235 deletions(-) create mode 100644 .idea/hide_my_python.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/hide_my_python.iml b/.idea/hide_my_python.iml new file mode 100644 index 0000000..6f63a63 --- /dev/null +++ b/.idea/hide_my_python.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d4ff599 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..5476709 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..6054fe7 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,271 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + DEFINITION_ORDER + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1480673685514 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/arguments.py b/arguments.py index 0140890..4f2b641 100644 --- a/arguments.py +++ b/arguments.py @@ -27,177 +27,178 @@ import argparse from os import path + def create_argument_parser(): - arg_parser = argparse.ArgumentParser( - prog='hide_my_python', - description='A parser to retrieve proxies from HideMyAss!', - epilog='Go to https://hidemyass.com/proxy-list/ to see the ' - 'different available options.') - - # The user has to specify an output file - arg_parser.add_argument('-o', dest='database_file', type=str, - required=True, - help='database file where the proxies will be saved') - - # The user can specify a maximum number of proxies to retrieve - arg_parser.add_argument('-n', dest='number_of_proxies', type=int, - default=0, - help='maximum number of proxies to retrieve (default: all)') - - # The user can specify a list of countries - arg_parser.add_argument('-ct', - default=path.join(path.dirname(sys.argv[0]), 'countries_all'), - dest='countries_file', type=argparse.FileType('r'), - help='file containing the countries where the ' - 'proxies can be based (default: %(default)s)') - - # The user can specify a list of ports - arg_parser.add_argument('-p', type=int, nargs='+', dest='ports', - help='list of ports (max: 20 ports) the proxies listen on ' - '(default: every port)') - - # The user can specify a list of protocols - arg_parser.add_argument('-pr', type=str, nargs='+', - choices=['http', 'https', 'socks'], dest='protocols', - help='protocols used by the proxies ' - '(default: HTTP, HTTPS and SOCKS4/5)') - - # The user can specify the anonymity level - arg_parser.add_argument('-a', default=0, action='count', dest='anonymity', - help='flag used to determine the proxies minimum anonymity ' - 'level, e.g. -a sets the minimum anonymity level to Low, ' - '-aa to Medium, -aaa to High, etc. (default minimum level: ' - 'None)') - - arg_parser.add_argument('-ka', action='store_true', - dest='keep_alive', - help='flag used to determine if proxies with the Keep Alive ' - 'option should be returned, as they are likely honey pots ' - '(default: no)') - - # The user can specify the required speed - arg_parser.add_argument('-s', default=1, action='count', dest='speed', - help='flag used to determine the proxies minimum speed ' - 'level, e.g. -s sets the minimum speed level to Medium, ' - '-ss to Fast (default minimum level: Slow)') - - # The user can specify the connection time - arg_parser.add_argument('-c', default=1, action='count', - dest='connection_time', - help='flag used to determine the proxies minimum connection time ' - 'level, e.g. -c sets the minimum connection time level to ' - 'Medium, -cc to Fast (default minimum level: Slow)') - - arg_parser.add_argument('-v', action='store_true', dest='verbose', - help='explain what is being done') - - return arg_parser + arg_parser = argparse.ArgumentParser( + prog='hide_my_python', + description='A parser to retrieve proxies from HideMyAss!', + epilog='Go to https://hidemyass.com/proxy-list/ to see the ' + 'different available options.') + + # The user has to specify an output file + arg_parser.add_argument('-o', dest='database_file', type=str, + required=True, + help='database file where the proxies will be saved') + + # The user can specify a maximum number of proxies to retrieve + arg_parser.add_argument('-n', dest='number_of_proxies', type=int, + default=0, + help='maximum number of proxies to retrieve (default: all)') + + # The user can specify a list of countries + arg_parser.add_argument('-ct', + default=path.join(path.dirname(sys.argv[0]), 'countries_all'), + dest='countries_file', type=argparse.FileType('r'), + help='file containing the countries where the ' + 'proxies can be based (default: %(default)s)') + + # The user can specify a list of ports + arg_parser.add_argument('-p', type=int, nargs='+', dest='ports', + help='list of ports (max: 20 ports) the proxies listen on ' + '(default: every port)') + + # The user can specify a list of protocols + arg_parser.add_argument('-pr', type=str, nargs='+', + choices=['http', 'https', 'socks'], dest='protocols', + help='protocols used by the proxies ' + '(default: HTTP, HTTPS and SOCKS4/5)') + + # The user can specify the anonymity level + arg_parser.add_argument('-a', default=0, action='count', dest='anonymity', + help='flag used to determine the proxies minimum anonymity ' + 'level, e.g. -a sets the minimum anonymity level to Low, ' + '-aa to Medium, -aaa to High, etc. (default minimum level: ' + 'None)') + + arg_parser.add_argument('-ka', action='store_true', + dest='keep_alive', + help='flag used to determine if proxies with the Keep Alive ' + 'option should be returned, as they are likely honey pots ' + '(default: no)') + + # The user can specify the required speed + arg_parser.add_argument('-s', default=1, action='count', dest='speed', + help='flag used to determine the proxies minimum speed ' + 'level, e.g. -s sets the minimum speed level to Medium, ' + '-ss to Fast (default minimum level: Slow)') + + # The user can specify the connection time + arg_parser.add_argument('-c', default=1, action='count', + dest='connection_time', + help='flag used to determine the proxies minimum connection time ' + 'level, e.g. -c sets the minimum connection time level to ' + 'Medium, -cc to Fast (default minimum level: Slow)') + + arg_parser.add_argument('-v', action='store_true', dest='verbose', + help='explain what is being done') + + return arg_parser + def process_arguments(args, arg_parser): + # If the given number of proxies is negative, + # we return an error + if args.number_of_proxies < 0: + error_msg = 'argument {0}: invalid value ' \ + + '(a positive integer is required): {1}' + error_msg = error_msg.format('-n', args.number_of_proxies) + arg_parser.error(error_msg) + + # We retrieve the countries from the given file + args.countries_list = [] + for country in args.countries_file.readlines(): + country = country.rstrip() + args.countries_list.append(country) + + # If ports were specified + if args.ports: + # We delete the duplicates + args.ports = list(set(args.ports)) + # If too many ports were specified, we exit with an error + if len(args.ports) > 20: + error_msg = 'argument {0}: invalid value ' \ + + '(maximum 20 ports): {1} ports given' + error_msg = error_msg.format('-p', len(args.ports)) + arg_parser.error(error_msg) + # Otherwise, we create a comma-separated string + else: + ports_string = '' + for port in args.ports: + # If the port is in the good range, we add it + if 1 <= port and port <= 65535: + ports_string += '{0}, '.format(port) + # Otherwise, we raise an error + else: + error_msg = 'argument {0}: invalid value ' \ + + '(port must be between 1 and 65535): {1}' + error_msg = error_msg.format('-p', port) + arg_parser.error(error_msg) + # We delete the last comma + ports_string = ports_string[:-2] + args.ports = ports_string + # If no ports were specified, we do nothing + else: + args.ports = '' + + # If no protocol was specified, we consider every possible protocol + if not args.protocols: + args.protocols = ['http', 'https', 'socks'] + # Otherwise, we delete the duplicates + else: + args.protocols = list(set(args.protocols)) + + # The maximum anonymity level is 4 + if args.anonymity > 4: + args.anonymity = 4 + + # The maximum speed level is 3 + if args.speed > 3: + args.speed = 3 + + # The maximum connection time level is 3 + if args.connection_time > 3: + args.connection_time = 3 - # If the given number of proxies is negative, - # we return an error - if args.number_of_proxies < 0: - error_msg = 'argument {0}: invalid value '\ - + '(a positive integer is required): {1}' - error_msg = error_msg.format('-n', args.number_of_proxies) - arg_parser.error(error_msg) - - # We retrieve the countries from the given file - args.countries_list = [] - for country in args.countries_file.readlines(): - country = country.rstrip() - args.countries_list.append(country) - - # If ports were specified - if args.ports: - # We delete the duplicates - args.ports = list(set(args.ports)) - # If too many ports were specified, we exit with an error - if len(args.ports) > 20: - error_msg = 'argument {0}: invalid value '\ - + '(maximum 20 ports): {1} ports given' - error_msg = error_msg .format('-p', len(args.ports)) - arg_parser.error(error_msg) - # Otherwise, we create a comma-separated string - else: - ports_string = '' - for port in args.ports: - # If the port is in the good range, we add it - if 1 <= port and port <= 65535: - ports_string += '{0}, '.format(port) - # Otherwise, we raise an error - else: - error_msg = 'argument {0}: invalid value '\ - + '(port must be between 1 and 65535): {1}' - error_msg = error_msg .format('-p', port) - arg_parser.error(error_msg) - # We delete the last comma - ports_string = ports_string[:-2] - args.ports = ports_string - # If no ports were specified, we do nothing - else: - args.ports = '' - - # If no protocol was specified, we consider every possible protocol - if not args.protocols: - args.protocols = ['http', 'https', 'socks'] - # Otherwise, we delete the duplicates - else: - args.protocols = list(set(args.protocols)) - - # The maximum anonymity level is 4 - if args.anonymity > 4: - args.anonymity = 4 - - # The maximum speed level is 3 - if args.speed > 3: - args.speed = 3 - - # The maximum connection time level is 3 - if args.connection_time > 3: - args.connection_time = 3 def print_arguments(args): - # We display the number of proxies - if args.number_of_proxies > 0: - number = args.number_of_proxies - else: - number = 'all' - print('[info] number of proxies: {0}'.format(number)) - - # We display the first five countries - if len(args.countries_list) <= 5: - countries = args.countries_list - else: - countries = '{0} and {1} more' - countries = countries.format(args.countries_list[0:5], - len(args.countries_list) - 5) - print('[info] countries: {0}'.format(countries)) - - # We display the ports - if args.ports: - ports = args.ports - else: - ports = 'all' - print('[info] ports: {0}'.format(ports)) - - # We display the protocols - print('[info] protocols: {0}'.format(args.protocols)) - - # We display the anonymity levels - anonymity_levels = ['None', 'Low', 'Medium', 'High'] - if args.keep_alive: - anonymity_levels.append('High +KA') - print('[info] anonymity: {0}'.format( - anonymity_levels[args.anonymity:])) - - # We display the speed levels - speed_levels = ['Slow', 'Medium', 'High'] - print('[info] speed: {0}'.format(speed_levels[args.speed - 1:])) - - # We display the speed levels - connection_time_levels = ['Slow', 'Medium', 'High'] - print('[info] connection time: {0}'.format( - connection_time_levels[args.connection_time - 1:])) - + # We display the number of proxies + if args.number_of_proxies > 0: + number = args.number_of_proxies + else: + number = 'all' + print('[info] number of proxies: {0}'.format(number)) + + # We display the first five countries + if len(args.countries_list) <= 5: + countries = args.countries_list + else: + countries = '{0} and {1} more' + countries = countries.format(args.countries_list[0:5], + len(args.countries_list) - 5) + print('[info] countries: {0}'.format(countries)) + + # We display the ports + if args.ports: + ports = args.ports + else: + ports = 'all' + print('[info] ports: {0}'.format(ports)) + + # We display the protocols + print('[info] protocols: {0}'.format(args.protocols)) + + # We display the anonymity levels + anonymity_levels = ['None', 'Low', 'Medium', 'High'] + if args.keep_alive: + anonymity_levels.append('High +KA') + print('[info] anonymity: {0}'.format( + anonymity_levels[args.anonymity:])) + + # We display the speed levels + speed_levels = ['Slow', 'Medium', 'High'] + print('[info] speed: {0}'.format(speed_levels[args.speed - 1:])) + + # We display the speed levels + connection_time_levels = ['Slow', 'Medium', 'High'] + print('[info] connection time: {0}'.format( + connection_time_levels[args.connection_time - 1:])) diff --git a/connect.py b/connect.py index 2852b44..812d9de 100644 --- a/connect.py +++ b/connect.py @@ -25,6 +25,7 @@ import requests + def build_post_request(args): post_request = {} @@ -36,7 +37,7 @@ def build_post_request(args): post_request['p'] = args.ports # We build the protocols parameter - protocol_codes = {'http' : 0, 'https' : 1, 'socks' : 2} + protocol_codes = {'http': 0, 'https': 1, 'socks': 2} for i, protocol in enumerate(args.protocols): post_request['pr[{0}]'.format(i)] = protocol_codes[protocol] @@ -46,7 +47,7 @@ def build_post_request(args): for anonymity in range(args.anonymity, max_anonymity_level): index = anonymity - args.anonymity post_request['a[{0}]'.format(index)] = anonymity - + # We build the speed level for speed in range(args.speed, 4): index = speed - args.speed @@ -66,14 +67,14 @@ def build_post_request(args): # We return the request return post_request + def send_data(url, data=None, cookies=None, allow_redirects=True): # If we have data, we POST if data: r = requests.post(url, data=data, cookies=cookies, - allow_redirects=allow_redirects) + allow_redirects=allow_redirects) # Otherwise, we GET else: r = requests.get(url, cookies=cookies, allow_redirects=allow_redirects) return r - diff --git a/database.py b/database.py index 24e0961..71d1ad5 100644 --- a/database.py +++ b/database.py @@ -23,36 +23,36 @@ import sys import sqlite3 + def insert_in_database(cursor, proxy): + # We check if the prxoy is already in the database + cursor.execute('SELECT id FROM proxies WHERE ip=? and port=?', proxy[0:2]) - # We check if the prxoy is already in the database - cursor.execute('SELECT id FROM proxies WHERE ip=? and port=?', proxy[0:2]) + # If it is, we don't store it + if cursor.fetchone(): + return - # If it is, we don't store it - if cursor.fetchone(): - return + # Otherwise, we save it + cursor.execute('INSERT INTO proxies (ip, port, type, country, anonymity, ' + 'speed, connection_time) VALUES (?, ?, ?, ?, ?, ?, ?)', proxy) - # Otherwise, we save it - cursor.execute('INSERT INTO proxies (ip, port, type, country, anonymity, ' - 'speed, connection_time) VALUES (?, ?, ?, ?, ?, ?, ?)', proxy) def initialize_database(database_file): - # We connect to the database file - connection = sqlite3.connect(database_file) - cursor = connection.cursor() - - # We create the table where the proxies will be stored - try: - cursor.execute('CREATE TABLE proxies (id INTEGER PRIMARY KEY ' - 'AUTOINCREMENT, ip TEXT, port INTEGER, type TEXT, country TEXT, ' - 'anonymity TEXT, speed TEXT, connection_time TEXT)') - # If there's already such a table, we don't have anything to do - except sqlite3.OperationalError: - pass - # Otherwise, we save the changes - else: - connection.commit() - - # We return the connection to the database - return connection, cursor - + # We connect to the database file + connection = sqlite3.connect(database_file) + cursor = connection.cursor() + + # We create the table where the proxies will be stored + try: + cursor.execute('CREATE TABLE proxies (id INTEGER PRIMARY KEY ' + 'AUTOINCREMENT, ip TEXT, port INTEGER, type TEXT, country TEXT, ' + 'anonymity TEXT, speed TEXT, connection_time TEXT)') + # If there's already such a table, we don't have anything to do + except sqlite3.OperationalError: + pass + # Otherwise, we save the changes + else: + connection.commit() + + # We return the connection to the database + return connection, cursor diff --git a/hide_my_python.py b/hide_my_python.py index 2bdbf15..d931d5e 100755 --- a/hide_my_python.py +++ b/hide_my_python.py @@ -26,37 +26,38 @@ import parser import database + def main(): - # We create an argument parser - arg_parser = arguments.create_argument_parser() + # We create an argument parser + arg_parser = arguments.create_argument_parser() - # We parse the arguments - args = arg_parser.parse_args(sys.argv[1:]) - arguments.process_arguments(args, arg_parser) + # We parse the arguments + args = arg_parser.parse_args(sys.argv[1:]) + arguments.process_arguments(args, arg_parser) - # If the verbose mode is on, we display the arguments - if args.verbose: - arguments.print_arguments(args) + # If the verbose mode is on, we display the arguments + if args.verbose: + arguments.print_arguments(args) - # We open the database file where the proxies will be stored - connection, cursor = database.initialize_database(args.database_file) + # We open the database file where the proxies will be stored + connection, cursor = database.initialize_database(args.database_file) - try: - # We generate the proxies - for proxy in parser.generate_proxy(args): - # And we store them in the database - database.insert_in_database(cursor, proxy) - except KeyboardInterrupt: - if args.verbose: - print('') - print('[warn] received interruption signal') + try: + # We generate the proxies + for proxy in parser.generate_proxy(args): + # And we store them in the database + database.insert_in_database(cursor, proxy) + except KeyboardInterrupt: + if args.verbose: + print('') + print('[warn] received interruption signal') - # We save the changes made to the database, and close the file - connection.commit() - connection.close() + # We save the changes made to the database, and close the file + connection.commit() + connection.close() - return 0 + return 0 -if __name__ == '__main__': - main() +if __name__ == '__main__': + main() diff --git a/parser.py b/parser.py index d88ca6b..effb121 100644 --- a/parser.py +++ b/parser.py @@ -26,8 +26,8 @@ import regex import connect -def parse_ip_port(ip_port_html): +def parse_ip_port(ip_port_html): # We parse the class which won't be displayed display_none_list = regex.DISPLAY_NONE_CLASS.findall(ip_port_html) @@ -50,8 +50,8 @@ def parse_ip_port(ip_port_html): return ip, port -def parse_proxy(proxy_html): +def parse_proxy(proxy_html): # We get the chunk of code corresponding to the IP:port... ip_port_html = regex.IP_PORT_HTML.search(proxy_html).group(0) # ...and we parse it @@ -81,6 +81,7 @@ def parse_proxy(proxy_html): # We return a tuple return ip, int(port), type, country, anonymity, speed, connection_time + def generate_proxy(args): # We build the post request, using the arguments specified by the user post_request = connect.build_post_request(args) @@ -98,12 +99,12 @@ def generate_proxy(args): # When you do a search, HideMyAss! redirects you to a page. # We retrieve the result page's URL r = connect.send_data('http://proxylist.hidemyass.com/', - data=post_request, allow_redirects=False) + data=post_request, allow_redirects=False) url = 'http://proxylist.hidemyass.com{0}'.format(r.headers['Location']) # HideMyAss! checks this cookie to see if you're a legit user # (and we totally are!) - cookies = {'PHPSESSID' : r.cookies['PHPSESSID']} + cookies = {'PHPSESSID': r.cookies['PHPSESSID']} while keep_retrieving: # Even if a page doesn't exist, HideMyAss! doesn't respond @@ -128,7 +129,7 @@ def generate_proxy(args): # If a maximum number of proxies was set and we # are above this limit, we stop retrieving proxies if (not retrieve_all and - number_of_proxies > args.number_of_proxies): + number_of_proxies > args.number_of_proxies): keep_retrieving = False break # Otherwise, we generate a proxy @@ -148,4 +149,3 @@ def generate_proxy(args): # we stop retrieving proxies if not results_on_page: keep_retrieving = False - diff --git a/regex.py b/regex.py index e8eb2bb..1a701cb 100644 --- a/regex.py +++ b/regex.py @@ -28,7 +28,7 @@ # This regex corresponds to the HTML code containing the IP:port of a proxy IP_PORT_HTML = re.compile(r'