Skip to content

Commit

Permalink
add tor sessions renewal
Browse files Browse the repository at this point in the history
  • Loading branch information
sirfoga committed Dec 30, 2017
1 parent 73ebfb6 commit 83faac6
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 35 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.


### 4.7.9 - 2017-12-30

### Added
- tor sessions renewal

### 4.7.8 - 2017-12-06

### Fixed
Expand Down
3 changes: 1 addition & 2 deletions hal/internet/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def get_search_page(self, query, using_tor=False):
Get HTML source of search page of given query.
"""

query_web_page = Webpage(self.url + self.parse_query(query),
using_tor=using_tor)
query_web_page = Webpage(self.url + self.parse_query(query))
query_web_page.get_html_source(tor=using_tor) # get html source
return query_web_page.source
60 changes: 34 additions & 26 deletions hal/internet/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,15 @@

import random
import re
import socket
import time
import urllib.request
import webbrowser
from urllib.parse import urljoin

import requests
import socks
from bs4 import BeautifulSoup

import time
from stem import Signal
from stem.control import Controller

CHROME_USER_AGENT = [
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 ("
Expand Down Expand Up @@ -133,19 +132,16 @@ def is_url(candidate_url):
class Webpage(object):
""" representation of URL (web page)"""

def __init__(self, url, using_tor=False):
def __init__(self, url):
"""
:param url: string
Url of webpage
:param using_tor: bool
Whether using tor or not to fetch source page
"""

object.__init__(self)

self.url = self.parse_url(url)
self.domain = self.get_domain()
self.using_tor = using_tor

self.source = None
self.soup = None
Expand Down Expand Up @@ -195,31 +191,22 @@ def get_domain(self):
return "{uri.scheme}://{uri.netloc}/".format(
uri=urllib.request.urlparse(self.url))

def get_html_source(self, tor=False):
def get_html_source(self, to_json=False):
"""
:return: str
HTML source of webpage
"""

if tor:
try:
socks.setdefaultproxy(proxy_type=socks.PROXY_TYPE_SOCKS5,
addr="127.0.0.1", port=9050)
socket.socket = socks.socksocket
req_text = requests.get(self.url).text
except:
print(
"To be able to fetch HTML source pages via Tor the "
"following command is required:")
print("apt-get install tor && tor &")
req_text = ""
req = urllib.request.Request(self.url)
req.add_header("user-agent", random.choice(CHROME_USER_AGENT))
raw_result = urllib.request.urlopen(req).read()

if to_json:
self.source = raw_result.json()
else:
req = urllib.request.Request(self.url)
req.add_header("user-agent", random.choice(CHROME_USER_AGENT))
req_text = urllib.request.urlopen(req).read()
self.source = raw_result.text
self.soup = BeautifulSoup(self.source, "lxml")

self.source = str(req_text)
self.soup = BeautifulSoup(self.source, "lxml")
return self.source

def get_links(self, recall, timeout):
Expand Down Expand Up @@ -293,3 +280,24 @@ def download_to_file(url, local_file, headers=APP_VALID_HEADERS, cookies=None,
for chunk in req.iter_content(chunk_size):
if chunk:
local_download.write(chunk)


def get_tor_session(host="127.0.0.1", port=9050):
    """
    :param host: str
        Address of the Tor SOCKS proxy
    :param port: int
        Port of the Tor SOCKS proxy
    :return: requests.Session
        Session whose HTTP and HTTPS requests are routed through the proxy
    """

    # Tor uses the 9050 port as the default socks port.
    # The "socks5h" scheme (instead of "socks5") makes the proxy resolve
    # hostnames, so DNS lookups are not performed locally outside of Tor.
    proxy_url = "socks5h://{}:{}".format(host, port)

    session = requests.Session()
    session.proxies = {
        "http": proxy_url,
        "https": proxy_url
    }
    return session


def renew_connection(password, port=9051):
    """
    :param password: str
        Password used to authenticate with the Tor controller
    :param port: int
        Port of the Tor control interface
    :return: void
        signal TOR for a new connection
    """

    # the context manager closes the controller connection on exit,
    # even when authentication or signalling fails
    with Controller.from_port(port=port) as controller:
        controller.authenticate(password=password)
        controller.signal(Signal.NEWNYM)
10 changes: 5 additions & 5 deletions hal/wrappers/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
import colorama


def handle_exceptions(function):
def handle_exceptions(func):
"""
:param function: callback function
:param func: callback function
function to wrap
:return: callback function return type
wraps callback function
"""

@functools.wraps(function)
@functools.wraps(func)
def _handle_exceptions(*args, **kwargs):
"""
:param args: *
Expand All @@ -43,7 +43,7 @@ def _handle_exceptions(*args, **kwargs):
handle exception of callback function
"""

function_name = function.__name__
function_name = func.__name__
exception_string = \
"name: " + function_name + "\n" + \
"*args: " + str(args) + "\n" + \
Expand All @@ -52,7 +52,7 @@ def _handle_exceptions(*args, **kwargs):
colorama.init() # start color mode

try:
return function(*args, **kwargs)
return func(*args, **kwargs)
except KeyboardInterrupt:
print(
colorama.Fore.RED + colorama.Style.BRIGHT + "\r[!] User "
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@

setup(
name="PyHal",
version="4.7.8",
version="4.7.9",
author="sirfoga",
author_email="sirfoga@protonmail.com",
description="A multipurpose library to perform great stuff in the most "
Expand Down Expand Up @@ -73,7 +73,7 @@
"sklearn",
"statsmodels",
"httplib2",
"oauth2client"
"oauth2client", 'stem'
],
test_suite="tests"
)

0 comments on commit 83faac6

Please sign in to comment.