Merge pull request #739 from rix1337/dev
v.18.0.5
rix1337 committed Apr 17, 2023
2 parents 95b4663 + 863f13d commit 0fd8a9e
Showing 18 changed files with 109 additions and 83 deletions.
8 changes: 6 additions & 2 deletions .github/Changelog.md
@@ -6,16 +6,20 @@

### Changelog FeedCrawler:

- **18.0.5** Use locks so that the shared manager.dict() is modified safely under concurrent access (#738); see the sketch after this list.
- **18.0.5** Values that should be encrypted but are not yet are now encrypted when they are read.
- **18.0.5** Further wording improvements in the web interface.
- **18.0.4** Hostnames, passwords, hashes and API keys are now stored encrypted in the `FeedCrawler.ini`
  - As a result, the `FeedCrawler.ini` can now be shared publicly
  - From now on, bug reports must always include the complete `FeedCrawler.ini`.
  - Since the key and IV of the encryption scheme are stored in the `FeedCrawler.db`, that file should not be
    shared together with the `FeedCrawler.ini`.
- **18.0.3** Improved wording
  - The titles of the search lists in the web interface now describe more clearly which content they cover.
  - The titles of the feed-search lists in the web interface now describe more clearly which content they cover.
  - Episodes are now labeled "Folge" instead of "Episode".
- **18.0.3** Improved matching of feed entries to their download page on SF.
- **18.0.3** Force a successful connection to JDownloader on every call via My JDownloader (#735).
- **18.0.3** Fixed an error when querying the series API (#732, thanks @9Mad-Max5)
- **18.0.3** Force a successful connection to JDownloader on every call via My JDownloader (#735, thanks @jankete)
- **18.0.2** For performance reasons, the desktop GUI console keeps only the last 999 printed lines.
- **18.0.2** Use manager.dict() exclusively to share objects between processes.
- **18.0.2** SF/FF are no longer treated as sites permanently blocked by Cloudflare.
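
The 18.0.5 lock entry is the core change of this release. Below is a minimal sketch, not FeedCrawler's actual code (the dict key and the worker function are illustrative), of why a shared `manager.Lock()` is needed once several processes perform read-modify-write updates on the same `manager.dict()` entry, as the GUI print queue and the request cache in this diff do.

```python
import multiprocessing


def append_marker(shared_dict, shared_lock, marker):
    # Read-modify-write on a manager.dict() value is not atomic across
    # processes, so the whole operation is guarded by the shared lock.
    with shared_lock:
        shared_dict["print_queue"] = shared_dict.get("print_queue", "") + marker


if __name__ == "__main__":
    with multiprocessing.Manager() as manager:
        shared_dict = manager.dict()
        shared_lock = manager.Lock()
        workers = [
            multiprocessing.Process(target=append_marker, args=(shared_dict, shared_lock, str(i)))
            for i in range(5)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        print(shared_dict["print_queue"])  # all five markers arrive
```

Without the lock, two workers can read the same old queue value and the later write silently discards the earlier append; this is exactly the kind of race that #738 reported for the shared state.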
17 changes: 10 additions & 7 deletions feedcrawler/crawler.py
@@ -29,7 +29,8 @@
def start_feedcrawler():
with multiprocessing.Manager() as manager:
shared_state_dict = manager.dict()
shared_state.set_shared_dict(shared_state_dict)
shared_state_lock = manager.Lock()
shared_state.set_state(shared_state_dict, shared_state_lock)

if gui.enabled:
window = gui.create_main_window()
@@ -74,7 +75,7 @@ def start_feedcrawler():
log_level = logging.__dict__[
arguments.log_level] if arguments.log_level in logging.__dict__ else logging.INFO

shared_state.set_log_level(log_level)
shared_state.update("log_level", log_level)
shared_state.set_logger()

hostnames = CrawlerConfig('Hostnames')
@@ -192,7 +193,7 @@ def clean_up_hostname(host, string):
else:
FeedDb('cdc').reset()

process_web_server = multiprocessing.Process(target=web_server, args=(shared_state_dict,))
process_web_server = multiprocessing.Process(target=web_server, args=(shared_state_dict, shared_state_lock,))
process_web_server.start()

if arguments.delay:
@@ -201,14 +202,16 @@ def clean_up_hostname(host, string):
time.sleep(delay)

if not shared_state.values["test_run"]:
process_feed_crawler = multiprocessing.Process(target=feed_crawler, args=(shared_state_dict,))
process_feed_crawler = multiprocessing.Process(target=feed_crawler,
args=(shared_state_dict, shared_state_lock,))
process_feed_crawler.start()

process_watch_packages = multiprocessing.Process(target=watch_packages, args=(shared_state_dict,))
process_watch_packages = multiprocessing.Process(target=watch_packages,
args=(shared_state_dict, shared_state_lock,))
process_watch_packages.start()

if not arguments.docker and gui.enabled:
gui.main_gui(window, shared_state_dict)
gui.main_gui(window, shared_state_dict, shared_state_lock)

sys.stdout = sys.__stdout__
process_web_server.terminate()
@@ -232,7 +235,7 @@ def signal_handler(sig, frame):
while True:
time.sleep(1)
else:
feed_crawler(shared_state_dict)
feed_crawler(shared_state_dict, shared_state_lock)
process_web_server.terminate()
sys.exit(0)

2 changes: 1 addition & 1 deletion feedcrawler/external_sites/feed_search/content_shows.py
@@ -174,7 +174,7 @@ def periodical_task(self):
else:
if self._SITE == "SF" and not shared_state.values["sf_blocked"]:
print("SF hat den Feed-Anruf während der Feed-Suche blockiert.")
shared_state.values["sf_blocked"] = True
shared_state.update("sf_blocked", True)
else:
shared_state.logger.debug("Feed ist leer - breche die Suche für diesen Feed ab!")

@@ -104,7 +104,7 @@ def ww_post_url_headers(url, headers=False):
response["text"]:
if not shared_state.values["ww_blocked"]:
print("WW hat den Feed-Anruf während der Feed-Suche blockiert.")
shared_state.values["ww_blocked"] = True
shared_state.update("ww_blocked", True)
return ""
return response
except:
@@ -121,7 +121,7 @@ def ww_get_download_links(self, content, title):
if not shared_state.values["ww_blocked"]:
print(
"WW hat den Link-Abruf für " + title + " blockiert. Eine spätere Anfrage hat möglicherweise Erfolg!")
shared_state.values["ww_blocked"] = True
shared_state.update("ww_blocked", True)
return False
links = BeautifulSoup(response, "html.parser").findAll("div", {"id": "download-links"})
for link in links:
6 changes: 3 additions & 3 deletions feedcrawler/jobs/feed_search.py
@@ -83,13 +83,13 @@ def search_pool():
]


def feed_crawler(shared_state_dict):
def feed_crawler(shared_state_dict, shared_state_lock):
if gui.enabled:
sys.stdout = gui.AppendToPrintQueue(shared_state_dict)
sys.stdout = gui.AppendToPrintQueue(shared_state_dict, shared_state_lock)
else:
sys.stdout = Unbuffered(sys.stdout)

shared_state.set_shared_dict(shared_state_dict)
shared_state.set_state(shared_state_dict, shared_state_lock)
shared_state.set_logger()
logger = shared_state.logger

6 changes: 3 additions & 3 deletions feedcrawler/jobs/package_watcher.py
@@ -27,13 +27,13 @@
from feedcrawler.providers.sqlite_database import FeedDb


def watch_packages(shared_state_dict):
def watch_packages(shared_state_dict, shared_state_lock):
if gui.enabled:
sys.stdout = gui.AppendToPrintQueue(shared_state_dict)
sys.stdout = gui.AppendToPrintQueue(shared_state_dict, shared_state_lock)
else:
sys.stdout = Unbuffered(sys.stdout)

shared_state.set_shared_dict(shared_state_dict)
shared_state.set_state(shared_state_dict, shared_state_lock)
shared_state.set_logger()

crawljobs = CrawlerConfig('Crawljobs')
6 changes: 4 additions & 2 deletions feedcrawler/providers/config.py
@@ -18,9 +18,9 @@
class CrawlerConfig(object):
_DEFAULT_CONFIG = {
'FeedCrawler': [
("auth_user", "str", ""),
("auth_user", "secret", ""),
("auth_hash", "secret", ""),
("myjd_user", "str", ""),
("myjd_user", "secret", ""),
("myjd_pass", "secret", ""),
("myjd_device", "str", ""),
("myjd_auto_update", "bool", "False"),
@@ -200,6 +200,8 @@ def _get_from_config(self, scope, key):
final_payload = "".join(filter(lambda c: c in string.printable, decrypted_payload))
return final_payload
else: ## Loaded value is not encrypted, return as is
if len(value) > 0:
self.save(key, value)
return value
elif [param for param in self._DEFAULT_CONFIG[self._section] if param[0] == key and param[1] == 'bool']:
return True if len(res) and res[0].strip('\'"').lower() == 'true' else False
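
For context on the change above: when `_get_from_config` finds a plaintext value for a key that is supposed to be secret, it now saves the value back, which stores it encrypted. The snippet below is only a hedged sketch of such a symmetric scheme, assuming the `cryptography` package and AES-CBC with PKCS7 padding; the actual cipher, padding handling, and the way key and IV are kept in `FeedCrawler.db` are not part of this diff and may differ.

```python
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes


def encrypt_value(plaintext: str, key: bytes, iv: bytes) -> bytes:
    # Pad to the AES block size, then encrypt (hypothetical scheme, see note above)
    padder = padding.PKCS7(128).padder()
    padded = padder.update(plaintext.encode("utf-8")) + padder.finalize()
    encryptor = Cipher(algorithms.AES(key), modes.CBC(iv)).encryptor()
    return encryptor.update(padded) + encryptor.finalize()


def decrypt_value(ciphertext: bytes, key: bytes, iv: bytes) -> str:
    # Reverse of encrypt_value: decrypt, then strip the PKCS7 padding
    decryptor = Cipher(algorithms.AES(key), modes.CBC(iv)).decryptor()
    padded = decryptor.update(ciphertext) + decryptor.finalize()
    unpadder = padding.PKCS7(128).unpadder()
    return (unpadder.update(padded) + unpadder.finalize()).decode("utf-8")
```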
27 changes: 19 additions & 8 deletions feedcrawler/providers/gui.py
@@ -82,7 +82,7 @@ def create_main_window():


@check_gui_enabled
def main_gui(window, shared_state_dict):
def main_gui(window, shared_state_dict, shared_state_lock):
if not window:
print("GUI-Fenster falsch initialisiert.")
window = create_main_window()
@@ -100,7 +100,7 @@ def main_gui(window, shared_state_dict):
while True:
event, values = window.read(timeout=500)

print_from_queue(shared_state_dict)
print_from_queue(shared_state_dict, shared_state_lock)

if event == tray.key:
event = values[event] # use the System Tray's event as if was from the window
@@ -319,26 +319,37 @@ def flush(self):
pass


def update_shared_dict_with_lock(shared_dict, shared_lock, key, value):
shared_lock.acquire()
try:
shared_dict[key] = value
finally:
shared_lock.release()


class AppendToPrintQueue(object):
def __init__(self, shared_state_dict):
def __init__(self, shared_state_dict, shared_state_lock):
self.shared_state_dict = shared_state_dict
self.shared_state_lock = shared_state_lock
try:
self.shared_state_dict["print_queue"]
except KeyError:
self.shared_state_dict["print_queue"] = ''
update_shared_dict_with_lock(self.shared_state_dict, self.shared_state_lock, "print_queue", '')

def write(self, s):
self.shared_state_dict["print_queue"] += s
update_shared_dict_with_lock(self.shared_state_dict, self.shared_state_lock, "print_queue",
self.shared_state_dict["print_queue"] + s)

def flush(self):
self.shared_state_dict["print_queue"] += ''
update_shared_dict_with_lock(self.shared_state_dict, self.shared_state_lock, "print_queue",
self.shared_state_dict["print_queue"] + '')


def print_from_queue(shared_state_dict):
def print_from_queue(shared_state_dict, shared_state_lock):
try:
output = shared_state_dict["print_queue"]
if len(output) > 0:
print(output)
shared_state_dict["print_queue"] = ''
update_shared_dict_with_lock(shared_state_dict, shared_state_lock, "print_queue", '')
except KeyError:
pass
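
One small aside on the helper introduced above: `manager.Lock()` proxies support the context-manager protocol, so the explicit acquire/try/finally in the diff could equally be written in the shorter form below; the behaviour is the same, including release on exceptions.

```python
def update_shared_dict_with_lock(shared_dict, shared_lock, key, value):
    # Equivalent to the acquire/try/finally version in the diff above
    with shared_lock:
        shared_dict[key] = value
```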
6 changes: 3 additions & 3 deletions feedcrawler/providers/http_requests/cache_handler.py
@@ -34,15 +34,15 @@ def cache_returned_values(*args, **kwargs):
cached_response = None
if cached_response:
try:
shared_state.values["request_cache_hits"] += 1
shared_state.update("request_cache_hits", shared_state.values["request_cache_hits"] + 1)
except KeyError:
shared_state.values["request_cache_hits"] = 1
shared_state.update("request_cache_hits", 1)
return cached_response
else:
#
value = func(*args, **kwargs)
if caching_allowed:
shared_state.values["request_" + function_arguments_hash] = value
shared_state.update("request_" + function_arguments_hash, value)
return value

return cache_returned_values
75 changes: 40 additions & 35 deletions feedcrawler/providers/shared_state.py
@@ -14,35 +14,41 @@
from feedcrawler.providers.config import CrawlerConfig

values = {}
lock = None
logger = None


def set_shared_dict(manager_dict):
def set_state(manager_dict, manager_lock):
global values
global lock
values = manager_dict
lock = manager_lock


def set_initial_values(test_run, remove_cloudflare_time):
def update(key, value):
global values
values["test_run"] = test_run
values["remove_cloudflare_time"] = remove_cloudflare_time
values["ww_blocked"] = False
values["sf_blocked"] = False
values["user_agent"] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/111.0.0.0 Safari/537.36'
global lock
lock.acquire()
try:
values[key] = value
finally:
lock.release()


def set_files(configpath):
global values
values["configfile"] = os.path.join(configpath, "FeedCrawler.ini")
values["dbfile"] = os.path.join(configpath, "FeedCrawler.db")
values["log_file"] = os.path.join(configpath, 'FeedCrawler.log')
values["log_file_debug"] = os.path.join(configpath, 'FeedCrawler_DEBUG.log')
def set_initial_values(test_run, remove_cloudflare_time):
update("test_run", test_run)
update("remove_cloudflare_time", remove_cloudflare_time)
update("ww_blocked", False)
update("sf_blocked", False)
update("user_agent", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/111.0.0.0 Safari/537.36')


def set_log_level(log_level):
global values
values["log_level"] = log_level
def set_files(config_path):
update("configfile", os.path.join(config_path, "FeedCrawler.ini"))
update("dbfile", os.path.join(config_path, "FeedCrawler.db"))
update("log_file", os.path.join(config_path, 'FeedCrawler.log'))
update("log_file_debug", os.path.join(config_path, 'FeedCrawler_DEBUG.log'))


def set_logger():
@@ -74,13 +80,11 @@ def set_logger():


def set_sites():
global values
values["sites"] = ["FX", "SF", "DW", "HW", "FF", "BY", "NK", "NX", "WW", "SJ", "DJ", "DD"]
update("sites", ["FX", "SF", "DW", "HW", "FF", "BY", "NK", "NX", "WW", "SJ", "DJ", "DD"])


def set_device(new_device):
global values
values["device"] = new_device
update("device", new_device)


def check_device(device):
@@ -92,8 +96,6 @@ def check_device(device):


def connect_device():
global values

device = False
conf = CrawlerConfig('FeedCrawler')
myjd_user = str(conf.get('myjd_user'))
@@ -112,14 +114,13 @@
pass

if check_device(device):
values["device"] = device
update("device", device)
return True
else:
return False


def get_device():
global values
attempts = 0

while True:
@@ -130,7 +131,7 @@
pass
attempts += 1

values["device"] = False
update("device", False)

if attempts % 10 == 0:
print(
@@ -144,16 +145,20 @@


def set_connection_info(local_address, port, prefix, docker):
global values
values["local_address"] = local_address
values["port"] = port
values["prefix"] = prefix
values["docker"] = docker
update("local_address", local_address)
update("port", port)
update("prefix", prefix)
update("docker", docker)


def clear_request_cache():
global values
for key in list(values.keys()):
if key.startswith('request_'):
values.pop(key)
values["request_cache_hits"] = 0
global lock
lock.acquire()
try:
for key in list(values.keys()):
if key.startswith('request_'):
values.pop(key)
finally:
lock.release()
update("request_cache_hits", 0)
2 changes: 1 addition & 1 deletion feedcrawler/providers/version.py
@@ -9,7 +9,7 @@


def get_version():
return "18.0.4"
return "18.0.5"


def create_version_file():
