In [3]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing, suppress
from bs4 import BeautifulSoup
import re
import zipfile
import io
import pathlib

In [2]:
class Scraper:
    """Collect and download Python solution archives for one go-hero.net problem.

    The listing URL is built from the year, round and problem number. Every
    anchor on the listing page (and on its numbered ``/partial/<n>`` sub-pages)
    that contains a quoted ``http://code...`` URL is recorded as a download
    link; each link is then fetched as a zip archive and the archive's first
    member is written to ``<save_path>/<index>.py``.
    """

    _language = 'Python'
    _parser = 'html.parser'
    # A double-quoted URL starting with "http://code"; group 1 is everything
    # after that prefix, up to the closing quote.
    _download_link_pattern = re.compile(r'"http://code(.*?)"')

    def __init__(self, year, round_nr, problem_nr, save_path):
        """Store the target directory and build the listing URL.

        Args:
            year: Contest year, interpolated into the URL.
            round_nr: Round identifier, interpolated into the URL.
            problem_nr: Problem identifier, interpolated into the URL.
            save_path: Directory the downloaded files are written to.
        """
        self.save_path = save_path
        self.url = (
            'https://www.go-hero.net/jam/'
            f'{year}/solutions/{round_nr}/{problem_nr}/{self._language}'
        )
        self.download_links = []

    def download_all(self):
        """Create the target directory, gather all links and download them."""
        pathlib.Path(self.save_path).mkdir(parents=True, exist_ok=True)
        self.get_all_download_links()
        self._download_files()

    def get_all_download_links(self):
        """Append the download links of the main page and all sub-pages.

        Sub-pages are requested as ``<url>/partial/1``, ``/partial/2``, ...
        until ``simple_get`` returns ``None``.
        """
        # NOTE(review): simple_get is defined outside this class; the loop
        # below relies on it returning None when a page cannot be fetched.
        main_page = simple_get(self.url)
        # Apply the same None check to the main page that the sub-pages get;
        # handing None to BeautifulSoup would raise a TypeError.
        if main_page is not None:
            self._save_download_links_from_page(main_page)

        page = 1
        while True:
            sub_page = simple_get(f'{self.url}/partial/{page}')
            if sub_page is None:
                break
            self._save_download_links_from_page(sub_page)
            page += 1

    def _save_download_links_from_page(self, url_content):
        """Parse ``url_content`` as HTML and record every download link in it."""
        html = BeautifulSoup(url_content, self._parser)
        for anchor in html.select('a'):
            link = self._extract_download_link_from_text(anchor)
            if link is not None:
                self.download_links.append(link)

    @staticmethod
    def _extract_download_link_from_text(text):
        """Return the https download URL found in ``text``, or ``None``.

        ``text`` is converted with ``str()`` and searched for a double-quoted
        ``http://code...`` URL. The scheme is rewritten to ``https`` and the
        HTML entity ``&amp;`` is turned back into ``&``.
        """
        match = Scraper._download_link_pattern.search(str(text))
        if match is None:
            return None
        return ('https://code' + match.group(1)).replace('&amp;', '&')

    @staticmethod
    def _read_first_member(payload):
        """Return the bytes of the first member of a zip archive.

        Args:
            payload: The raw bytes of the zip archive.

        Returns:
            The decompressed content of the first listed member, or ``None``
            when the archive lists no members at all.

        Raises:
            zipfile.BadZipFile: If ``payload`` is not a zip archive.
        """
        with zipfile.ZipFile(io.BytesIO(payload)) as archive:
            members = archive.namelist()
            if not members:
                return None
            return archive.read(members[0])

    def _download_files(self):
        """Fetch every recorded link and write its first zip member to disk.

        Files are named ``<index>.py`` after the link's position in
        ``download_links``. An archive without members is skipped, so no
        empty file is created for it and the remaining links are still
        processed.
        """
        target_dir = pathlib.Path(self.save_path)
        for nr, link in enumerate(self.download_links):
            # closing() releases the streamed connection even if reading fails.
            with closing(get(link, stream=True)) as response:
                payload = response.content
            source = self._read_first_member(payload)
            if source is None:
                continue
            (target_dir / f'{nr}.py').write_bytes(source)
                
                