<a href="https://colab.research.google.com/github/koted0/Project-Gozle/blob/main/Project_Gozle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What works
## How to use it 📄


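*   Run every cell, in order, up to and including the **Instances** cell.
*   After the Instances cell, add your downloads and start them as shown in the examples below.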


## Works✅
1.   Adding downloads from URL, one by one

Example:
```
d = Downloader()
d.add_url("Paste here your URL with quotes")
d.add_url("Another link to file that you want to download")
...
```

2.   Adding downloads from file.

Example:
```
d = Downloader()
d.add_downloads_from_file("Paste here filename that you uploaded to the root of storage.")
```
Example of file that you upload
```
downloads.txt
https://download.visualstudio.microsoft.com/download/pr/10912113/5da66ddebb0ad32ebd4b922fd82e8e25/vcredist_x86.exe
https://vscode.download.prss.microsoft.com/dbazure/download/stable/0ee08df0cf4527e40edc9aa28f4b5bd38bbff2b2/VSCodeUserSetup-x64-1.85.1.exe
https://youtu.be/lYSmz07i9SM?si=jkbRxm9llkpPWzcI
...
```
3.   Torrent support
4.   Proper file naming
5.   Checking whether a file was already downloaded (the download is skipped if the same file already exists in storage)
6.   4.9 GB split-file archiving
7.   Auto login to Gozle disk (you must save the **username** and **password** details of your Gozle account in Secrets.)
8.   Uploading up to 3 files simultaneously (so, up to 15+ GB of files for free accounts)

## Bugs 🛠
*   If an archive is already present, new files are not archived even if they are different.

## Not working yet / planned features ❌
*   Torrent Magnet links support
*   Various GUI implementations for non-intuitive parts
*   Multipaging upload for Gozle. In theory can upload up to CAP of DISK.
*   Getting filesize to calculate total size for uploading.
*   User-friendly CLI
*   [Multithreading](https://colab.research.google.com/drive/1s9y5D_JlCOT43FQ30vwSXGH50b2waDNa#scrollTo=Qa7U4zLT8ZWj&uniqifier=1)

In [None]:
# @title Install packages
# Python packages used by the cells below: selenium (Gozle upload),
# yt-dlp (YouTube downloads) and libtorrent (torrent client).
!pip install selenium yt-dlp libtorrent
# Chrome driver needed by the headless Selenium session.
!apt install chromium-chromedriver -y
# Remove the sample_data/ directory (presumably Colab's bundled samples — confirm).
!rm -R sample_data/
#Media content delivery solution

In [None]:
# @title Python Imports
import requests
import re
import os
import libtorrent as lt
import sys
import time
import logging
import datetime

from fnmatch import fnmatch
from requests.structures import CaseInsensitiveDict
# from yt_dlp import YoutubeDL
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from google.colab import userdata
from queue import Queue
from selenium.common import NoSuchElementException, TimeoutException

In [None]:
# @title Downloader
# Log line layout: timestamp :: logger name :: level (left-aligned, 8 chars) :: message.
FORMAT = "%(asctime)s :: %(name)s :: %(levelname)-8s :: %(message)s"
# force=True replaces handlers already attached to the root logger, so
# re-running this cell re-applies the format instead of being ignored.
logging.basicConfig(level=logging.INFO, format=FORMAT, force=True)

class Downloader:
    def __init__(self):
        self.download_list: Queue = Queue()
        self.video_quality: dict = { "hd": "res:720", "fhd": "res:1080", 'uhd': "res:2160" }
        self.youtube_pattern = re.compile(r"(?:https?:\/\/)?(?:www\.)?youtu\.?be(?:\.com)?\/?.*(?:watch|embed|playlist)?(?:.*v=|v\/|\/|list=)([\w\-_]+)\&?")


    def _get_headers(self, url: str) -> requests.Response:
        with requests.head(url) as response:
            try:
                return response.headers
            except Exception:
                logging.warning("No headers in response")

    def _get_filename_from_header(self, header: CaseInsensitiveDict) -> str | None:
        try:
            content_disposition = header.get("content-disposition", "")
            if "filename=" in content_disposition:
                return content_disposition.split("filename=")[-1].strip('"')
            else:
                logging.warning("No filename in headers")
        except Exception:
            logging.warning("Unexpected error occurred while extracting filename")

    def _get_filename_from_url(self, url: str) -> str:
        filename = url.split("/")[-1]
        return filename if "?" not in filename else filename.split("?")[0]

    def _get_proper_name(self, url: str) -> str:
        headers = self._get_headers(url)
        if headers:
            filename = self._get_filename_from_header(headers)
            if filename:
                return filename
            return self._get_filename_from_url(url)

    def _is_file_exists(self, filename: str, path: str) -> bool:
        file_path = os.path.join(path, filename)
        return os.path.exists(file_path)

    def _download_other(self, url: str) -> None:
        path = "/content/others/"
        filename = self._get_proper_name(url)
        if self._is_file_exists(filename, path):
            logging.info(f"File: {filename} exists. Skipping")
        else:
            !curl -o "{path}""{filename}" -L --create-dirs "{url}"

    def _download_youtube(self, url: str, quality_key: str) -> None:
        path = "/content/videos/"
        quality = self.video_quality[quality_key] if quality_key else "res:720"
        !yt-dlp -S "{quality}" "{url}" -P "{path}"

    def add_url(self, url: str, quality_key: str = None) -> None:
        """Add a URL to the download list."""
        self.download_list.put((url, quality_key))

    def add_downloads_from_file(self, path2file: str) -> None:
        with open(path2file, 'r') as file:
            [self.add_url(url.strip()) for url in file]

    def download(self) -> None:
        """Download all URLs in the download list."""
        while not self.download_list.empty():
            url, quality_key = self.download_list.get()
            if re.match(self.youtube_pattern, url):
                self._download_youtube(url, quality_key)
            else:
                self._download_other(url)
        else:
            logging.info("Downloading Finished")


In [None]:
# @title Uploader (Gozle Disk [Selenium])
class Uploader:
    def __init__(self):
        self.GOZLE_URL = "https://disk.gozle.com.tm/login"
        self._gozle_username = userdata.get("username")
        self._gozle_password = userdata.get("password")
        self.archive_paths: dict = {}
        self.directories = ["/content/others/", "/content/videos/",
                            "/content/torrents/"]

    def _set_webdriver_options(self) -> None:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.maximize_window()
        self.wait = WebDriverWait(self.driver, 10)

    def login(self) -> None:
        self._set_webdriver_options()
        self.driver.get(self.GOZLE_URL)
        self.wait.until(EC.presence_of_element_located((By.NAME, 'email'))).send_keys(self._gozle_username)
        self.driver.find_element(By.NAME,value='password').send_keys(self._gozle_password)
        self.driver.find_element(By.CSS_SELECTOR, 'button[type = \'submit\']').click()
        logging.info("Logged Succesfull")

    #TODO: Make GUI slider for upload and ETA?
    def check_uploads(self) -> None:
        try:
            uploading_menu = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div[2]/div[5]/div[2]/div')))
        except (NoSuchElementException, TimeoutException):
            return
        uploading_items = uploading_menu.find_elements(By.XPATH, "./div")
        # logging.info(f'Uploading: {len(uploading_items)} Items, with status:')
        for div_N in uploading_items:
            item_name = div_N.find_element(By.XPATH, './div[2]/div[1]/div').text
            item_status = div_N.find_element(By.XPATH, './div[2]/div[2]').text
        return item_name, item_status

    def _print_uploading_status(self) -> None:
        try:
            name, status = self.check_uploads()
            logging.info(f"Item: {name}, Status: {status}")
        except TypeError:
            logging.warning("Nothing Uploading")

    def _has_visible_files(self, directory: str) -> bool:
        return any(True for _ in os.scandir(directory))


    def _archive_data(self, directory: str) -> None:
        dir_name = directory.split('/')[2]
        timestamp = datetime.datetime.now().strftime("%d-%m_%H:%M")
        archive_name = f"{dir_name}_{timestamp}.zip"
        logging.info("Starting archiving")
        !zip -r -j -m -s 5000m {archive_name} {directory}

    def _update_uploads_list(self) -> None:
        uploading_items = self.check_uploads()
        cwd = os.getcwd()
        files = [file for file in os.listdir(cwd) if not file.startswith('.') \
            and (fnmatch(file, '*.z*'))]
        for file in files:
            if not uploading_items or file not in uploading_items:
                self.archive_paths[file] = os.path.join(cwd, file)
            else:
                logging.info("File is allready uploading, skipping.")

    def upload(self) -> None:
        [self._archive_data(directory) for directory in self.directories
         if os.path.exists(directory) and self._has_visible_files(directory)]
        self._update_uploads_list()
        if not self.archive_paths:
            return

        for filename in list(self.archive_paths.keys()):
            path = self.archive_paths.pop(filename)
            self.wait.until(EC.presence_of_element_located((By.XPATH, "//div//div[2]//div//div//div//button[starts-with(@id, ':r')]"))).click()
            self.driver.find_element(By.XPATH, "//div[@data-value='uploadFiles']").click()
            self.driver.find_element(By.CSS_SELECTOR, value='input[type = \'file\']').send_keys(path)
            logging.info(f"Uploading {filename}")
        time.sleep(5)
        self._print_uploading_status()

    def refresh_page(self) -> None:
        self.driver.refresh()

    def screenshot(self) -> None:
        self.driver.save_screenshot("uploading.png")

In [None]:
# @title Torrent Client
def download_torrent(filename):
    """Download the torrent described by the .torrent file *filename*.

    The data is saved under ``/content/torrents``.  A progress line is
    printed once per second until the torrent reports the seeding state.
    """
    # Text shown for each libtorrent state value, looked up by index.
    state_names = [
        "queued", "checking", "downloading metadata", "downloading",
        "finished", "seeding", "allocating", "checking fastresume"]

    session = lt.session({"listen_interfaces": "0.0.0.0:6881, [::]:6881"})
    torrent = lt.torrent_info(filename)
    handle = session.add_torrent({"ti": torrent, "save_path": "/content/torrents"})

    status = handle.status()
    print("starting", status.name)

    while status.state != lt.torrent_status.seeding:
        status = handle.status()
        percent_done = status.progress * 100
        down_kb = status.download_rate / 1024
        up_kb = status.upload_rate / 1024
        # "\r" rewrites the same console line on every update.
        print('\r%.2f%% complete (down: %.1f kB/s up: %.1f kB/s peers: %d) %s' % (
            percent_done, down_kb, up_kb,
            status.num_peers, state_names[status.state]), end=' ')

        sys.stdout.flush()
        time.sleep(1)

    finished_name = handle.status().name
    print("\n", finished_name, "complete")

# Instances

In [None]:
# Shared instances used by the cells below.
# Uploader() reads the "username" and "password" Colab secrets on creation.
d = Downloader()
u = Uploader()

Use the ▶️ button to run the cell above ⬆️, then add your downloads in the cells below ⬇️

```
d.add_url("URL")
```
```
d.add_downloads_from_file("filename")
```
```
d.download()
```

In [None]:
# Queue a single URL: replace the empty string with the link to download.
d.add_url("")

In [None]:
# Queue every URL listed, one per line, in downloads.txt.
d.add_downloads_from_file("downloads.txt")

In [None]:
# Download everything that was queued above.
d.download()

In [None]:
# Start the headless browser and submit the stored Gozle credentials.
u.login()

In [None]:
# Archive the download directories and upload the resulting archives.
u.upload()

In [None]:
# Log the upload status currently shown on the page; re-run to poll again.
u._print_uploading_status()