# Wiki Download

Use the MediaWiki API to download all of the content from the MiniScript wiki.

In [1]:
# Install the third-party packages imported below: `requests` (HTTP client)
# and GitPython (imported as `git`). `%pip` installs into the kernel's environment.
%pip install requests
%pip install GitPython

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Collecting GitPython
  Downloading GitPython-3.1.40-py3-none-any.whl.metadata (12 kB)
Collecting gitdb<5,>=4.0.1 (from GitPython)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->GitPython)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m568.6 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0mm
[?25hDownloading gitdb-4.0.11-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m161.5 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading smmap-5.0.1-py3-none-any.whl (24 kB)
Installing collected 

In [3]:
# Import all the things!

import git
import os
import requests
from urllib.parse import urljoin

In [5]:
# Seconds to wait on the wiki before giving up, so a stalled server cannot
# hang the notebook indefinitely.
_WIKI_REQUEST_TIMEOUT = 30


def _iter_all_page_titles(api_url):
    """Yield the title of every page reported by the wiki's `allpages` list.

    The API returns the list in batches; whenever a response carries a
    `continue` object, its values are merged into the next request so that
    the listing is followed to the end instead of stopping after one batch.
    """
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'allpages',
        'aplimit': 'max'
    }

    while True:
        response = requests.get(api_url, params=params, timeout=_WIKI_REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()

        for page in data['query']['allpages']:
            yield page['title']

        if 'continue' not in data:
            break
        # Resume the listing exactly where this batch ended.
        params = {**params, **data['continue']}


def download_mediawiki_content(url, output_folder):
    """Download the wikitext of every page on a MediaWiki site.

    Each page is written to `<output_folder>/<page title>.txt` as UTF-8.
    Titles containing `/` produce sub-directories, which are created on
    demand.

    Args:
        url: Base URL of the wiki; the API is expected at `w/api.php`
            relative to it.
        output_folder: Directory that receives the `.txt` files.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    api_url = urljoin(url, "w/api.php")
    print(f"Connecting to API at {api_url}.")

    # Download each page content
    for page_title in _iter_all_page_titles(api_url):
        page_params = {
            'action': 'query',
            'format': 'json',
            'titles': page_title,
            'prop': 'revisions',
            'rvprop': 'content'
        }

        page_response = requests.get(api_url, params=page_params, timeout=_WIKI_REQUEST_TIMEOUT)
        page_response.raise_for_status()
        page_data = page_response.json()

        pages = page_data.get('query', {}).get('pages')
        if not pages:
            continue

        # Only one title was requested, so the first entry is the page.
        page_info = next(iter(pages.values()))
        revisions = page_info.get('revisions')
        page_content = revisions[0].get('*') if revisions else None
        if page_content is None:
            # The page has no retrievable revision text; skip it rather
            # than abort the whole download.
            print(f"Skipping {page_title}: no revision content returned.")
            continue

        # Save the content to a file
        file_path = f"{output_folder}/{page_title}.txt"
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(page_content)

def download_git_repository(repo_url, destination_folder):
    """Clone a Git repository into `destination_folder`.

    If `destination_folder` already contains a `.git` directory the clone
    is skipped, so re-running the notebook does not fail on a destination
    that was populated by an earlier run. The existing checkout is not
    verified to come from `repo_url` and is not updated.

    Args:
        repo_url: URL of the repository to clone.
        destination_folder: Directory to clone into.
    """
    if os.path.isdir(os.path.join(destination_folder, ".git")):
        print(f"{destination_folder} already contains a Git checkout; skipping clone.")
        return

    # Clone the Git repository to the specified destination folder
    git.Repo.clone_from(repo_url, destination_folder)

def download_other_content(url, output_folder):
    """Download a single file from `url` into `output_folder`.

    The file is saved under the last path component of `url` and is written
    byte-for-byte as received, so binary files (for example PDFs) are not
    damaged by a text decode / UTF-8 re-encode round trip.

    Args:
        url: Address of the file to download.
        output_folder: Directory to save into; created if it is missing.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    filename = os.path.basename(url)
    # Timeout (seconds) so a stalled server cannot hang the notebook.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an error page as if it were the file.
    response.raise_for_status()

    os.makedirs(output_folder, exist_ok=True)
    file_path = f"{output_folder}/{filename}"
    with open(file_path, 'wb') as file:
        file.write(response.content)

def ensure_output_folder(output_folder):
    """Make sure `output_folder` exists as a directory.

    Missing parent directories are created as well. Calling this on an
    existing directory is a no-op.

    Args:
        output_folder: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok avoids the window between a separate "exists?" check and the
    # creation call in which another process could create the directory.
    os.makedirs(output_folder, exist_ok=True)

if __name__ == "__main__":
    # Root folder under which the downloads below are stored.
    output_folder = "./data/docs"
    ensure_output_folder(output_folder)

    # Other sources, currently disabled. Uncomment a line to fetch it.
    #download_mediawiki_content("https://miniscript.org", "./data/docs/wiki")
    #download_other_content("https://raw.githubusercontent.com/JoeStrout/miniscript/master/QuickRef.md", output_folder)
    #download_other_content("https://miniscript.org/files/MiniScript-Manual.pdf", output_folder)
    #download_git_repository("git@github.com:treytomes/micro-hack.git", os.path.join(output_folder, "micro-hack"))

    # NOTE(review): the destination is "micromicro-sysdisk" while the
    # repository is "minimicro-sysdisk" - possibly a typo; confirm before
    # renaming, since other code may read this path.
    download_git_repository(
        repo_url="git@github.com:JoeStrout/minimicro-sysdisk.git",
        destination_folder=os.path.join(output_folder, "micromicro-sysdisk"),
    )
