# In [None]:
import requests, re
from bs4 import BeautifulSoup

# One shared HTTP session so that every request below sends the same headers.
session = requests.Session()
# Send an empty User-Agent: Wattpad now blocks all user agents containing "Python".
session.headers.update({'User-Agent': ''})

# Story info endpoint, normally used by the Android app. The story id is
# appended to the path; Android sends e.g. 9876543?drafts=0&include_deleted=1
API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/'

# Chapter text endpoint, normally used by the website and the Android app.
# Queried as ?id=23456789
#   The webpage adds a page parameter: ?id=23456789&page=1
#   Android sends: ?id=23456789&increment_read_count=1&include_paragraph_id=1&output=text_zip
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext'

# Chapter info endpoint, queried as ?id=23456789
API_CHAPTERINFO = 'https://www.wattpad.com/apiv2/info'

# Category list endpoint (documented API)
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'

# Translation table mapping each of the characters .<>:"/\|?*^ to "-"
ILLEAGAL_FILENAME_CHARACTERS = str.maketrans(r'.<>:"/\|?*^', '-----------')

# Fetch the categories when this code runs and convert the keys to ints.
# This could probably be cached too.
categories = {
    int(key): value
    for key, value in session.get(API_GETCATEGORIES).json().items()
}

def get_story_id(url):
    """Resolve a Wattpad URL to a story id.

    The first run of digits in ``url`` is taken as a candidate id. If the
    story-info endpoint answers successfully for it, it is returned as the
    story id. Otherwise it is looked up as a chapter id, and the first run of
    digits in the ``url`` field of the chapter-info response is returned.

    Args:
        url: String containing a numeric story or chapter id.

    Returns:
        The id as a string of digits, or ``None`` when ``url`` contains no
        digits, neither endpoint accepts the id, or the chapter-info response
        does not carry a usable ``url`` field.
    """
    # Extract the id number from the url
    match = re.search(r'\d+', url)
    if not match:
        return None
    url_id = match.group()

    # Check if it's a valid id of a story
    if session.get(API_STORYINFO + url_id).ok:
        return url_id

    # If not, check if it's a chapter id and retrieve the story id
    chapterinfo_req = session.get(API_CHAPTERINFO, params={'id': url_id})
    if not chapterinfo_req.ok:
        return None
    try:
        story_url = chapterinfo_req.json()['url']
    except (ValueError, KeyError, TypeError):
        # The body is not JSON, or it has no 'url' entry: report "not found"
        # like every other failure path instead of raising.
        return None
    if not isinstance(story_url, str):
        return None

    # A url without digits yields no id; return None rather than calling
    # .group() on a failed match.
    story_match = re.search(r'\d+', story_url)
    if not story_match:
        return None
    return story_match.group()

def download_story(story_id):
    """Request the story info for ``story_id`` and return the decoded JSON.

    The request is sent with ``drafts=1`` and ``include_deleted=1``.
    ``story_id`` is appended to the endpoint URL, so it must be a string.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    query = {'drafts': 1, 'include_deleted': 1}
    response = session.get(API_STORYINFO + story_id, params=query)
    return response.json()

def main(url):
    """Download every part of a story and append its text to a file.

    The output file is named after the story title, with the characters in
    ``ILLEAGAL_FILENAME_CHARACTERS`` replaced, and is opened in append mode
    with UTF-8 encoding. The story id and title are printed along the way.

    Args:
        url: String containing a Wattpad story or chapter id.

    Raises:
        ValueError: If no story id can be resolved from ``url``.
    """
    story_id = get_story_id(url)
    print(story_id)
    if story_id is None:
        # Fail with a clear message instead of a TypeError further down
        raise ValueError(f"could not resolve a story id from {url!r}")

    # Fetch the story info once and reuse it for both the title and the parts
    storyinfo = download_story(story_id)
    title = storyinfo["title"]
    print(title)
    parts = storyinfo["parts"]

    # The title comes from the remote API: replace characters that are not
    # allowed in file names or that would be read as path separators.
    filename = str(title).translate(ILLEAGAL_FILENAME_CHARACTERS)

    # Open the output once rather than once per chapter
    with open(filename, "a", encoding="utf-8") as f:
        for part in parts:
            chapter_req = session.get(
                API_STORYTEXT,
                params={'id': part['id'], 'output': 'json'},
            )
            chapter_html = chapter_req.json()['text']
            # Name the parser explicitly so the extracted text does not
            # depend on which optional parsers are installed.
            f.write(BeautifulSoup(chapter_html, "html.parser").text)