Skip to content

Commit

Permalink
Now downloading the navigation directly from the Kotlin project, no n…
Browse files Browse the repository at this point in the history
…eed to update sites.txt anymore.
  • Loading branch information
phxql committed Feb 28, 2016
1 parent 4fc4091 commit 8bfb022
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 58 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# IntelliJ
*.iml
.idea/
7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,5 @@ To download the EPub, [click here](https://github.com/phxql/kotlin-one-epub/raw/
## Run it yourself

1. Install [Pandoc](http://pandoc.org/installing.html)
1. `pip install PyYAML`
1. Run `python convert.py`

## TODOs

1. Write a script to generate the `sites.txt` file.
1. Write a script to merge them together. Currently I'm using Calibre for this. Install the EPubMerge-Plugin (http://www.mobileread.com/forums/showthread.php?t=169744), delete the kotlin.epub file and execute `calibre-debug --run-plugin EpubMerge -- *.epub`. The merged file is called merge.epub.

48 changes: 42 additions & 6 deletions convert.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,53 @@
# Install pandoc (http://johnmacfarlane.net/pandoc/)
# TODO: Merge epubs on commandline. I've imported all the epubs into calibre and then used the epubmerge plugin (https://code.google.com/p/epubmerge/) to get one epub file
# pip install PyYAML

import subprocess
import yaml
import urllib2
import os

# Location of the Kotlin website's navigation tree and of its raw markdown sources.
navigation_url = 'https://raw.githubusercontent.com/JetBrains/kotlin-web-site/master/_data/_nav.yml'
base_url = 'https://raw.githubusercontent.com/JetBrains/kotlin-web-site/master'
# Scratch directory that receives one markdown file per downloaded page.
tmp_path = '/tmp/kotlin-one-epub/'


def _fetch(url):
    """Return the body of `url`, closing the connection even if reading fails."""
    connection = urllib2.urlopen(url)
    try:
        return connection.read()
    finally:
        connection.close()


print("Fetching navigation...")
response = _fetch(navigation_url)

print("Parsing navigation...")
navigation = yaml.safe_load(response)
reference = navigation['reference']
# Collect the entries of every section except the one titled 'Reference',
# flattened into a single list. A comprehension (rather than
# reduce(list.__add__, ...)) also copes with zero matching sections.
content = [
    entry
    for section in reference
    if section['title'] != 'Reference'
    for entry in section['content']
]
# The first key of each entry is taken as the page path.
urls = [next(iter(entry)) for entry in content]
# Make the paths absolute and point them at the markdown source of each page.
urls = [base_url + u.replace('.html', '.md', 1) for u in urls]

# Download the pages
if not os.path.exists(tmp_path):
    os.makedirs(tmp_path)

tmp_files = []
for i, url in enumerate(urls):
    print("Downloading " + url + "...")
    # `url` is already absolute here; prepending base_url a second time
    # would produce a malformed address.
    url_content = _fetch(url)
    # Remove strange words from the content
    url_content = url_content.replace("{: .keyword }", "")

    filename = os.path.join(tmp_path, str(i) + ".md")
    # `with` guarantees the file is flushed and closed before pandoc reads it.
    with open(filename, 'w') as tmp_file:
        tmp_file.write(url_content)
    tmp_files.append(filename)

# Run pandoc
print("Running pandoc...")

# Pass the arguments as a list so no shell is involved and file names are
# never re-interpreted. title.md comes first, followed by the pages in
# navigation order.
command = [
    "pandoc", "-s", "--toc",
    "--from=markdown", "--to=epub3",
    "--output=kotlin.epub",
    "title.md",
] + tmp_files
return_code = subprocess.call(command)
if return_code != 0:
    # Surface a failed conversion instead of silently finishing.
    raise SystemExit("pandoc failed with exit code " + str(return_code))

Expand Down
Binary file modified kotlin.epub
Binary file not shown.
46 changes: 0 additions & 46 deletions sites.txt

This file was deleted.

0 comments on commit 8bfb022

Please sign in to comment.