# Notebook to generate fastai visual docs

In [2]:
!pip install requests > /dev/null
!pip install beautifulsoup4 > /dev/null
!pip install fastai --upgrade > /dev/null

## 1. Export SVG versions of the PowerPoint slides

1. Open "fastai_docs_v2.7.8.pptx" in Microsoft PowerPoint
2. Save as ... > SVG Format > export all slides, in the current directory
3. .svg files are generated in the ./fastai_docs_v2.7.8 subdirectory

In [4]:
version = "2.7.8"

In [5]:
from fastai.data.all import *

p = Path(f"./fastai_docs_v{version}")
files = get_files(p, extensions=".SVG")
files

(#76) [Path('fastai_docs_v2.7.8/Diapositive1.SVG'),Path('fastai_docs_v2.7.8/Diapositive10.SVG'),Path('fastai_docs_v2.7.8/Diapositive11.SVG'),Path('fastai_docs_v2.7.8/Diapositive12.SVG'),Path('fastai_docs_v2.7.8/Diapositive13.SVG'),Path('fastai_docs_v2.7.8/Diapositive14.SVG'),Path('fastai_docs_v2.7.8/Diapositive15.SVG'),Path('fastai_docs_v2.7.8/Diapositive16.SVG'),Path('fastai_docs_v2.7.8/Diapositive17.SVG'),Path('fastai_docs_v2.7.8/Diapositive18.SVG')...]

## 2. Rename the SVG files with slides titles

1. Open "titles.csv" in a table editor (separator ';' and UTF8 encoding)
2. Update the contents to match the latest version of  "fastai_docs_v2.7.8.pptx" (and save if needed)
  - column 0 : slide index (starting at 1) 
  - column 1 : section title
  - column 2 : slide title
3. Execute the code below to rename all svg files

You must be careful if you change a title in this table: the deep links to a specific svg picture will change => any user with a specific bookmark will be redirected to the top of the page.

In [6]:
titles = pd.read_csv("titles.csv", sep=';', header=None)
titles

Unnamed: 0,0,1,2
0,1,Concepts,Concepts - Data loading
1,2,Concepts,Concepts - Model training
2,3,Concepts,Concepts - Learner lifecycle
3,4,Learner,Learner - Create an instance
4,5,Learner,"Learner - Init, Attributes"
...,...,...,...
71,72,Summary,Summary - DataBlock
72,73,Summary,Summary - DataLoaders
73,74,Summary,Summary - Learner
74,75,Summary,Implementation notes


In [7]:
# Extract slide number from the file names generated by PowerPoint
slide_numbers_and_files = L(zip(files.map(lambda n: int(n.name[11-len(n.name):-4])),files))

# Create suitable file names from the slides titles
# Rename all svg files with their number and title
target_filenames = L()
for num,file in slide_numbers_and_files:
    safe_title = titles[2].iloc[num-1].replace(' - ','-').replace(',','').replace(' ','_').replace('/','_')
    target_filename = ('0' if num<=9 else '')+str(num)+'_'+safe_title+'.svg'
    target_filenames.append((num,target_filename))
    file.rename(p/target_filename)
                                                                                                   
# Add these filenames to the titles table for later use                                                                                                   
target_filenames.sort(key=lambda t:t[0])
titles[3] = target_filenames.itemgot(1)
titles

# Reload list of files
files = get_files(p, extensions=".SVG")

Move images to the subdirectory ./images/fastaidocs :

In [15]:
import shutil

destpath = Path('./images/fastaidocs')
destpath.mkdir(parents=True, exist_ok=True) 

for src_file in p.glob('*.*'):
    shutil.copy(src_file, destpath)

shutil.rmtree(p)
p = destpath

## 3. Insert links to fastai docs or code in the SVG files

Note : this is implemented as a post-processing step because PowerPoint doesn't support links in its SVG export feature (May 2021).

### 3.1 Collect all links and anchors from https://docs.fast.ai in a single csv file (optional)

This is useful in the next step to speed up the process of assigning a useful link to each visual element in the slides.

With the generated "docs.fast.ai.csv" file opened in a table editor (separator ';' and UTF8 encoding), you can locate very quickly the link you need and copy-paste it.

To do this, we simply crawl the docs.fast.ai website ...

**IMPORTANT : first, rename your 'docs.fast.ai.csv' file to 'docs.fast.ai_old.csv'** before running the cell below, if you want to be able to compare links for the new version of the docs and links for the old version of the docs.

In [38]:
import requests
from bs4 import BeautifulSoup

rooturl = "https://docs.fast.ai/"

html = requests.get(url=rooturl).text
page = BeautifulSoup(html, 'html.parser')

title1 = ""
title2 = ""

with open('docs.fast.ai.csv', 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=';', quotechar='\\', quoting=csv.QUOTE_MINIMAL)
    
    for link in page.find("div",attrs={"class":"sidebar-menu-container"}).find_all("a"): 
        if link.has_attr('class') and ('sidebar-item-toggle' in link['class']): continue        
        txt = link.string
        if not link.has_attr('href'): 
            url = "#"
        else:
            url = link['href'].lstrip('.')
        depth = len(list(link.parents))
        if depth==9:
            title1 = txt
            title2 = ""
            if url=="#": continue
        elif depth==11 and link.has_attr('data-bs-toggle'):
            title2 = txt
            continue
        elif depth==11 and url!="#":
            title2 = ""
        
        print(f"{depth} - {txt} - {url} => {title1} - {title2}")

        baseurl = f"https://docs.fast.ai{url}"
        writer.writerow([title1,title2,txt,baseurl])
        print("Crawling : "+baseurl+"\r")
        
        html2 = requests.get(url=baseurl).text
        page2 = BeautifulSoup(html2, 'html.parser')

        for header in page2.find_all(["h2","h3","h4"]):
            strs = list(header.strings)
            if len(strs)==1:
                txt2 = strs[0]
            else:
                txt2 = "".join(strs[:-1])
            if txt2 == "On this page": continue
            if header.has_attr('id'):
                url2 = header['id']
            elif header.has_attr('data-anchor-id'):
                url2 = header['data-anchor-id']
            else:
                continue
            depth = int(header.name[1:])

            writer.writerow(['']*depth+[txt2,f"{baseurl}#{url2}"])

9 - Welcome to fastai - /index.html => Welcome to fastai - 
Crawling : https://docs.fast.ai/index.html
9 - Quick start - /quick_start.html => Quick start - 
Crawling : https://docs.fast.ai/quick_start.html
11 - Tutorials - /tutorial.html => Tutorials - 
Crawling : https://docs.fast.ai/tutorial.html
13 - Computer vision intro - /tutorial.vision.html => Tutorials - Beginner
Crawling : https://docs.fast.ai/tutorial.vision.html
13 - Text transfer learning - /tutorial.text.html => Tutorials - Beginner
Crawling : https://docs.fast.ai/tutorial.text.html
13 - Tabular training - /tutorial.tabular.html => Tutorials - Beginner
Crawling : https://docs.fast.ai/tutorial.tabular.html
13 - Collaborative filtering tutorial - /tutorial.collab.html => Tutorials - Beginner
Crawling : https://docs.fast.ai/tutorial.collab.html
13 - Data block tutorial - /tutorial.datablock.html => Tutorials - Intermediate
Crawling : https://docs.fast.ai/tutorial.datablock.html
13 - Training Imagenette - /tutorial.imagenette

11 - Comet.ml - /callback.comet.html => Integrations - 
Crawling : https://docs.fast.ai/callback.comet.html
11 - Tensorboard - /callback.tensorboard.html => Integrations - 
Crawling : https://docs.fast.ai/callback.tensorboard.html
11 - Hugging Face Hub - /huggingface.html => Integrations - 
Crawling : https://docs.fast.ai/huggingface.html
11 - Pull requests made easy - /dev-setup.html => fastai Development - 
Crawling : https://docs.fast.ai/dev-setup.html
11 - git Notes - /dev/git.html => fastai Development - 
Crawling : https://docs.fast.ai/dev/git.html
11 - fastai Abbreviation Guide - /dev/abbr.html => fastai Development - 
Crawling : https://docs.fast.ai/dev/abbr.html
11 - fastai coding style - /dev/style.html => fastai Development - 
Crawling : https://docs.fast.ai/dev/style.html
11 - Working with GPU - /dev/gpu.html => fastai Development - 
Crawling : https://docs.fast.ai/dev/gpu.html
11 - Notes For Developers - /dev/develop.html => fastai Development - 
Crawling : https://docs.fa

In [60]:
oldlinks = set()
oldfile = open('docs.fast.ai_old.csv', 'r')
for line in oldfile.readlines():
    link = line.split(';')[-1].strip('\n')
    oldlinks.add(link)

In [61]:
newlinks = set()
newfile = open('docs.fast.ai.csv', 'r')
for line in newfile.readlines():
    link = line.split(';')[-1].strip('\n')
    newlinks.add(link)

In [62]:
missinglinks = set()
for oldlink in oldlinks:
    if "docs.fast.ai" in oldlink: # migration to v2.7.8
        oldlink = oldlink.lower()
    if not oldlink in newlinks:
        missinglinks.add(oldlink)
        
print(f"{len(missinglinks)} links from the previous version of the docs don't exist anymore in the new version of the docs")
missinglinks

85 links from the previous version of the docs don't exist anymore in the new version of the docs


{'https://docs.fast.ai/',
 'https://docs.fast.ai/#about-fastai',
 'https://docs.fast.ai/#contributing',
 'https://docs.fast.ai/#docker-containers',
 'https://docs.fast.ai/#installing',
 'https://docs.fast.ai/#learning-fastai',
 'https://docs.fast.ai/#migrating-from-other-libraries',
 'https://docs.fast.ai/#tests',
 'https://docs.fast.ai/#windows-support',
 'https://docs.fast.ai/callback.fp16.html#problems-with-half-precision:',
 'https://docs.fast.ai/callback.fp16.html#the-solution:-mixed-precision-training',
 "https://docs.fast.ai/callback.fp16.html#what's-half-precision?",
 'https://docs.fast.ai/callback.hook.html#what-are-hooks?',
 'https://docs.fast.ai/callback.neptune.html#how-to-use?',
 'https://docs.fast.ai/callback.progress.html#learner.no_bar',
 'https://docs.fast.ai/callback.wandb.html#example-of-use:',
 'https://docs.fast.ai/data.external.html#download_data',
 'https://docs.fast.ai/data.external.html#download_url',
 'https://docs.fast.ai/data.external.html#downloading',
 'ht

In [63]:
usedlinks = set()
usedlinksdf = pd.read_csv('links.csv', sep=';', header=None, encoding='utf-8', keep_default_na=False)
for usedlink in usedlinksdf[2]:
    if len(usedlink)>=6 and usedlink[0]=='h':
        usedlinks.add(usedlink)

In [64]:
notfoundlinks = usedlinks.difference(newlinks)

print(f"{len(notfoundlinks)} links used in links.csv can't be found in the new version of the docs")
notfoundlinks

97 links used in links.csv can't be found in the new version of the docs


{'https://docs.fast.ai/callback.progress.html#learner.no_bar',
 'https://docs.fast.ai/data.external.html#download_data',
 'https://docs.fast.ai/data.external.html#download_url',
 'https://docs.fast.ai/data.external.html#file_extract',
 'https://docs.fast.ai/optimizer.html#lars/larc',
 'https://docs.fast.ai/test_utils.html#show_install',
 'https://docs.fast.ai/text.models.awdlstm.html#awd_qrnn',
 'https://docs.fast.ai/text.models.qrnn.html#qrnn',
 'https://docs.fast.ai/text.models.qrnn.html#qrnnlayer',
 'https://fastcore.fast.ai/dispatch.html#retain_types',
 'https://github.com/fastai/fastai/blob/301016c5d3de2bdb5269121bd0716538d85f7409/fastai/data/load.py#L145',
 'https://github.com/fastai/fastai/blob/301016c5d3de2bdb5269121bd0716538d85f7409/fastai/data/load.py#L146',
 'https://github.com/fastai/fastai/blob/301016c5d3de2bdb5269121bd0716538d85f7409/fastai/data/load.py#L91',
 'https://github.com/fastai/fastai/blob/a099769a8ffed48127ab7ba6422d133edc21dc71/fastai/callback/azureml.py#L13',


# 3.2 Update links table

In [8]:
# Utility functions to parse SVG xml files
from xml.dom.minidom import parse

def get_descendant_nodes(context_node, predicate):
    if not context_node:
        yield None
    for child in context_node.childNodes:
        if predicate(child):
            yield child
        yield from get_descendant_nodes(child, predicate)

def get_text_value(context_node, default=None):
    texts_nodes = get_descendant_nodes(context_node, lambda n: n.nodeType == n.TEXT_NODE)
    text_value = ' '.join([str.strip(t.nodeValue) for t in texts_nodes])
    return text_value if text_value else default

1. Refresh the links table in "links.csv" by running the code below. 

The idea is :
  - scan all SVG files
  - locate all text elements in the SVG file
  - check if the text element was already referenced in the links table
    - if it wasn't : add it to the table and flag it "NEW"
    - if it was : flag it "OK"
  - after all SVG files were scanned, mark all remaining text elements in the table "REMOVED" 

In [9]:
links = pd.read_csv('links.csv', sep=';', header=None, encoding='utf-8', keep_default_na=False)

In [10]:
links[3] = "REMOVED"

for file in files:
    dom = parse(open(file, encoding='utf-8'))
    for idx,textElt in enumerate(dom.getElementsByTagName('text')):
        text = get_text_value(textElt).replace('"','')
        rows = links[(links[0]==file.name) & (links[1]==text)]   
        if len(rows.index)>0:
            links.loc[(links[0]==file.name) & (links[1]==text),3] = "OK"
        else:
            links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
    print(f"{file.name} -> {idx} labels\r")

links = links.reset_index().sort_values([0,'index']).drop(columns='index')

links[3].value_counts()

  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


01_Concepts-Data_loading.svg -> 35 labels
02_Concepts-Model_training.svg -> 39 labels
03_Concepts-Learner_lifecycle.svg -> 30 labels
04_Learner-Create_an_instance.svg -> 21 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


05_Learner-Init_Attributes.svg -> 52 labels
06_Learner-Training_methods.svg -> 24 labels
07_Learner-Inference_methods.svg -> 22 labels
08_Diagnostics-How_to_debug.svg -> 13 labels
09_Show-Inputs_targets_predictions.svg -> 25 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


10_Show-Images.svg -> 17 labels
11_Show-Text_points_boxes_tables.svg -> 14 labels
12_Plot-Training_loop.svg -> 15 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


13_Model_evaluation-Interpretation.svg -> 23 labels
14_Metrics-1_2.svg -> 26 labels
15_Metrics-2_2.svg -> 49 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


16_Learner-Training_loop_1_2.svg -> 35 labels
17_Learner-Training_loop_2_2.svg -> 28 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


18_Learner-Customize_training_loop.svg -> 20 labels
19_Learner-Callbacks_1_2.svg -> 30 labels
20_Learner-Callbacks_2_2.svg -> 20 labels
21_Learner-Context_managers.svg -> 25 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


22_Learner-Public_methods_call_tree.svg -> 22 labels
23_Learner-validate_and_Recorder.svg -> 11 labels
24_Learner-get_preds_and_Loss_function.svg -> 13 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


25_Learner-predict_and_DataLoader.svg -> 14 labels
26_Learner-show_results_and_DataLoader.svg -> 15 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


27_DataLoaders-Create_an_instance.svg -> 36 labels
28_DataLoaders-Interface.svg -> 38 labels
29_DataLoader-Interface.svg -> 22 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


30_DataLoader-Init.svg -> 26 labels
31_DataLoader-iter()_and_next().svg -> 35 labels
32_DataLoader-TfmdDL.svg -> 24 labels
33_DataLoader-TfmdDL_subclasses.svg -> 19 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


34_Dataset-Interface.svg -> 32 labels
35_Dataset-TfmdLists.svg -> 28 labels
36_Dataset-Datasets.svg -> 26 labels
37_Dataset-Tabular_datasets.svg -> 26 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


38_DataBlock.svg -> 25 labels
39_DataBlock-Init_data_pipeline.svg -> 22 labels
40_TransformBlocks-Labels.svg -> 14 labels
41_TransformBlocks-Vision.svg -> 19 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


42_TransformBlocks-Text.svg -> 13 labels
43_Download_datasets_and_models.svg -> 5 labels
44_Download-Directories_config.svg -> 8 labels
45_Dataset_Model_Learner-Directories.svg -> 23 labels
46_Get_items.svg -> 16 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


47_Splitters.svg -> 21 labels
48_Getters.svg -> 11 labels
49_Transforms_for_labels.svg -> 25 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


50_Type_Transforms-Vision.svg -> 19 labels
51_Item_Transforms-Vision.svg -> 18 labels
52_Data_augmentation-Vision_1_4.svg -> 2 labels
53_Data_augmentation-Vision_2_4.svg -> 1 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


54_Data_augmentation-Vision_3_4.svg -> 1 labels
55_Data_augmentation-Vision_4_4.svg -> 2 labels
56_Type_Transforms-Text.svg -> 18 labels
57_Item_Transforms-Text.svg -> 5 labels
58_Tabular_datasets_and_transforms.svg -> 25 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


59_Optimizer.svg -> 18 labels
60_Optimizers.svg -> 9 labels
61_Optimizer-Hyperparameters_scheduling.svg -> 17 labels
62_Create_model-Vision.svg -> 21 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


63_Create_Model-Text_GAN.svg -> 13 labels
64_Modules-Functions_Shapes_Pooling.svg -> 27 labels
65_Modules-Combine_layers_In_Out.svg -> 24 labels
66_Modules-Activations_Norms.svg -> 32 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)


67_Modules-Convolutions_Attention.svg -> 22 labels
68_Modules-Text_sequences_Dropout.svg -> 19 labels
69_Modules-Unet_GAN_Tabular.svg -> 21 labels
70_Loss_function-Interface.svg -> 15 labels
71_Loss_functions-Classification_Regression.svg -> 20 labels
72_Summary-DataBlock.svg -> 28 labels


  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:file.name, 1:text, 2:'?', 3:"NEW"}, ignore_index=True)
  links = links.append({0:fi

73_Summary-DataLoaders.svg -> 15 labels
74_Summary-Learner.svg -> 13 labels
75_Implementation_notes.svg -> 1 labels
76_Distributed_Training.svg -> 3 labels


OK         1498
NEW         114
REMOVED      96
Name: 3, dtype: int64

In [11]:
links.to_csv('links.csv', sep=';', header=None, index=False, encoding='utf-8-sig')

2. Open "links.csv" in a table editor (separator ';' and **UTF8 encoding**)
  - column 0 : SVG file name
  - column 1 : text element
  - column 2 : link to fastai doc -> '?', then URL or empty if no link
  - column 3 : line status -> NEW, OK, REMOVED 

Note : be careful to open and save the CSV file in **UTF-8 encoding** !


3. Locate the new lines with 'NEW' in column 3 or the lines not yet documented with '?' in column 2
  - add a link to fastai doc in column 2 (or leave the cell empty)
  - use 'docs.fast.ai.csv' to quickly find the doc URL
  - you can try to locate a similar text element with the status REMOVED to copy the previously selected link
  - optionnaly reorder the lines to group the elements with links at the top for each SVG file


4. After all lines have been updated :
  - delete all REMOVED lines
  - drop the colum 3 (status)
  - save a new version of 'links.csv'

In [12]:
links = pd.read_csv('links.csv', sep=';', header=None, encoding='utf-8', keep_default_na=False)

### 3.3 Process SVG files : add links

In [13]:
def wrap_with_link(dom, textElt, url):
    parent = textElt.parentNode
    link = dom.createElement('a')
    link.setAttributeNS("http://www.w3.org/1999/xlink", "href", url)    
    link.setAttributeNS("http://www.w3.org/1999/xlink", "target", "_top") 
    if textElt.previousSibling.tagName=="rect":
        rectElt = textElt.previousSibling
        parent.removeChild(rectElt)
        parent.replaceChild(link, textElt)        
        link.appendChild(rectElt)
        link.appendChild(textElt)
    else:
        parent.replaceChild(link, textElt)
        link.appendChild(textElt)

In [71]:
for file in files:
    dom = parse(open(file, encoding='utf-8'))
    print(file.name)
    changed = False
    for idx,textElt in enumerate(dom.getElementsByTagName('text')):
        text = get_text_value(textElt).replace('"','')
        rows = links[(links[0]==file.name) & (links[1]==text)]   
        if len(rows.index)>0:
            url = rows.iloc[0,2]
            if len(url)>0 and url!='?':
                if url[0]=='#':
                    url = "/fastaidocs/"+url
                wrap_with_link(dom,textElt,url)
                changed = True
    if changed:
        with open(file,'w') as f:
            f.write(dom.toxml())
            print(" -> updated")

01_Concepts-Data_loading.svg
 -> updated
02_Concepts-Model_training.svg
 -> updated
03_Concepts-Learner_lifecycle.svg
 -> updated
04_Learner-Create_an_instance.svg
 -> updated
05_Learner-Init_Attributes.svg
 -> updated
06_Learner-Training_methods.svg
 -> updated
07_Learner-Inference_methods.svg
 -> updated
08_Diagnostics-How_to_debug.svg
 -> updated
09_Show-Inputs_targets_predictions.svg
 -> updated
10_Show-Images.svg
 -> updated
11_Show-Text_points_boxes_tables.svg
 -> updated
12_Plot-Training_loop.svg
 -> updated
13_Model_evaluation-Interpretation.svg
 -> updated
14_Metrics-1_2.svg
 -> updated
15_Metrics-2_2.svg
 -> updated
16_Learner-Training_loop_1_2.svg
 -> updated
17_Learner-Training_loop_2_2.svg
 -> updated
18_Learner-Customize_training_loop.svg
 -> updated
19_Learner-Callbacks_1_2.svg
 -> updated
20_Learner-Callbacks_2_2.svg
 -> updated
21_Learner-Context_managers.svg
 -> updated
22_Learner-Public_methods_call_tree.svg
 -> updated
23_Learner-validate_and_Recorder.svg
 -> update

## 4. Generate the HTML page and menu

In [52]:
menuhtml = f"<h1>fastai v{version}</h1>\n"
menuhtml += "<a href=\"https://www.linkedin.com/in/laurent-prudhon\"><ul><li><small>forums.fast.ai: @laurentprudhon</small></li><li><small>Twitter : @prudholu</small></li></ul></a>\n"
for title1 in titles[1].unique():
    menuhtml += f"<h2>{title1}</h2>\n"
    menuhtml += "<ul>"
    for title2 in titles[2][titles[1]==title1]:
        num = titles[3][(titles[1]==title1) & (titles[2]==title2)].item()[:2]
        menuhtml += f"<li><a href=\"#{num}\">{title2}</a></li>\n"
    menuhtml += "</ul>"

In [53]:
mainhtml = ""
for title1 in titles[1].unique():
    mainhtml += f"<h2>{title1}</h2>\n"
    for title2 in titles[2][titles[1]==title1]:
        num = titles[3][(titles[1]==title1) & (titles[2]==title2)].item()[:2]
        mainhtml += f"<p><object id=\"{num}\" type=\"image/svg+xml\" data=\"/images/fastaidocs/{titles[3][(titles[1]==title1) & (titles[2]==title2)].item()}\"></object></p>\n"

In [54]:
main1html = """
<html>
<head>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-172638611-1"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());
  gtag('config', 'UA-172638611-1');
</script>
<style>
/* The sidenav */
.sidenav {
  padding: 10px;
  height: 100%;
  width: 300px;
  position: fixed;
  z-index: 1;
  top: 0;
  left: 0;
  color: #7F7F7F;
  background-color: #F7F7F7;
  overflow-x: hidden;
  border-right: solid 1px;
}

ul {
  padding-left: 10px;
}

li {
  list-style-type: none;
  padding-left: 0px;
  padding-bottom: 5px;
}

a {
  outline: none;
  text-decoration: none;
  color: #7F7F7F;
}

a:hover {
  border-bottom: 1px solid;
}

/* Page content */
.main {
  margin-left: 300px; /* Same as the width of the sidenav */
  padding: 0px 10px;
}

.main h2 {
  color: #7F7F7F;
  background-color: #F7F7F7;
  font-size: 50px;
  border-top: 1px solid;
  border-bottom: 1px solid;
  padding: 10px;
}

.main img {
  width: 100%;
  border: 1px solid #E7E7E7;
}

</style>
</head>

<body>

<div class="sidenav">

"""

In [55]:
main2html = """
</div>

<div class="main">
"""

In [56]:
main3html = """
</div>

</body>

</html>"""

In [57]:
text_file = open("index.html", "w")
text_file.write(main1html + menuhtml + main2html + mainhtml + main3html)
text_file.close()

# 5. Publish the result

1. Move all *_old.csv and temporary work files to a subdirectory ./update_to_vx.y.z
2. Commit and push the changes to Github
3. Checkout 
4. 