# Convert PDF of gSlides to Images (PNG)
- store 'architectures.pdf' in /vertex-ai-mlops/architectures
- run this notebook in /vertex-ai-mlops/architectures
- slides are stored as X_arch.png and X_console.png in /vertex-ai-mlops/architectures/slides, where X is the notebook prefix (for example 02a)
- thumbnails are stored as X.png in /vertex-ai-mlops/architectures/thumbnails (/plain, /prepared, and /playbutton)

---
## Setup

In [1]:
# List the working directory to confirm the notebook is running next to the slides/ and thumbnails/ folders.
!ls

'Create Images.ipynb'   notebooks   overview   slides   thumbnails


In [2]:
!pip install pdf2image -q -U

In [3]:
# Installs poppler from the conda-forge channel without prompting (-y) and with reduced output (-q).
# NOTE(review): presumably required because pdf2image relies on poppler's command-line tools — confirm against the pdf2image docs.
!conda install -c conda-forge poppler -y -q

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - poppler


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    boost-cpp-1.74.0           |       h312852a_4        16.3 MB  conda-forge
    bzip2-1.0.8                |       h7f98852_4         484 KB  conda-forge
    cairo-1.16.0               |    h6cf1ce9_1008         1.5 MB  conda-forge
    font-ttf-dejavu-sans-mono-2.37|       hab24e00_0         388 KB  conda-forge
    font-ttf-inconsolata-3.000 |       h77eed37_0          94 KB  conda-forge
    font-ttf-source-code-pro-2.038|       h77eed37_0         684 KB  conda-forge
    font-ttf-ubuntu-0.83       |       hab24e00_0         1.9 MB  conda-forge
    fontconfig-2.13.94         |       ha180cfb_0         323 KB  conda-forge
    fonts-conda-

In [1]:
!pip install PyPDF2 -U -q

In [1]:
from pdf2image import convert_from_path
from PIL import Image
import os, glob
from PyPDF2 import PdfFileReader

---
## Mapping

In [2]:
# Section keys are the notebook filename prefixes from the parent directory:
# the text before the first ' - ', with the leading '../' from the glob pattern removed.
notebooks = [path.split(' - ')[0][3:] for path in glob.glob('../*.ipynb')]

In [3]:
# Plain string ordering: a bare prefix such as '04' lands directly before '04a'.
notebooks = sorted(notebooks)

In [4]:
# 'readme' is not a notebook but owns the first four-page section of the deck,
# so it must sit at index 0 of the section list.
# Guarded so that re-running this cell does not prepend a second 'readme'
# and shift every page-to-section lookup by one.
if notebooks[:1] != ['readme']:
    notebooks = ['readme'] + notebooks

In [5]:
# Show the final section order; the entry at index i owns PDF pages 4*i+2 through 4*i+5.
notebooks

['readme',
 '00',
 '01',
 '02a',
 '02b',
 '02c',
 '03a',
 '03b',
 '03c',
 '04',
 '04a',
 '04b',
 '04c',
 '04d',
 '04e',
 '04f',
 '04g',
 '05',
 '05a',
 '05b',
 '05c',
 '05d',
 '05e',
 '05f',
 '05g',
 '05h',
 '05i',
 '06a',
 '10',
 '11',
 '99']

In [6]:
# Number of section keys available to the page-to-section lookup.
len(notebooks)

31

---
## Architectures.pdf

In [7]:
# Count the pages of the exported deck; the conversion loop walks pages 1..pages.
with open('architectures.pdf', 'rb') as pdf_file:
    pages = PdfFileReader(pdf_file).getNumPages()
pages

121

In [8]:
# Page layout of architectures.pdf: page 1 is a header, then every section
# (in the order of `notebooks`) takes four consecutive pages, numbered p2 = 0..3:
#   p2 == 0 -> not exported
#   p2 == 1 -> slides/{section}_arch.png
#   p2 == 2 -> slides/{section}_console.png
#   p2 == 3 -> thumbnails/plain/{section}.png

# Fail before rendering anything if the deck holds more sections than we have
# names for; otherwise the lookup below would raise IndexError part-way through.
if pages >= 2 and (pages - 2) // 4 >= len(notebooks):
    raise ValueError(
        f"architectures.pdf has {pages} pages, which needs {(pages - 2) // 4 + 1} "
        f"section names, but only {len(notebooks)} are available"
    )

for p in range(1, pages+1):
    p2 = (p-2) % 4 # the 0-3 slide number within the section
    if p == 1: section = 'header'
    else: section = notebooks[(p-2) // 4]

    targets = {
        1: f"slides/{section}_arch.png",
        2: f"slides/{section}_console.png",
        3: f"thumbnails/plain/{section}.png",
    }

    # Render only the pages that are actually written to disk; the header page
    # and the first page of each section are skipped instead of being rasterized and discarded.
    if section != 'header' and p2 in targets:
        image = convert_from_path('architectures.pdf', size=(1920, 1080), first_page=p, last_page=p)
        image[0].save(targets[p2])

    print("page = ", p, ", section = ", section, "p2 = ", p2)

page =  1 , section =  header p2 =  3
page =  2 , section =  readme p2 =  0
page =  3 , section =  readme p2 =  1
page =  4 , section =  readme p2 =  2
page =  5 , section =  readme p2 =  3
page =  6 , section =  00 p2 =  0
page =  7 , section =  00 p2 =  1
page =  8 , section =  00 p2 =  2
page =  9 , section =  00 p2 =  3
page =  10 , section =  01 p2 =  0
page =  11 , section =  01 p2 =  1
page =  12 , section =  01 p2 =  2
page =  13 , section =  01 p2 =  3
page =  14 , section =  02a p2 =  0
page =  15 , section =  02a p2 =  1
page =  16 , section =  02a p2 =  2
page =  17 , section =  02a p2 =  3
page =  18 , section =  02b p2 =  0
page =  19 , section =  02b p2 =  1
page =  20 , section =  02b p2 =  2
page =  21 , section =  02b p2 =  3
page =  22 , section =  02c p2 =  0
page =  23 , section =  02c p2 =  1
page =  24 , section =  02c p2 =  2
page =  25 , section =  02c p2 =  3
page =  26 , section =  03a p2 =  0
page =  27 , section =  03a p2 =  1
page =  28 , section =  03a p2

---
## Thumbnails.pdf

/prepared versions
- add the architecture slide to the plain version

In [9]:
# Build thumbnails/prepared from thumbnails/plain: every thumbnail except the
# readme one gets its section's architecture slide pasted on top.
for filename in os.listdir('thumbnails/plain'):

    if not (filename.endswith('.png')):
        continue

    # Context managers make sure the source files are closed even if a step fails.
    with Image.open(f'thumbnails/plain/{filename}') as thumb:

        # The readme thumbnail is saved to /prepared without an overlay.
        if filename != 'readme.png':
            tWidth, tHeight = thumb.size

            # splitext keeps the whole stem, so a section name that itself
            # contains a dot still matches the slides/{section}_arch.png file.
            section = os.path.splitext(filename)[0]

            # grab related architecture slide
            with Image.open(f"slides/{section}_arch.png") as arch:
                slide = arch.convert("RGBA")

            # Scale the slide relative to the thumbnail so it fits inside it.
            slide = slide.resize((int(tWidth/1.6), int(tHeight/1.6)))

            # The slide doubles as its own mask, so its alpha channel is honoured.
            thumb.paste(slide, (int(tWidth/3), int(tHeight/7)), slide)

        # save the prepared version
        thumb.save(f'thumbnails/prepared/{filename}')

/playbutton versions
- add playbutton to the prepared versions

In [10]:
# Load the play-button logo once; it is converted to RGBA because the paste
# step also passes it as the transparency mask.
playbutton = Image.open('thumbnails/logo_youtube_color_1x_web_512dp.png').convert("RGBA")
pbWidth = playbutton.width
pbHeight = playbutton.height

In [11]:
# Stamp the play button onto the centre of every prepared thumbnail and
# write the result under thumbnails/playbutton with the same file name.
for filename in os.listdir('thumbnails/prepared'):
    if filename.endswith('.png'):
        thumbnail = Image.open(f'thumbnails/prepared/{filename}')
        tnWidth, tnHeight = thumbnail.size
        print(filename)
        # Top-left corner that puts the button in the middle of the thumbnail.
        left = int(tnWidth/2 - pbWidth/2)
        top = int(tnHeight/2 - pbHeight/2)
        thumbnail.paste(playbutton, (left, top), playbutton)
        thumbnail.save(f'thumbnails/playbutton/{filename}')

01.png
04b.png
06a.png
05d.png
05a.png
04g.png
05c.png
02b.png
04c.png
02c.png
10.png
03c.png
11.png
05h.png
03b.png
03a.png
05i.png
readme.png
04d.png
00.png
05e.png
05b.png
05.png
02a.png
04f.png
05g.png
05f.png
04a.png
04e.png
04.png
