# Riset Konversi PDF ke Gambar

**Tujuan**
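- Membandingkan kecepatan konversi file PDF ke file gambar antar-library pada berbagai nilai DPI
- Membandingkan ukuran folder hasil konversi untuk setiap kombinasi library dan DPI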

**Parameter:**
- PDF Converter Libraries: pdf2image, pdfium, pymupdf
- DPI: 300, 250, 200, 150, 100, 50, 20, 10, 5
- Waktu

**Base perbandingan:**
- 300 DPI
- Waktu untuk 300 DPI

**CSV Column for Comparisons:**
- datetime
- library_name
- dpi
- converting_time
- output_folder_size

In [6]:
import sys
import os
from pdf2image import convert_from_path
import pymupdf
import pypdfium2 as pdfium
import time
from datetime import datetime

sys.path.append(os.path.abspath('..'))
from src.helper import get_folder_size, log

In [5]:
# Benchmark pdf2image: convert the whole PDF once per DPI setting and append
# one CSV row per run (datetime, library_name, dpi, converting_time,
# output_folder_size) to the shared result log.
dpi_list = [300, 250, 200, 150, 100, 50, 20, 10, 5]

for dpi in dpi_list:
    print(f"Converting to {dpi} dpi images...")
    output_folder = f'../outputs/pdf2img_{dpi}dpi'
    # Create the folder before the clock starts so only the conversion is
    # timed. makedirs also creates a missing ../outputs parent. It still
    # raises FileExistsError on a rerun, on purpose: leftovers from an earlier
    # run would inflate the measured folder size.
    os.makedirs(output_folder)

    # perf_counter is monotonic, so a system clock adjustment during a long
    # conversion cannot distort the measured duration.
    start_time = time.perf_counter()
    convert_from_path(
        pdf_path='../datasets/statistik-indonesia-2024.pdf',
        dpi=dpi,
        fmt='jpg',
        output_folder=output_folder,
    )
    converting_time = time.perf_counter() - start_time

    output_folder_size = get_folder_size(output_folder)
    # Build the row once so the logged and the printed line are identical,
    # including the timestamp.
    row = f"{datetime.now()},pdf2img,{dpi},{converting_time},{output_folder_size}"
    log('../datasets/result-log.csv', row)
    print(row)

Converting to 250 dpi images...
2024-10-23 22:47:04.467436,pdf2img,250,148.6288459300995,278.72
Converting to 200 dpi images...
2024-10-23 22:48:53.715457,pdf2img,200,109.24184608459473,201.25
Converting to 150 dpi images...
2024-10-23 22:50:19.190190,pdf2img,150,85.46749496459961,130.42
Converting to 100 dpi images...
2024-10-23 22:51:10.012915,pdf2img,100,50.81601595878601,68.7
Converting to 50 dpi images...
2024-10-23 22:51:40.285650,pdf2img,50,30.266543865203857,20.35
Converting to 20 dpi images...
2024-10-23 22:52:02.882091,pdf2img,20,22.5888090133667,4.14
Converting to 10 dpi images...
2024-10-23 22:52:21.305952,pdf2img,10,18.41783571243286,1.55
Converting to 5 dpi images...
2024-10-23 22:52:39.279597,pdf2img,5,17.96633791923523,0.82


In [4]:
# Benchmark PyMuPDF: render every page of the PDF to a JPEG once per DPI
# setting and append one CSV row per run (datetime, library_name, dpi,
# converting_time, output_folder_size) to the shared result log.
dpi_list = [300, 250, 200, 150, 100, 50, 20, 10, 5]
for dpi in dpi_list:
    print(f"Converting to {dpi} dpi images...")
    output_folder = f'../outputs/pymupdf_{dpi}dpi'
    # Create the folder before the clock starts so only the conversion is
    # timed. makedirs also creates a missing ../outputs parent. It still
    # raises FileExistsError on a rerun, so an earlier run's images are never
    # silently mixed into the measurement.
    os.makedirs(output_folder)

    # perf_counter is monotonic, so a system clock adjustment during a long
    # conversion cannot distort the measured duration.
    start_time = time.perf_counter()
    # Opening the document is part of the timed work. The context manager
    # closes it again, so no handle is leaked per DPI iteration.
    with pymupdf.open('../datasets/statistik-indonesia-2024.pdf') as doc:
        for page in doc:
            pix = page.get_pixmap(dpi=dpi)  # render the page to an image
            # page.number is used as-is for the file name.
            pix.save(f"{output_folder}/page-{page.number}.jpg")
    converting_time = time.perf_counter() - start_time

    output_folder_size = get_folder_size(output_folder)
    # Build the row once so the logged and the printed line are identical,
    # including the timestamp.
    row = f"{datetime.now()},pymupdf,{dpi},{converting_time},{output_folder_size}"
    log('../datasets/result-log.csv', row)
    print(row)

Converting to 300 dpi images...
2024-10-24 07:39:12.615716,pymupdf,300,383.2682309150696,636.07
Converting to 250 dpi images...
2024-10-24 07:43:45.015801,pymupdf,250,272.3929591178894,495.85
Converting to 200 dpi images...
2024-10-24 07:46:53.458470,pymupdf,200,188.43637895584106,361.91
Converting to 150 dpi images...
2024-10-24 07:48:51.866998,pymupdf,150,118.40183925628662,237.54
Converting to 100 dpi images...
2024-10-24 07:49:54.865291,pymupdf,100,62.991759061813354,130.74
Converting to 50 dpi images...
2024-10-24 07:50:23.189483,pymupdf,50,28.318040132522583,41.49
Converting to 20 dpi images...
2024-10-24 07:50:40.380795,pymupdf,20,17.18501305580139,8.83
Converting to 10 dpi images...
2024-10-24 07:50:56.412142,pymupdf,10,16.02527904510498,2.95
Converting to 5 dpi images...
2024-10-24 07:51:14.158162,pymupdf,5,17.739565134048462,1.24


In [8]:
# Benchmark pypdfium2: render every page of the PDF to a JPEG once per DPI
# setting and append one CSV row per run (datetime, library_name, dpi,
# converting_time, output_folder_size) to the shared result log.
#
# NOTE(review): the document is opened once, outside the timed region, whereas
# the pymupdf cell opens its document inside the timed region. Confirm whether
# the open cost should be included before comparing the two libraries.
pdf = pdfium.PdfDocument("../datasets/statistik-indonesia-2024.pdf")

dpi_list = [300, 250, 200, 150, 100, 50, 20, 10, 5]

for dpi in dpi_list:
    print(f"Converting to {dpi} dpi images...")
    output_folder = f'../outputs/pdfium_{dpi}dpi'
    # Create the folder before the clock starts so only the conversion is
    # timed. makedirs also creates a missing ../outputs parent. It still
    # raises FileExistsError on a rerun, so an earlier run's images are never
    # silently mixed into the measurement.
    os.makedirs(output_folder)

    # The render scale is relative to 72 units per inch, so scale = dpi / 72.
    # It is the same for every page, so compute it once per DPI setting.
    scale = 1/72 * dpi

    # perf_counter is monotonic, so a system clock adjustment during a long
    # conversion cannot distort the measured duration.
    start_time = time.perf_counter()
    for i in range(len(pdf)):
        bitmap = pdf[i].render(
            scale=scale,
            rotation=0,  # no additional rotation
        )
        pil_image = bitmap.to_pil()
        # Output files are numbered from 1.
        pil_image.save(f"{output_folder}/page-{i+1}.jpg")
    converting_time = time.perf_counter() - start_time

    output_folder_size = get_folder_size(output_folder)
    # Build the row once so the logged and the printed line are identical,
    # including the timestamp.
    row = f"{datetime.now()},pdfium,{dpi},{converting_time},{output_folder_size}"
    log('../datasets/result-log.csv', row)
    print(row)

Converting to 300 dpi images...
2024-10-24 08:05:48.449504,pdfium,300,144.2276690006256,346.47
Converting to 250 dpi images...
2024-10-24 08:07:36.379627,pdfium,250,107.9222469329834,266.49
Converting to 200 dpi images...
2024-10-24 08:08:54.918299,pdfium,200,78.5314691066742,192.51
Converting to 150 dpi images...
2024-10-24 08:09:48.254962,pdfium,150,53.32326817512512,124.04
Converting to 100 dpi images...
2024-10-24 08:10:24.633738,pdfium,100,36.371926069259644,65.69
Converting to 50 dpi images...
2024-10-24 08:10:50.676569,pdfium,50,26.036309003829956,19.0
Converting to 20 dpi images...
2024-10-24 08:11:14.679918,pdfium,20,23.996803283691406,4.04
Converting to 10 dpi images...
2024-10-24 08:11:42.763905,pdfium,10,28.078026056289673,1.56
Converting to 5 dpi images...
2024-10-24 08:12:23.792623,pdfium,5,41.02177810668945,0.81


In [7]:
# Scratch check: rounding a float to two decimal places.
round(2.432343802, 2)

2.43

In [3]:
import datetime

In [4]:
# Scratch check of the timestamp format written to the CSV log.
# Import the class explicitly: this cell then works whether `datetime`
# currently names the module or the class, and it leaves `datetime` bound to
# the class that the benchmark cells call as `datetime.now()`.
from datetime import datetime

str(datetime.now())

'2024-10-23 21:54:52.756655'

In [8]:
# Scratch check: append one dummy CSV row to a log file.
# NOTE(review): this targets ../outputs/result-log.csv, while the benchmark
# cells log to ../datasets/result-log.csv. Confirm which path is intended.
dummy_row = "\n10,ewr,ewew.e,rew,10"
with open('../outputs/result-log.csv', 'a') as log_file:
    log_file.write(dummy_row)