<a href="https://colab.research.google.com/github/lakshmanok/lakblogs/blob/main/dataframe_to_pdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Drawing a PDF table in Python

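This notebook renders a pandas DataFrame as a PDF table using Matplotlib's `Axes.table` together with the `PdfPages` backend, optionally splitting a large table across several pages.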

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

In [12]:
# Generate a reproducible random dataframe for a quick demo:
# `nrows` rows of integers in [0, 100), one column per uppercase letter.
import string

SEED = 42  # fixed seed so a fresh kernel run yields the same table (and PDFs)
nrows = 100
rng = np.random.default_rng(SEED)
df = pd.DataFrame(
    rng.integers(0, 100, size=(nrows, 26)),  # high bound is exclusive, like randint
    columns=list(string.ascii_uppercase),
    index=range(nrows),
)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,...,Q,R,S,T,U,V,W,X,Y,Z
0,19,19,89,44,30,49,43,18,0,3,...,22,23,56,68,53,85,12,28,49,20
1,34,97,87,36,98,9,5,81,92,0,...,62,80,55,63,37,55,7,45,66,7
2,79,91,61,71,69,52,77,98,34,99,...,75,86,54,38,86,27,57,67,37,50
3,45,60,41,39,62,99,40,23,0,87,...,6,12,74,71,48,3,81,84,94,39
4,34,99,42,67,30,13,57,27,62,13,...,82,56,84,87,6,93,86,26,88,87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,54,38,99,77,94,94,8,14,9,29,...,84,98,16,40,65,91,58,9,34,13
96,13,26,74,35,70,87,3,82,33,98,...,99,22,36,30,39,79,37,17,66,1
97,69,40,97,37,88,68,88,0,32,10,...,39,86,86,50,16,1,68,66,63,51
98,49,58,19,87,69,93,41,44,32,50,...,89,28,38,88,20,40,43,91,83,18


In [13]:
def _draw_as_table(df, pagesize):
    """Draw ``df`` as a Matplotlib table on a fresh figure and return the figure.

    Args:
        df: DataFrame to render; its index becomes the row labels and its
            columns become the column labels.
        pagesize: forwarded to ``plt.subplots`` as ``figsize``.

    Returns:
        The newly created figure. The caller is responsible for saving and
        closing it.
    """
    n_rows = len(df)
    n_cols = len(df.columns)

    # Zebra striping: even-numbered rows white, odd-numbered rows light gray.
    stripes = [
        ['lightgray' if row % 2 else 'white'] * n_cols
        for row in range(n_rows)
    ]

    fig, ax = plt.subplots(figsize=pagesize)
    # Only the table should be visible, so fit the axes and hide them.
    ax.axis('tight')
    ax.axis('off')
    ax.table(
        cellText=df.values,
        rowLabels=df.index,
        colLabels=df.columns,
        rowColours=['lightblue'] * n_rows,
        colColours=['lightblue'] * n_cols,
        cellColours=stripes,
        loc='center',
    )
    return fig
  

def _split_evenly(total, parts):
    """Split ``range(total)`` into ``parts`` contiguous ``(start, stop)`` slices.

    Slice sizes differ by at most one (the larger slices come first), and the
    slices together cover every index in ``range(total)`` exactly once.
    """
    base, extra = divmod(total, parts)
    bounds = []
    start = 0
    for k in range(parts):
        # The first `extra` slices absorb the remainder, one element each.
        stop = start + base + (1 if k < extra else 0)
        bounds.append((start, stop))
        start = stop
    return bounds


def dataframe_to_pdf(df, filename, numpages=(1, 1), pagesize=(11, 8.5)):
    """Write ``df`` to a PDF file as one table per page.

    Args:
        df: DataFrame to render.
        filename: passed to ``PdfPages`` as the output file.
        numpages: ``(nh, nv)``. The rows are split into ``nh`` bands and the
            columns into ``nv`` bands; each (row band, column band) pair is
            drawn on its own page, row band by row band.
        pagesize: ``(width, height)`` forwarded to Matplotlib as ``figsize``.

    Every row and column of ``df`` ends up on exactly one page, even when the
    row/column count is not divisible by ``nh``/``nv``. Slices that would be
    empty (more parts requested than rows or columns) are skipped.

    Raises:
        ZeroDivisionError: if ``nh`` or ``nv`` is 0.
    """
    nh, nv = numpages
    # Floor division (len // n) would silently drop the trailing remainder,
    # so compute explicit bounds that cover the whole frame instead.
    row_bounds = _split_evenly(len(df), nh)
    col_bounds = _split_evenly(len(df.columns), nv)

    with PdfPages(filename) as pdf:
        page_no = 0
        for i, (row_start, row_stop) in enumerate(row_bounds):
            for j, (col_start, col_stop) in enumerate(col_bounds):
                if row_start == row_stop or col_start == col_stop:
                    # Nothing to draw for an empty slice.
                    continue
                page = df.iloc[row_start:row_stop, col_start:col_stop]
                page_no += 1
                fig = _draw_as_table(page, pagesize)
                try:
                    if nh > 1 or nv > 1:
                        # Add a part/page number at bottom-center of page.
                        # fig.text uses figure-fraction coordinates, so half
                        # an inch from the bottom is 0.5 / height.
                        fig.text(0.5, 0.5 / pagesize[1],
                                 "Part-{}x{}: Page-{}".format(i+1, j+1, page_no),
                                 ha='center', fontsize=8)
                    pdf.savefig(fig, bbox_inches='tight')
                finally:
                    # Close this specific figure even if saving failed, so
                    # figures do not accumulate across pages.
                    plt.close(fig)

In [14]:
# Whole dataframe on a single page (default numpages=(1, 1)).
dataframe_to_pdf(df, 'test_1.pdf')
# Same dataframe split into a 3 x 2 grid of parts: 3 row bands x 2 column bands.
dataframe_to_pdf(df, 'test_6.pdf', numpages=(3, 2))

In [15]:
# List the working directory to confirm the PDF files were written.
!ls -l

total 196
drwxr-xr-x 1 root root  4096 Jul  6 13:22 sample_data
-rw-r--r-- 1 root root 60964 Jul 12 18:54 test_1.pdf
-rw-r--r-- 1 root root 73409 Jul 12 18:54 test_6.pdf
-rw-r--r-- 1 root root 61143 Jul 12 16:35 test.pdf


In [16]:
# Embed the single-page PDF inline; the trailing expression is what the
# notebook renders as this cell's output.
from IPython.display import IFrame
IFrame("test_1.pdf", width=600, height=300)

In [17]:
# Embed the multi-part PDF (written with numpages=(3, 2)) inline.
# IFrame is imported again here, so this cell does not rely on the
# previous display cell having been run.
from IPython.display import IFrame
IFrame("test_6.pdf", width=600, height=300)