# Pandas presentation tips I wish I knew earlier

## Setup

In [1]:
import os
import platform
import random
from platform import python_version

import jupyterlab
import numpy as np
import pandas as pd
import lxml

# Record the environment this notebook was executed in, so readers can
# compare their own setup against it when an output looks different.
print("System")
print("os name: %s" % os.name)
print("system: %s" % platform.system())
print("release: %s" % platform.release())
print()
print("Python")
print("version: %s" % python_version())
print()
# Package lines use the `name==version` form so they can be copied into a
# requirements file; the jupyterlab label was previously misspelled.
print("Python Packages")
print("jupyterlab==%s" % jupyterlab.__version__)
print("pandas==%s" % pd.__version__)
print("numpy==%s" % np.__version__)
print("lxml==%s" % lxml.__version__)

System
os name: posix
system: Darwin
release: 19.2.0

Python
version: 3.8.0

Python Packages
jupterlab==1.2.4
pandas==1.0.0
numpy==1.18.0
lxml==4.5.0




In [2]:
# Seed both random sources used below (the stdlib `random` module and
# NumPy's global generator) so that re-running the notebook from a fresh
# kernel rebuilds exactly the same demo frame.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

n = 10  # number of rows in the demo frame
df = pd.DataFrame(
    {
        "col1": np.random.random_sample(n),
        "col2": np.random.random_sample(n),
        # Each cell holds a list of 3 to 5 integers, each between 0 and 10
        # (both bounds inclusive, as `random.randint` includes its endpoints).
        "col3": [[random.randint(0, 10) for _ in range(random.randint(3, 5))] for _ in range(n)],
    }
)
# Bare last expression: the notebook displays (rows, columns).
df.shape

(10, 3)

In [3]:
# With no output buffer given, to_html hands the markup back as a string.
df_html = df.to_html(buf=None)

In [4]:
# Save the rendered table to disk. The encoding is pinned to UTF-8 so the
# bytes written do not depend on the platform's default text encoding.
with open('analysis.html', 'w', encoding='utf-8') as f:
    f.write(df_html)

In [17]:
# read_html gives back a list of frames rather than a single DataFrame,
# so the result is kept under a plural name and displayed as-is.
df_list = pd.read_html(io='analysis.html')
df_list

[   Unnamed: 0      col1      col2             col3
 0           0  0.579586  0.403048    [0, 1, 10, 8]
 1           1  0.670832  0.535344        [5, 1, 4]
 2           2  0.941318  0.321249  [9, 4, 2, 6, 8]
 3           3  0.527311  0.777454     [3, 4, 9, 0]
 4           4  0.263490  0.442874   [10, 9, 2, 10]
 5           5  0.242746  0.660936        [2, 7, 0]
 6           6  0.067894  0.692753     [2, 3, 8, 5]
 7           7  0.481788  0.106727  [0, 5, 7, 3, 8]
 8           8  0.757051  0.517097  [1, 9, 4, 3, 5]
 9           9  0.105228  0.101313  [6, 0, 8, 6, 8]]

In [18]:
# Bare expression: the notebook shows the returned string's repr, so the
# line breaks appear as escaped "\n" sequences.
df.to_latex(buf=None)

'\\begin{tabular}{lrrl}\n\\toprule\n{} &      col1 &      col2 &             col3 \\\\\n\\midrule\n0 &  0.579586 &  0.403048 &    [0, 1, 10, 8] \\\\\n1 &  0.670832 &  0.535344 &        [5, 1, 4] \\\\\n2 &  0.941318 &  0.321249 &  [9, 4, 2, 6, 8] \\\\\n3 &  0.527311 &  0.777454 &     [3, 4, 9, 0] \\\\\n4 &  0.263490 &  0.442874 &   [10, 9, 2, 10] \\\\\n5 &  0.242746 &  0.660936 &        [2, 7, 0] \\\\\n6 &  0.067894 &  0.692753 &     [2, 3, 8, 5] \\\\\n7 &  0.481788 &  0.106727 &  [0, 5, 7, 3, 8] \\\\\n8 &  0.757051 &  0.517097 &  [1, 9, 4, 3, 5] \\\\\n9 &  0.105228 &  0.101313 &  [6, 0, 8, 6, 8] \\\\\n\\bottomrule\n\\end{tabular}\n'

Pass the result to `print` so the escaped `\n` sequences become real line breaks and the LaTeX source is nicely formatted.

In [19]:
# Build the LaTeX source first, then print it so newlines are rendered.
latex_source = df.to_latex()
print(latex_source)

\begin{tabular}{lrrl}
\toprule
{} &      col1 &      col2 &             col3 \\
\midrule
0 &  0.579586 &  0.403048 &    [0, 1, 10, 8] \\
1 &  0.670832 &  0.535344 &        [5, 1, 4] \\
2 &  0.941318 &  0.321249 &  [9, 4, 2, 6, 8] \\
3 &  0.527311 &  0.777454 &     [3, 4, 9, 0] \\
4 &  0.263490 &  0.442874 &   [10, 9, 2, 10] \\
5 &  0.242746 &  0.660936 &        [2, 7, 0] \\
6 &  0.067894 &  0.692753 &     [2, 3, 8, 5] \\
7 &  0.481788 &  0.106727 &  [0, 5, 7, 3, 8] \\
8 &  0.757051 &  0.517097 &  [1, 9, 4, 3, 5] \\
9 &  0.105228 &  0.101313 &  [6, 0, 8, 6, 8] \\
\bottomrule
\end{tabular}



In [20]:
# Render the frame as a Markdown table and print the raw text.
markdown_table = df.to_markdown()
print(markdown_table)

|    |      col1 |     col2 | col3            |
|---:|----------:|---------:|:----------------|
|  0 | 0.579586  | 0.403048 | [0, 1, 10, 8]   |
|  1 | 0.670832  | 0.535344 | [5, 1, 4]       |
|  2 | 0.941318  | 0.321249 | [9, 4, 2, 6, 8] |
|  3 | 0.527311  | 0.777454 | [3, 4, 9, 0]    |
|  4 | 0.26349   | 0.442874 | [10, 9, 2, 10]  |
|  5 | 0.242746  | 0.660936 | [2, 7, 0]       |
|  6 | 0.0678942 | 0.692753 | [2, 3, 8, 5]    |
|  7 | 0.481788  | 0.106727 | [0, 5, 7, 3, 8] |
|  8 | 0.757051  | 0.517097 | [1, 9, 4, 3, 5] |
|  9 | 0.105228  | 0.101313 | [6, 0, 8, 6, 8] |


In [21]:
# Export to a spreadsheet; index=False asks pandas not to write the row index.
df.to_excel(excel_writer='analysis.xlsx', index=False)

In [22]:
# Load the spreadsheet back; as the last expression, the frame is displayed.
pd.read_excel(io='analysis.xlsx')

Unnamed: 0,col1,col2,col3
0,0.579586,0.403048,"[0, 1, 10, 8]"
1,0.670832,0.535344,"[5, 1, 4]"
2,0.941318,0.321249,"[9, 4, 2, 6, 8]"
3,0.527311,0.777454,"[3, 4, 9, 0]"
4,0.26349,0.442874,"[10, 9, 2, 10]"
5,0.242746,0.660936,"[2, 7, 0]"
6,0.067894,0.692753,"[2, 3, 8, 5]"
7,0.481788,0.106727,"[0, 5, 7, 3, 8]"
8,0.757051,0.517097,"[1, 9, 4, 3, 5]"
9,0.105228,0.101313,"[6, 0, 8, 6, 8]"


In [24]:
# Plain-text rendering of the whole frame, printed as ordinary console text.
plain_text_table = df.to_string()
print(plain_text_table)

       col1      col2             col3
0  0.579586  0.403048    [0, 1, 10, 8]
1  0.670832  0.535344        [5, 1, 4]
2  0.941318  0.321249  [9, 4, 2, 6, 8]
3  0.527311  0.777454     [3, 4, 9, 0]
4  0.263490  0.442874   [10, 9, 2, 10]
5  0.242746  0.660936        [2, 7, 0]
6  0.067894  0.692753     [2, 3, 8, 5]
7  0.481788  0.106727  [0, 5, 7, 3, 8]
8  0.757051  0.517097  [1, 9, 4, 3, 5]
9  0.105228  0.101313  [6, 0, 8, 6, 8]
