In [10]:
pip install polars

In addition, using fork() with Python in general is a recipe for mysterious
deadlocks and crashes.

The most likely reason you are seeing this error is because you are using the
multiprocessing module on Linux, which uses fork() by default. This will be
fixed in Python 3.14. Until then, you want to use the "spawn" context instead.

See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.

or by setting POLARS_ALLOW_FORKING_THREAD=1.

  pid, fd = os.forkpty()


Note: you may need to restart the kernel to use updated packages.


In [11]:
pip install great_tables

Note: you may need to restart the kernel to use updated packages.


In [1]:
from great_tables import GT, html
# This is a table present in great_tables
from great_tables.data import airquality

# Work on the first ten rows only and add a constant Year column,
# which is grouped under the "Time" spanner below.
airquality_m = airquality.head(10).assign(Year=1973)

# Labels are wrapped in html() so the <br>, <sup> and &deg; markup is kept as HTML.
measurement_labels = {
    "Ozone": html("Ozone,<br>ppbV"),
    "Solar_R": html("Solar R.,<br>cal/m<sup>2</sup>"),
    "Wind": html("Wind,<br>mph"),
    "Temp": html("Temp,<br>&deg;F"),
}
time_columns = ["Year", "Month", "Day"]

gt_airquality = (
    GT(airquality_m)
    .tab_header(
        subtitle="Daily measurements in New York City (May 1-10, 1973)",
        title="New York Air Quality Measurements",
    )
    # Two spanners: one over the date parts, one over the measured quantities.
    .tab_spanner(columns=time_columns, label="Time")
    .tab_spanner(columns=list(measurement_labels), label="Measurement")
    .cols_move_to_start(columns=time_columns)
    .cols_label(**measurement_labels)
)

gt_airquality

New York Air Quality Measurements,New York Air Quality Measurements,New York Air Quality Measurements,New York Air Quality Measurements,New York Air Quality Measurements,New York Air Quality Measurements,New York Air Quality Measurements
"Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)","Daily measurements in New York City (May 1-10, 1973)"
Time,Time,Time,Measurement,Measurement,Measurement,Measurement
Year,Month,Day,"Ozone, ppbV","Solar R., cal/m2","Wind, mph","Temp, °F"
1973,5,1,41.0,190.0,7.4,67
1973,5,2,36.0,118.0,8.0,72
1973,5,3,12.0,149.0,12.6,74
1973,5,4,18.0,313.0,11.5,62
1973,5,5,,,14.3,56
1973,5,6,28.0,,14.9,66
1973,5,7,23.0,299.0,8.6,65
1973,5,8,19.0,99.0,13.8,59
1973,5,9,8.0,19.0,20.1,61
1973,5,10,,194.0,8.6,69


In [2]:
#formatting values

from great_tables.data import exibble
from great_tables import vals

# Keep four columns and the first five rows of the bundled example table.
exibble_subset = exibble[["num", "date", "time", "currency"]].head(5)
gt_ex = GT(exibble_subset)
gt_ex

num,date,time,currency
0.1111,2015-01-15,13:35,49.95
2.222,2015-02-15,14:40,17.95
33.33,2015-03-15,15:45,1.39
444.4,2015-04-15,16:50,65100.0
5550.0,2015-05-15,17:55,1325.81


The values within the table body, specifically those within the body cells, can be formatted with a large selection of fmt_*() methods like fmt_number(), fmt_integer(), fmt_scientific(), and more.

Use the fmt_number() method to obtain formatted values that have a fixed level of decimal precision and grouping separators.

In [3]:
# Apply one formatter per column in a single chain:
#   num      -> number with two decimal places
#   time     -> "h_m_p" time style
#   date     -> "wday_day_month_year" date style
#   currency -> currency values in USD
gt_ex = (
    gt_ex
    .fmt_number(columns="num", decimals=2)
    .fmt_time(columns="time", time_style="h_m_p")
    .fmt_date(columns="date", date_style="wday_day_month_year")
    .fmt_currency(columns="currency", currency="USD")
)

gt_ex

num,date,time,currency
0.11,Thursday 15 January 2015,1:35 PM,$49.95
2.22,Sunday 15 February 2015,2:40 PM,$17.95
33.33,Sunday 15 March 2015,3:45 PM,$1.39
444.4,Wednesday 15 April 2015,4:50 PM,"$65,100.00"
5550.0,Friday 15 May 2015,5:55 PM,"$1,325.81"


Nanoplots

In [5]:
import polars as pl
# Each entry is a single space-separated string holding one row's series of values.
number_strings = [
    "20 23 6 7 37 23 21 4 7 16",
    "2.3 6.8 9.2 2.42 3.5 12.1 5.3 3.6 7.2 3.74",
    "-12 -5 6 3.7 0 8 -7.4",
    "2 0 15 7 8 10 1 24 17 13 6",
]

random_numbers_df = pl.DataFrame(
    {
        "example": [f"Row {i}" for i in range(1, 5)],
        "numbers": number_strings,
    }
)
print(random_numbers_df)

# Render the "numbers" column as nanoplots using the default settings.
GT(random_numbers_df).fmt_nanoplot(columns="numbers")

shape: (4, 2)
┌─────────┬─────────────────────────────────┐
│ example ┆ numbers                         │
│ ---     ┆ ---                             │
│ str     ┆ str                             │
╞═════════╪═════════════════════════════════╡
│ Row 1   ┆ 20 23 6 7 37 23 21 4 7 16       │
│ Row 2   ┆ 2.3 6.8 9.2 2.42 3.5 12.1 5.3 … │
│ Row 3   ┆ -12 -5 6 3.7 0 8 -7.4           │
│ Row 4   ┆ 2 0 15 7 8 10 1 24 17 13 6      │
└─────────┴─────────────────────────────────┘


example,numbers
Row 1,3742023673723214716
Row 2,12.12.302.306.809.202.423.5012.15.303.607.203.74
Row 3,8.00−12.0−12.0−5.006.003.7008.00−7.40
Row 4,2402015781012417136


In [6]:
# Same nanoplots, with a reference line set to "mean".
GT(random_numbers_df).fmt_nanoplot(
    columns="numbers",
    reference_line="mean",
)

example,numbers
Row 1,16.43742023673723214716
Row 2,5.6212.12.302.306.809.202.423.5012.15.303.607.203.74
Row 3,−0.968.00−12.0−12.0−5.006.003.7008.00−7.40
Row 4,9.362402015781012417136


In [7]:
# Same nanoplots, with a reference area bounded by "min" and "median".
GT(random_numbers_df).fmt_nanoplot(
    columns="numbers",
    reference_area=["min", "median"],
)

example,numbers
Row 1,3742023673723214716
Row 2,12.12.302.306.809.202.423.5012.15.303.607.203.74
Row 3,8.00−12.0−12.0−5.006.003.7008.00−7.40
Row 4,2402015781012417136


In [8]:
# Same nanoplots with autoscale switched on.
GT(random_numbers_df).fmt_nanoplot(
    columns="numbers",
    autoscale=True,
)

example,numbers
Row 1,37-122023673723214716
Row 2,37.0−12.02.306.809.202.423.5012.15.303.607.203.74
Row 3,37.0−12.0−12.0−5.006.003.7008.00−7.40
Row 4,37-122015781012417136


In [9]:
from great_tables import nanoplot_options

# Styling overrides for the line nanoplots: data points, data line,
# data area and vertical guide.
line_plot_options = nanoplot_options(
    data_point_radius=8,
    data_point_stroke_color="black",
    data_point_stroke_width=2,
    data_point_fill_color="white",
    data_line_type="straight",
    data_line_stroke_color="brown",
    data_line_stroke_width=2,
    data_area_fill_color="orange",
    vertical_guide_stroke_color="green",
)

GT(random_numbers_df).fmt_nanoplot(columns="numbers", options=line_plot_options)

example,numbers
Row 1,3742023673723214716
Row 2,12.12.302.306.809.202.423.5012.15.303.607.203.74
Row 3,8.00−12.0−12.0−5.006.003.7008.00−7.40
Row 4,2402015781012417136


In [10]:
# Render the same data as bar nanoplots instead of the default plot type.
GT(random_numbers_df).fmt_nanoplot(
    columns="numbers",
    plot_type="bar",
)

example,numbers
Row 1,3702023673723214716
Row 2,12.102.306.809.202.423.5012.15.303.607.203.74
Row 3,8.00−12.0−12.0−5.006.003.7008.00−7.40
Row 4,2402015781012417136


In [11]:
# Styling overrides for the bar nanoplots: separate settings for regular and
# negative bars, plus the reference line, reference area and vertical guide.
bar_plot_options = nanoplot_options(
    data_bar_stroke_color="gray",
    data_bar_stroke_width=2,
    data_bar_fill_color="orange",
    data_bar_negative_stroke_color="blue",
    data_bar_negative_stroke_width=1,
    data_bar_negative_fill_color="lightblue",
    reference_line_color="pink",
    reference_area_fill_color="bisque",
    vertical_guide_stroke_color="blue",
)

GT(random_numbers_df).fmt_nanoplot(
    columns="numbers",
    plot_type="bar",
    autoscale=True,
    reference_line="min",
    reference_area=[0, "max"],
    options=bar_plot_options,
)

example,numbers
Row 1,4.0037-122023673723214716
Row 2,2.3037.0−12.02.306.809.202.423.5012.15.303.607.203.74
Row 3,−12.037.0−12.0−12.0−5.006.003.7008.00−7.40
Row 4,037-122015781012417136


In [12]:
# One dict of paired "x" / "y" value lists per station.
station_readings = [
    {
        "x": [6.1, 8.0, 10.1, 10.5, 11.2, 12.4, 13.1, 15.3],
        "y": [24.2, 28.2, 30.2, 30.5, 30.5, 33.1, 33.5, 32.7],
    },
    {
        "x": [7.1, 8.2, 10.3, 10.75, 11.25, 12.5, 13.5, 14.2],
        "y": [18.2, 18.1, 20.3, 20.5, 21.4, 21.9, 23.1, 23.3],
    },
    {
        "x": [6.3, 7.1, 10.3, 11.0, 12.07, 13.1, 15.12, 16.42],
        "y": [15.2, 17.77, 21.42, 21.63, 25.23, 26.84, 27.2, 27.44],
    },
]

weather_2 = pl.DataFrame(
    {
        "station": [f"Station {n}" for n in range(1, 4)],
        "temperatures": station_readings,
    }
)

# Switch off both the data area and the data line for these plots.
hidden_line_and_area = nanoplot_options(show_data_area=False, show_data_line=False)

GT(weather_2).fmt_nanoplot(
    columns="temperatures",
    plot_type="line",
    expand_x=[5, 16],
    expand_y=[10, 40],
    options=hidden_line_and_area,
)

station,temperatures
Station 1,401024.228.230.230.530.533.133.532.7
Station 2,401018.218.120.320.521.421.923.123.3
Station 3,401015.217.821.421.625.226.827.227.4


Colorizing the data

In [13]:
from great_tables import GT
import polars as pl

# Small frame with an integer column, a float column containing one missing
# value, and a string column.
simple_df = pl.DataFrame(
    dict(
        integer=[1, 2, 3, 4, 5],
        float=[2.3, 1.3, 5.1, None, 4.4],
        category=["one", "two", "three", "one", "three"],
    )
)

# Call data_color() with no arguments, i.e. with its default settings.
GT(simple_df).data_color()

integer,float,category
1,2.3,one
2,1.3,two
3,5.1,three
4,,one
5,4.4,three


In [18]:
# Colour the cells with an explicit two-colour palette and a dedicated na_color.
GT(simple_df).data_color(
    palette=["blue", "black"],
    na_color="white",
)

integer,float,category
1,2.3,one
2,1.3,two
3,5.1,three
4,,one
5,4.4,three


In [21]:
from great_tables import html
from great_tables.data import sza
import polars.selectors as cs

# Row filters: latitude "20" only, and "tst" values up to "1200"
# (both compared against string literals).
at_latitude_20 = pl.col("latitude") == "20"
up_to_noon = pl.col("tst") <= "1200"

# Reshape to one row per month and one column per "tst" value.
sza_pivot = (
    pl.from_pandas(sza)
    .filter(at_latitude_20 & up_to_noon)
    .select(pl.col("*").exclude("latitude"))
    .drop_nulls()
    .pivot(values="sza", index="month", on="tst", sort_columns=True)
)

sza_table = GT(sza_pivot, rowname_col="month")
sza_table = sza_table.data_color(
    domain=[90, 0],
    palette=["rebeccapurple", "yellow", "orange"],
    na_color="white",
)

sza_table.tab_header(
    title="Solar Zenith Angles from 05:30 to 12:00",
    subtitle=html("Average monthly values at latitude of 20&deg;N."),
)

Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00,Solar Zenith Angles from 05:30 to 12:00
Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.,Average monthly values at latitude of 20°N.
Unnamed: 0_level_2,0530,0600,0630,0700,0730,0800,0830,0900,0930,1000,1030,1100,1130,1200
jan,,,,84.9,78.7,72.7,66.1,61.5,56.5,52.1,48.3,45.5,43.6,43.0
feb,,,88.9,82.5,75.8,69.6,63.3,57.7,52.2,47.4,43.1,40.0,37.8,37.2
mar,,,85.7,78.8,72.0,65.2,58.6,52.3,46.2,40.5,35.5,31.4,28.6,27.7
apr,,88.5,81.5,74.4,67.4,60.3,53.4,46.5,39.7,33.2,26.9,21.3,17.2,15.5
may,,85.0,78.2,71.2,64.3,57.2,50.2,43.2,36.1,29.1,26.1,15.2,8.8,5.0
jun,89.2,82.7,76.0,69.3,62.5,55.7,48.8,41.9,35.0,28.1,21.1,14.2,7.3,2.0
jul,88.8,82.3,75.7,69.1,62.3,55.5,48.7,41.8,35.0,28.1,21.2,14.3,7.7,3.1
aug,,83.8,77.1,70.2,63.3,56.4,49.4,42.4,35.4,28.3,21.3,14.3,7.3,1.9
sep,,87.2,80.2,73.2,66.1,59.1,52.1,45.1,38.1,31.3,24.7,18.6,13.7,11.6
oct,,,84.1,77.1,70.2,63.3,56.5,49.9,43.5,37.5,32.0,27.4,24.3,23.1


In [26]:
from great_tables import GT, exibble

# Build a table that has a row-label stub ("row"), row groups ("group"),
# a header, a stubhead label and a source note.
exibble_head = exibble.head(5)

gt_ex = (
    GT(exibble_head, rowname_col="row", groupname_col="group")
    .tab_header(title="THE HEADING", subtitle="(a subtitle)")
    .tab_stubhead(label="THE STUBHEAD")
    .tab_source_note(source_note="THE SOURCE NOTE")
)

gt_ex

THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING
(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle)
THE STUBHEAD,num,char,fctr,date,time,datetime,currency
grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a
row_1,0.1111,apricot,one,2015-01-15,13:35,2018-01-01 02:22,49.95
row_2,2.222,banana,two,2015-02-15,14:40,2018-02-02 14:33,17.95
row_3,33.33,coconut,three,2015-03-15,15:45,2018-03-03 03:44,1.39
row_4,444.4,durian,four,2015-04-15,16:50,2018-04-04 15:55,65100.0
grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b
row_5,5550.0,,five,2015-05-15,17:55,2018-05-05 04:00,1325.81
THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE


In [27]:
# One background colour per table region, keyed by the tab_options() argument name.
region_backgrounds = {
    "table_background_color": "lightblue",
    "heading_background_color": "gold",
    "column_labels_background_color": "aquamarine",
    "row_group_background_color": "lightyellow",
    "stub_background_color": "lightgreen",
    "source_notes_background_color": "#f1e2af",
}

gt_ex.tab_options(container_width="100%", **region_backgrounds)

THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING
(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle)
THE STUBHEAD,num,char,fctr,date,time,datetime,currency
grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a
row_1,0.1111,apricot,one,2015-01-15,13:35,2018-01-01 02:22,49.95
row_2,2.222,banana,two,2015-02-15,14:40,2018-02-02 14:33,17.95
row_3,33.33,coconut,three,2015-03-15,15:45,2018-03-03 03:44,1.39
row_4,444.4,durian,four,2015-04-15,16:50,2018-04-04 15:55,65100.0
grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b
row_5,5550.0,,five,2015-05-15,17:55,2018-05-05 04:00,1325.81
THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE


Premade themes

In [28]:
# Apply premade style number 1 in its "blue" colour variant.
gt_ex.opt_stylize(
    style=1,
    color="blue",
)

THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING,THE HEADING
(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle),(a subtitle)
THE STUBHEAD,num,char,fctr,date,time,datetime,currency
grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a,grp_a
row_1,0.1111,apricot,one,2015-01-15,13:35,2018-01-01 02:22,49.95
row_2,2.222,banana,two,2015-02-15,14:40,2018-02-02 14:33,17.95
row_3,33.33,coconut,three,2015-03-15,15:45,2018-03-03 03:44,1.39
row_4,444.4,durian,four,2015-04-15,16:50,2018-04-04 15:55,65100.0
grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b,grp_b
row_5,5550.0,,five,2015-05-15,17:55,2018-05-05 04:00,1325.81
THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE,THE SOURCE NOTE
