# Table Layout with `MTable`

`MTable` is the base class for creating publication-quality tables from pandas (Multiindex) DataFrames. `MTable` provides flexible table formatting for any structured data.


In [16]:
# Import necessary libraries
import sys
import numpy as np
import pandas as pd

# Force reload of maketables module
if 'maketables' in sys.modules:
    del sys.modules['maketables']
    if 'maketables.mtable' in sys.modules:
        del sys.modules['maketables.mtable']

import maketables as mt


## Basic Usage

The simplest way to create a table is to pass a pandas DataFrame to `MTable()`:

In [2]:
df = pd.DataFrame(np.random.randn(4, 4).round(2), columns=["A", "B", "C", "D"])

# Create table
mt.MTable(df)

Unnamed: 0,A,B,C,D
0.0,-0.49,-0.03,-0.16,1.44
1.0,-0.51,1.33,0.79,1.35
2.0,-0.47,-0.01,1.05,0.82
3.0,1.29,-1.2,0.46,1.11
,,,,




When the respective dataframe has a mutiindex for the columns, columns spanners are generated from the index. The row index can also be a multiindex (of at most two levels). In this case the first index level is used to generate group rows (for instance using the index name as headline and separating the groups by a horizontal line) and the second index level is used to generate the row labels.

In [3]:
# Create a multiindex dataframe with random data
row_index = pd.MultiIndex.from_tuples(
    [
        ("Group 1", "Variable 1"),
        ("Group 1", "Variable 2"),
        ("Group 1", "Variable 3"),
        ("Group 2", "Variable 4"),
        ("Group 2", "Variable 5"),
        ("Group 3", "Variable 6"),
    ]
)

col_index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"], ["High", "Low"]])
df = pd.DataFrame(np.random.randn(6, 8).round(3), index=row_index, columns=col_index)

t=mt.MTable(df=df, caption="This is a caption", notes="These are notes")

In [17]:
# Create a multiindex dataframe with random data
row_index = pd.MultiIndex.from_tuples(
    [
        ("Group 1", "Variable 1"),
        ("Group 1", "V2"),
        ("Group 1", "V3"),
        ("Group 2", "V4"),
        ("Group 2", "V5"),
        ("Group 3", "V6"),
    ]
)

col_index = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"], ["High", "Mid", "Low"]])
df = pd.DataFrame(np.random.randn(6, 12).round(3), index=row_index, columns=col_index)

t=mt.MTable(df=df, caption="This is a caption", notes="These are notes", rgroup_sep="b")
t.save(type="typst", file_name="../output/table.typ", replace=True)

This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption
Unnamed: 0_level_1,A,A,A,A,A,A,B,B,B,B,B,B
Unnamed: 0_level_2,X,X,X,Y,Y,Y,X,X,X,Y,Y,Y
Unnamed: 0_level_3,High,Mid,Low,High,Mid,Low,High,Mid,Low,High,Mid,Low
Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1
Variable 1,1.509,0.606,0.509,-0.43,-1.484,-0.208,1.207,-1.527,-1.055,0.126,-0.985,-0.793
V2,-0.424,-1.366,-0.974,0.703,-1.307,-1.598,-0.63,0.732,-0.197,-0.465,0.044,-1.746
V3,0.495,0.383,-0.409,1.006,-1.127,1.128,0.154,-0.337,-0.02,-0.107,1.22,-1.077
Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2
V4,-0.763,1.036,-0.596,0.901,1.699,-0.402,0.681,-1.154,0.755,0.199,0.473,-0.646
V5,-1.661,0.503,-2.063,-0.317,0.941,0.192,1.948,1.025,-0.654,0.027,-0.047,-1.582
Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3
V6,-0.94,-0.561,-2.036,0.047,-0.249,-1.673,0.404,0.716,0.227,-0.324,0.591,0.68
These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes


In [19]:
# Also save the same table to LaTeX format to compare column widths
t.save(type="tex", file_name="../output/table_latex_default.tex", replace=True)

# And with explicit tab_width for tabularx
t.save(type="tex", file_name="../output/table_latex_tabularx.tex", replace=True, tex_style={"tab_width": r"\linewidth"})


This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption,This is a caption
Unnamed: 0_level_1,A,A,A,A,A,A,B,B,B,B,B,B
Unnamed: 0_level_2,X,X,X,Y,Y,Y,X,X,X,Y,Y,Y
Unnamed: 0_level_3,High,Mid,Low,High,Mid,Low,High,Mid,Low,High,Mid,Low
Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1,Group 1
Variable 1,1.509,0.606,0.509,-0.43,-1.484,-0.208,1.207,-1.527,-1.055,0.126,-0.985,-0.793
V2,-0.424,-1.366,-0.974,0.703,-1.307,-1.598,-0.63,0.732,-0.197,-0.465,0.044,-1.746
V3,0.495,0.383,-0.409,1.006,-1.127,1.128,0.154,-0.337,-0.02,-0.107,1.22,-1.077
Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2,Group 2
V4,-0.763,1.036,-0.596,0.901,1.699,-0.402,0.681,-1.154,0.755,0.199,0.473,-0.646
V5,-1.661,0.503,-2.063,-0.317,0.941,0.192,1.948,1.025,-0.654,0.027,-0.047,-1.582
Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3,Group 3
V6,-0.94,-0.561,-2.036,0.047,-0.249,-1.673,0.404,0.716,0.227,-0.324,0.591,0.68
These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes,These are notes


In [18]:
# Debug: check what colspec is being generated
import inspect
tex_output = t._output_tex()
print("First 50 lines of LaTeX output:")
print("\n".join(tex_output.split("\n")[:15]))


First 50 lines of LaTeX output:
\renewcommand\cellalign{t}
\begin{table}[htbp]
\centering
\caption{This is a caption}
\smallskip
\begin{threeparttable}
\begingroup
\renewcommand\arraystretch{1}
\setlength{\tabcolsep}{3pt}
\begin{tabularx}{\linewidth}{@{}>{\raggedright\arraybackslash}l>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X>{\centering\arraybackslash}X}
\toprule
 & \multicolumn{6}{c}{A} & \multicolumn{6}{c}{B} \\
\cmidrule(lr){2-7} \cmidrule(lr){8-13}
 & \multicolumn{3}{c}{X} & \multicolumn{3}{c}{Y} & \multicolumn{3}{c}{X} & \multicolumn{3}{c}{Y} \\
\cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}


In [5]:
# Example: Correlation Table with Simulated Data and Significance Stars

# Simulate data for 8 variables
np.random.seed(42)
n = 500

data = {
    'income': np.random.normal(50000, 15000, n),
    'age': np.random.normal(40, 12, n),
    'education': np.random.normal(14, 3, n),
    'experience': np.random.normal(15, 8, n),
    'hours_worked': np.random.normal(40, 8, n),
    'job_satisfaction': np.random.normal(7, 2, n),
    'productivity': np.random.normal(75, 15, n),
    'tenure': np.random.normal(8, 5, n)
}

sim_df = pd.DataFrame(data)

# Calculate correlation matrix and p-values
from scipy.stats import pearsonr

variables = sim_df.columns
corr_matrix = pd.DataFrame(index=variables, columns=variables)
p_values = pd.DataFrame(index=variables, columns=variables)

for i, var1 in enumerate(variables):
    for j, var2 in enumerate(variables):
        if i <= j:  # Only calculate for upper triangle and diagonal
            if i == j:
                corr_matrix.loc[var1, var2] = 1.0
                p_values.loc[var1, var2] = 0.0
            else:
                corr, pval = pearsonr(sim_df[var1], sim_df[var2])
                corr_matrix.loc[var1, var2] = corr
                p_values.loc[var1, var2] = pval

# Format correlations with significance stars
def add_stars(corr_val, p_val):
    if pd.isna(corr_val):
        return ''
    corr_str = f'{float(corr_val):.2f}'
    if corr_val == 1.0:
        return corr_str
    if float(p_val) < 0.01:
        return corr_str + '***'
    elif float(p_val) < 0.05:
        return corr_str + '**'
    elif float(p_val) < 0.10:
        return corr_str + '*'
    else:
        return corr_str

# Apply formatting
corr_display = pd.DataFrame(index=corr_matrix.index, columns=corr_matrix.columns)
for i in corr_matrix.index:
    for j in corr_matrix.columns:
        corr_display.loc[i, j] = add_stars(corr_matrix.loc[i, j], p_values.loc[i, j])

# Define variable labels
var_labels = {
    'income': 'Income',
    'age': 'Age',
    'education': 'Education',
    'experience': 'Experience',
    'hours_worked': 'Hours Worked',
    'job_satisfaction': 'Job Satisfaction',
    'productivity': 'Productivity',
    'tenure': 'Tenure'
}

# Relabel the correlation matrix
corr_display_labeled = corr_display.rename(index=var_labels, columns=var_labels)

# Create table with custom column widths
ct= mt.MTable(
    corr_display_labeled,
    caption="Correlation Matrix of Labor Market Variables",
    notes="Pearson correlation coefficients. * p<0.10, ** p<0.05, *** p<0.01",
    tab_label="tab:correlation",
    gt_style={"first_col_width": "150px", "table_width": "100%"},
    tex_style={"first_col_width": "3cm"},
    docx_style={"first_col_width": "2in"}
)

## Exporting Tables

### Save to LaTeX

Export your table to a LaTeX .tex file:

In [6]:
# Save correlation table to LaTeX
corr_table.save(type='tex', file_name='../output/correlation_table.tex')

NameError: name 'corr_table' is not defined

### Save to Word (docx)

Export to Microsoft Word format:

In [None]:
# Save to Word document
corr_table.save(type='docx', file_name='../output/correlation_table.docx')

### Save to HTML

Export to HTML format:

In [None]:
# Save to HTML
corr_table.save(type='html', file_name='../output/correlation_table.html')