# Formatting Tables in Pandas


In [58]:
# Import pandas
import pandas as pd
# Import numpy
import numpy as np



## The data
Let's create some simulated data for two widgets, A and B. We'll create a dataframe for each widget, then concatenate them together. We'll also sort the dataframe by month and reset the index.

In [59]:
# simulated data for widget A
df_a = pd.DataFrame(
    {
        'Month':pd.date_range(
            start = '01-01-2012',
            end = '31-12-2022',
            freq = 'MS'
        ),
        'Quotes':np.random.randint(
            low = 1_000_000,
            high = 2_500_000,
            size = 132
        ),
        'Numbers':np.random.randint(
            low = 300_000,
            high = 500_000,
            size = 132
        ),
        'Amounts':np.random.randint(
            low = 750_000,
            high = 1_250_000,
            size = 132
        )
    }
)

df_a['Product'] = 'A'

# simulated data for widget B
df_b = pd.DataFrame(
    {
        'Month':pd.date_range(
            start = '01-01-2012',
            end = '31-12-2022',
            freq = 'MS'
        ),
        'Quotes':np.random.randint(
            low = 100_000,
            high = 800_000,
            size = 132
        ),
        'Numbers':np.random.randint(
            low = 10_000,
            high = 95_000,
            size = 132
        ),
        'Amounts':np.random.randint(
            low = 450_000,
            high = 750_000,
            size = 132
        )
    }
)

df_b['Product'] = 'B'

# put it together & sort
df = pd.concat([df_a,df_b],axis = 0)
df.sort_values(by = 'Month',inplace = True)
df.reset_index(drop = True,inplace = True)

Let’s calculate a few “interesting” statistics — average sale amounts and product conversion:

In [60]:
# average sale
df['Average sale'] = df['Amounts'] / df['Numbers']

# conversion
df['Product conversion'] = df['Numbers'] / df['Quotes']

In [61]:
df.head(3)

Unnamed: 0,Month,Quotes,Numbers,Amounts,Product,Average sale,Product conversion
0,2012-01-01,2146725,407150,772616,A,1.89762,0.189661
1,2012-01-01,337194,34423,684239,B,19.877378,0.102087
2,2012-02-01,2480546,431656,1081002,A,2.504314,0.174017


## Date Formatting
There’s arguably nothing __wrong__ with the formatting, but it could be better. For instance, since all the monthly data is reflected as at the first of each month, there’s probably little sense in keeping the day element of each Month entry as it tells the reader very little.

In [None]:
# format the date as YYYY-MM
# df.style.format({'Month':'{:%Y-%m}'})
disp = df.iloc[:3]
disp.style.format({'Month':'{:%Y-%m}'})
display(disp)
breakpoint()
# display(df.head(3))

Unnamed: 0,Month,Quotes,Numbers,Amounts,Product,Average sale,Product conversion
0,2012-01-01,2146725,407150,772616,A,1.89762,0.189661
1,2012-01-01,337194,34423,684239,B,19.877378,0.102087
2,2012-02-01,2480546,431656,1081002,A,2.504314,0.174017


In [63]:
# display(df.head(3))

Now, we can improve readability even further by using the name of each month rather than the month number, and we can do this __*without having to alter the underlying data*__.

In [64]:
# format the date as YYYY-MM-DD
df.style.format({'Month':'{:%B %Y}'})
display(df.iloc[:3])

Unnamed: 0,Month,Quotes,Numbers,Amounts,Product,Average sale,Product conversion
0,2012-01-01,2146725,407150,772616,A,1.89762,0.189661
1,2012-01-01,337194,34423,684239,B,19.877378,0.102087
2,2012-02-01,2480546,431656,1081002,A,2.504314,0.174017
