# Reference

In [1]:
from piper import piper
from piper.verbs import *
from piper.defaults import *
from piper.styler import *

piper version 0.0.7, last run: Friday, 19 February 2021 12:06:50


## helper functions

In [2]:
def clean_merge(df, headers, fillna=0):
    '''
    '''

    logger.info('Clearing null values...')
    df.fillna(fillna, inplace=True)

    logger.info('Setting integer value data types...')
    for x in headers:
        if df[x].dtype != 'object':
            df[x] = pd.to_numeric(df[x], downcast='integer')

    return df

# Merging (joining) two tables

In [3]:
# https://youtu.be/_5EhKDCm9VY

In [4]:
headers = ['account', 'type', 'balance']
headers2 = [x + '2' for x in headers]

f = 'inputs/data merge testing.xlsx'
sheet_name = 'Example #1'

df1 = pd.read_excel(f, sheet_name=sheet_name, usecols='A:C')
df2 = pd.read_excel(f, sheet_name=sheet_name, usecols='F:H', header=0, names=headers2)

## Inner join

In [5]:
merge_df = pd.merge(df1, df2, how='inner', left_on=['account'], right_on=['account2'])
head(merge_df).style.hide_index()

13 rows, 6 columns


account,type,balance,account2,type2,balance2
1,checking,87,1,checking,87
2,checking,36,2,savings,36
3,checking,82,3,checking,82
4,checking,76,4,checking,76


## Outer join

In [6]:
merge_df = (pd.merge(df1, df2, how='outer',
                    left_on=['account'], right_on=['account2'])
              .sort_values(by='account', ascending=False))
head(merge_df, 4)

14 rows, 6 columns


Unnamed: 0,account,type,balance,account2,type2,balance2
12,13,savings,20,13.0,checking,54.0
13,13,savings,20,13.0,savings,20.0
11,12,savings,31,12.0,savings,33.0
10,11,savings,16,11.0,savings,30.0


# Styling examples

### Highlight (including null) values

In [7]:
(head(merge_df, 5).style
                  .apply(highlight_values, criteria=['savings'],
                         subset=['type', 'type2'])
                  .apply(highlight_values, criteria=['checking'],
                         subset=['type', 'type2'], color='blue')
                  .apply(highlight_values, criteria=[20, 54],
                         subset=['balance', 'balance2'], color='purple')
                  .apply(highlight_values, criteria=[20, 54],
                         subset=['balance', 'balance2'], type='background', color='orange')
                  .hide_index()
                  .highlight_null(null_color='lightgreen')
)

14 rows, 6 columns


account,type,balance,account2,type2,balance2
13,savings,20,13.0,checking,54.0
13,savings,20,13.0,savings,20.0
12,savings,31,12.0,savings,33.0
11,savings,16,11.0,savings,30.0
10,checking,54,,,


### Highlight (True/False) values

In [8]:
merge_df['account2 is null'] = merge_df['account2'].isna()
merge_df = clean_merge(merge_df, headers+headers2)
(head(merge_df, 5).style
                  .apply(highlight_values, criteria=[False],
                         subset=['account2 is null', 'balance2', 'type2'],
                         color='red')
                  .apply(highlight_values, criteria=[False],
                         subset=['account2 is null', 'balance2', 'type2'],
                         type='background', color='yellow')
)

Clearing null values...
Setting integer value data types...
14 rows, 7 columns


Unnamed: 0,account,type,balance,account2,type2,balance2,account2 is null
12,13,savings,20,13,checking,54,False
13,13,savings,20,13,savings,20,False
11,12,savings,31,12,savings,33,False
10,11,savings,16,11,savings,30,False
9,10,checking,54,0,0,0,True


### Background gradient values

In [9]:
(merge_df.pipe(pd.DataFrame.sort_values, by='balance2', ascending=False)
                           .style                     
                           .background_gradient(subset='balance2', cmap='coolwarm')
                           .apply(highlight_values, criteria=[33, 76], subset=['balance'],
                                  color='yellow')
                           .apply(highlight_values, criteria=[33, 76], subset=['balance'],
                                  type='background', color='blue')
                           .apply(highlight_values, criteria=[4, 13], subset=['account'],
                                  color='red')
                           .apply(highlight_values, criteria=[4, 13], subset=['account'],
                                  type='background', color='lightpink')
                           .hide_index()
)

account,type,balance,account2,type2,balance2,account2 is null
1,checking,87,1,checking,87,False
3,checking,82,3,checking,82,False
8,checking,33,8,savings,76,False
4,checking,76,4,checking,76,False
13,savings,20,13,checking,54,False
6,checking,27,6,checking,43,False
2,checking,36,2,savings,36,False
12,savings,31,12,savings,33,False
5,savings,76,5,checking,33,False
11,savings,16,11,savings,30,False


### heatmap

In [10]:
xx = merge_df.style.background_gradient(cmap='viridis').hide_index()
display(xx)
xx.to_excel('outputs/test4.xlsx', index=False)

account,type,balance,account2,type2,balance2,account2 is null
13,savings,20,13,checking,54,False
13,savings,20,13,savings,20,False
12,savings,31,12,savings,33,False
11,savings,16,11,savings,30,False
10,checking,54,0,0,0,True
9,checking,18,9,checking,18,False
8,checking,33,8,savings,76,False
7,checking,42,7,checking,27,False
6,checking,27,6,checking,43,False
5,savings,76,5,checking,33,False


### Highlight max/min values

In [11]:
(df2.sort_values(by='balance2', ascending=True).style
    .apply(highlight_max, subset='type2', color='orange') 
    .apply(highlight_max, subset='type2', type='background', color='black') 
    .apply(highlight_min, subset='balance2', color='orange')  
    .apply(highlight_min, subset='balance2', type='background', color='grey') 
    .hide_index()
)

account2,type2,balance2
9,checking,18
13,savings,20
7,checking,27
11,savings,30
5,checking,33
12,savings,33
2,savings,36
6,checking,43
13,checking,54
4,checking,76


### Highlight max/min values - standalone example

In [12]:
import pandas as pd
import numpy as np

np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df2 = pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))

df = pd.concat([df, df2], axis=1)
df.iloc[0, 2] = np.nan

(df.style.apply(highlight_max, subset=['B', 'E'], color='red', axis=0)
         .apply(highlight_max, subset=['B', 'E'], type='background', color='yellow', axis=0))

Unnamed: 0,A,B,C,D,E
0,1.0,1.329212,,-0.31628,-0.99081
1,2.0,-1.070816,-1.438713,0.564417,0.295722
2,3.0,-1.626404,0.219565,0.678805,1.889273
3,4.0,0.961538,0.104011,-0.481165,0.850229
4,5.0,1.453425,1.057737,0.165562,0.515018
5,6.0,-1.336936,0.562861,1.392855,-0.063328
6,7.0,0.121668,1.207603,-0.00204,1.627796
7,8.0,0.354493,1.037528,-0.385684,0.519818
8,9.0,1.686583,-1.325963,1.428984,-2.089354
9,10.0,-0.12982,0.631523,-0.586538,0.29072


### Format options

In [13]:
(merge_df[:10]
 .style

 .format({"account": "{:.0f}", "balance": "{:.0f}",
          "account2": "{:.4f}", "balance2": "{:.0f}"})
 .hide_index()
 .set_caption("This is a caption that is modified").set_table_styles([{
    'selector': 'caption',
    'props': [('color', 'blue'), ('font-size', '18px')
    ]
}])
)#.to_excel('outputs/test2.xlsx', index=False, freeze_panes=(1, 0), verbose=True)

account,type,balance,account2,type2,balance2,account2 is null
13,savings,20,13.0,checking,54,False
13,savings,20,13.0,savings,20,False
12,savings,31,12.0,savings,33,False
11,savings,16,11.0,savings,30,False
10,checking,54,0.0,0,0,True
9,checking,18,9.0,checking,18,False
8,checking,33,8.0,savings,76,False
7,checking,42,7.0,checking,27,False
6,checking,27,6.0,checking,43,False
5,savings,76,5.0,checking,33,False


### data bars 

In [14]:
(merge_df
 .style
 .bar(subset=["account",], color='lightgreen')
 .bar(subset=["balance2",], color='lightblue')
 .format({"account": "{:.0f}", "balance": "{:.0f}",
          "account2": "{:.4f}", "balance2": "{:.0f}"})
 .hide_index()
 .highlight_null(null_color='blue')
 .highlight_max(color='green')
 .highlight_min(color='#cd4f39')
 .set_caption('this is a test')
)#.to_excel('outputs/test2.xlsx', index=False, freeze_panes=(1, 0), verbose=True)

account,type,balance,account2,type2,balance2,account2 is null
13,savings,20,13.0,checking,54,False
13,savings,20,13.0,savings,20,False
12,savings,31,12.0,savings,33,False
11,savings,16,11.0,savings,30,False
10,checking,54,0.0,0,0,True
9,checking,18,9.0,checking,18,False
8,checking,33,8.0,savings,76,False
7,checking,42,7.0,checking,27,False
6,checking,27,6.0,checking,43,False
5,savings,76,5.0,checking,33,False


### highlight rows (including null values)

In [15]:
import pandas as pd
import numpy as np

# np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df2 = pd.DataFrame(6+np.random.randn(8, 4), columns=list('BCDE'))

df = pd.concat([df, df2], axis=1)
df.iloc[0, 2] = np.nan

In [16]:
(df.style.hide_index()
         .apply(highlight_rows, column='B', operator='>=', criteria=5, axis=1)
         .apply(highlight_rows, column='B', operator='>=', criteria=5, axis=1,
                type='background', color='yellow')   
)

A,B,C,D,E
1.0,7.264103,,4.029712,6.803906
2.0,7.03055,6.118098,5.978147,6.046841
3.0,4.371247,5.607639,7.700973,7.06133
4.0,6.695804,5.564011,5.667058,6.602135
5.0,6.108789,6.036767,5.461037,6.499178
6.0,5.288048,5.762999,6.857119,4.117648
7.0,6.420705,4.930825,3.423141,4.775077
8.0,4.843248,6.931304,6.984257,8.226159
9.0,,,,
10.0,,,,


In [17]:
%run style_utils2.py

(df.style.hide_index()
         .highlight_null(null_color='yellow')
         .apply(highlight_null_rows, column='B', operator='=', axis=1, color='yellow')
         .apply(highlight_null_rows, column='B', operator='=', axis=1,
                type='background', color='blue')   
)

File `'style_utils2.py'` not found.


A,B,C,D,E
1.0,7.264103,,4.029712,6.803906
2.0,7.03055,6.118098,5.978147,6.046841
3.0,4.371247,5.607639,7.700973,7.06133
4.0,6.695804,5.564011,5.667058,6.602135
5.0,6.108789,6.036767,5.461037,6.499178
6.0,5.288048,5.762999,6.857119,4.117648
7.0,6.420705,4.930825,3.423141,4.775077
8.0,4.843248,6.931304,6.984257,8.226159
9.0,,,,
10.0,,,,


## DataFrame formats

### AS/400 Style

In [18]:
as400_style = {'background-color': 'black', 'color': 'lightgreen', 'font-size': '140%'}

xl_output = (head(merge_df, 8).style.set_properties(**as400_style)
                  .hide_index()
                  .apply(highlight_max, subset=['account2', 'balance', 'account2 is null'],
                         color='yellow'))
xl_output

14 rows, 7 columns


account,type,balance,account2,type2,balance2,account2 is null
13,savings,20,13,checking,54,False
13,savings,20,13,savings,20,False
12,savings,31,12,savings,33,False
11,savings,16,11,savings,30,False
10,checking,54,0,0,0,True
9,checking,18,9,checking,18,False
8,checking,33,8,savings,76,False
7,checking,42,7,checking,27,False


### Alternative style

In [19]:
xl_output = (head(merge_df, 8).style.set_table_styles(get_default_style())
             .apply(highlight_min, subset='balance', color='yellow')
             .apply(highlight_min, subset='balance', type='background', color='blue')
             .apply(highlight_max, subset='balance', color='yellow')
             .apply(highlight_max, subset='balance', type='background', color='green')
#                   .hide_index()
            )
xl_output

14 rows, 7 columns


Unnamed: 0,account,type,balance,account2,type2,balance2,account2 is null
12,13,savings,20,13,checking,54,False
13,13,savings,20,13,savings,20,False
11,12,savings,31,12,savings,33,False
10,11,savings,16,11,savings,30,False
9,10,checking,54,0,0,0,True
8,9,checking,18,9,checking,18,False
7,8,checking,33,8,savings,76,False
6,7,checking,42,7,checking,27,False


In [20]:
xx = df.groupby(['A', 'B','C']).agg(Total=pd.NamedAgg('D', 'sum'))
(head(xx, 8).style.set_table_styles(get_default_style()))

7 rows, 1 columns


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total
A,B,C,Unnamed: 3_level_1
2.0,7.030550333953391,6.118097935763157,5.978147
3.0,4.371247140873944,5.607639409516747,7.700973
4.0,6.6958035738954935,5.564011429755853,5.667058
5.0,6.108788958914832,6.036766928625714,5.461037
6.0,5.288048243042802,5.76299903072288,6.857119
7.0,6.420705086190522,4.930824663172313,3.423141
8.0,4.843248140139934,6.931304270876204,6.984257


In [21]:
xl_output.to_excel('outputs/test.xlsx', index=False, engine='openpyxl')

In [22]:
head(df)

10 rows, 5 columns


Unnamed: 0,A,B,C,D,E
0,1.0,7.264103,,4.029712,6.803906
1,2.0,7.03055,6.118098,5.978147,6.046841
2,3.0,4.371247,5.607639,7.700973,7.06133
3,4.0,6.695804,5.564011,5.667058,6.602135
