## 視覺化dataFrame(Table Visualization)
- 注意,在github上,無法顯示顏色
  

## 格式化值,index,column
- 必需install jinja2
- format()
- format_index()


In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl

df = pd.DataFrame({
    "strings": ["Adam", "Mike"],
    "ints": [1, 3],
    "floats": [1.123, 1000.23]
})
display(df)
s = df.style\
.format(precision=3, thousands=",", decimal=".")\
.format_index(str.upper, axis=1)\
.relabel_index(["row 1", "row 2"], axis=0)
print(type(s))
s 

Unnamed: 0,strings,ints,floats
0,Adam,1,1.123
1,Mike,3,1000.23


<class 'pandas.io.formats.style.Styler'>


Unnamed: 0,STRINGS,INTS,FLOATS
row 1,Adam,1,1.123
row 2,Mike,3,1000.23


In [2]:
#綜合操作
weather_df = pd.DataFrame(np.random.rand(10,2)*5,
                        index=pd.date_range(start="2021-01-01", periods=10),
                        columns=["Tokyo", "Taipei"])

display(weather_df)

Unnamed: 0,Tokyo,Taipei
2021-01-01,3.095402,2.059252
2021-01-02,0.310231,3.660137
2021-01-03,4.118586,1.534978
2021-01-04,3.218222,3.79889
2021-01-05,3.388959,2.049797
2021-01-06,0.804281,1.389957
2021-01-07,4.886042,4.103464
2021-01-08,1.45435,1.101795
2021-01-09,0.335084,0.79402
2021-01-10,2.355521,2.46233


In [3]:
weather_df = pd.DataFrame(np.random.rand(10,2)*5,
                        index=pd.date_range(start="2021-01-01", periods=10),
                        columns=["Tokyo", "Taipei"])

def rain_condition(v):
    if v < 1.75:
        return "Dry"
    elif v < 2.75:
        return "Rain"
    return "Heavy Rain"

def make_pretty(styler):
    styler.set_caption("Weather Conditions")
    styler.format(rain_condition)
    styler.format_index(lambda v: v.strftime("%A"))
    styler.background_gradient(axis=None, vmin=1, vmax=5, cmap="PuBuGn")
    return styler

weather_df.loc["2021-01-04":"2021-01-08"].style.pipe(make_pretty)

Unnamed: 0,Tokyo,Taipei
Monday,Heavy Rain,Rain
Tuesday,Rain,Heavy Rain
Wednesday,Heavy Rain,Rain
Thursday,Rain,Heavy Rain
Friday,Heavy Rain,Heavy Rain


## 隱藏資料
- #### hide()

In [4]:
df = pd.DataFrame(np.random.randn(5,5))
display(df)

df.style \
.hide(subset=[0, 2, 4], axis=0)\
.hide(subset=[0, 2, 4], axis=1)

Unnamed: 0,0,1,2,3,4
0,-0.436499,0.048386,-0.93427,-1.920437,-1.536939
1,-0.543443,0.814937,-1.211349,0.247469,0.876943
2,-1.096822,-0.018163,0.252432,2.087205,-2.25532
3,-0.780251,-0.157119,0.335175,0.930006,0.88825
4,-1.368661,-0.373109,-0.038115,0.412994,-0.902049


Unnamed: 0,1,3
1,0.814937,0.247469
3,-0.157119,0.930006


In [5]:
show = [0, 2, 4]
df.style\
.hide([row for row in df.index if row not in show], axis=0)\
.hide([col for col in df.columns if col not in show], axis=1)

Unnamed: 0,0,2,4
0,-0.436499,-0.93427,-1.536939
2,-1.096822,0.252432,-2.25532
4,-1.368661,-0.038115,-0.902049


## 連結資料
- concat()
- 2個以上的Styler,可以被連結,但必需要相同的欄位
- 非常適合統計的加總和平均

In [6]:
display(df.agg(["sum","mean"]))
summary_style = df.agg(["sum", "mean"]).style\
                                    .format(precision=3)\
                                    .relabel_index(["總分","平均"])
display(summary_style)
df.style.format(precision=3).concat(summary_style)

Unnamed: 0,0,1,2,3,4
sum,-4.225677,0.314932,-1.596127,1.757237,-2.929115
mean,-0.845135,0.062986,-0.319225,0.351447,-0.585823


Unnamed: 0,0,1,2,3,4
總分,-4.226,0.315,-1.596,1.757,-2.929
平均,-0.845,0.063,-0.319,0.351,-0.586


Unnamed: 0,0,1,2,3,4
0,-0.436,0.048,-0.934,-1.92,-1.537
1,-0.543,0.815,-1.211,0.247,0.877
2,-1.097,-0.018,0.252,2.087,-2.255
3,-0.78,-0.157,0.335,0.93,0.888
4,-1.369,-0.373,-0.038,0.413,-0.902
總分,-4.226,0.315,-1.596,1.757,-2.929
平均,-0.845,0.063,-0.319,0.351,-0.586


In [7]:
## Styler支援HTML
df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]],
                  index=pd.Index(['Tumour (Positive)', 'Non-Tumour (Negative)'], name='Actual Label:'),
                  columns=pd.MultiIndex.from_product([['Decision Tree', 'Regression', 'Random'],['Tumour', 'Non-Tumour']], names=['Model:', 'Predicted:']))
df.style

Model:,Decision Tree,Decision Tree,Regression,Regression,Random,Random
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Tumour (Positive),38.0,2.0,18.0,22.0,21,
Non-Tumour (Negative),19.0,439.0,6.0,452.0,226,232.0


In [8]:
s = df.style.format('{:.0f}').hide([("Random","Tumour"),("Random","Non-Tumour")],axis="columns")
display(s)
print(s.to_html())

Model:,Decision Tree,Decision Tree,Regression,Regression
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Tumour (Positive),38,2,18,22
Non-Tumour (Negative),19,439,6,452


<style type="text/css">
</style>
<table id="T_54503">
  <thead>
    <tr>
      <th class="index_name level0" >Model:</th>
      <th id="T_54503_level0_col0" class="col_heading level0 col0" colspan="2">Decision Tree</th>
      <th id="T_54503_level0_col2" class="col_heading level0 col2" colspan="2">Regression</th>
    </tr>
    <tr>
      <th class="index_name level1" >Predicted:</th>
      <th id="T_54503_level1_col0" class="col_heading level1 col0" >Tumour</th>
      <th id="T_54503_level1_col1" class="col_heading level1 col1" >Non-Tumour</th>
      <th id="T_54503_level1_col2" class="col_heading level1 col2" >Tumour</th>
      <th id="T_54503_level1_col3" class="col_heading level1 col3" >Non-Tumour</th>
    </tr>
    <tr>
      <th class="index_name level0" >Actual Label:</th>
      <th class="blank col0" >&nbsp;</th>
      <th class="blank col1" >&nbsp;</th>
      <th class="blank col2" >&nbsp;</th>
      <th class="blank col3" >&nbsp;</th>
    </tr>
  </thead>
  <tbody>
    <tr>
  

### 3個主要的方法,增加css的Styler
- #### set_table_style()
- #### set_td_classes()
- #### apply(), applymap(),apply_index(), applymap_index()

In [9]:
cell_hover = { # for row hover use <tr> instead of <td>
    'selector':'td:hover',
    'props':[('background-color','#ffffb3')]
}

index_names = {
    'selector':'.index_name',
    'props':'font-style:italic; color:darkgrey; font-weight:normal;'
}
headers = {
    'selector':'th:not(.index_name)',
    'props':'background-color:#000066; color:white;'
}
s.set_table_styles([cell_hover, index_names, headers])

Model:,Decision Tree,Decision Tree,Regression,Regression
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Tumour (Positive),38,2,18,22
Non-Tumour (Negative),19,439,6,452


In [10]:
s.set_table_styles([
    {'selector':'th.col_heading','props':'text-align:center;'},
    {'selector':'th.col_heading.level0','props':'font-size:1.5em'},
    {'selector':'td','props':'text-align:center; font-weight:bold;'}
],overwrite=False)
s

Model:,Decision Tree,Decision Tree,Regression,Regression
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Tumour (Positive),38,2,18,22
Non-Tumour (Negative),19,439,6,452


In [11]:
s.set_table_styles({
    ('Regression', 'Tumour'): [{'selector': 'th', 'props': 'border-left: 1px solid white'},
                               {'selector': 'td', 'props': 'border-left: 1px solid #000066'}]
}, overwrite=False, axis=0)

Model:,Decision Tree,Decision Tree,Regression,Regression
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Tumour (Positive),38,2,18,22
Non-Tumour (Negative),19,439,6,452


In [12]:
#set_td_classes()
s.set_table_styles([  # create internal CSS classes
    {'selector': '.true', 'props': 'background-color: #e6ffe6;'},
    {'selector': '.false', 'props': 'background-color: #ffe6e6;'},
], overwrite=False)
cell_color = pd.DataFrame([['true ', 'false ', 'true ', 'false '],
                           ['false ', 'true ', 'false ', 'true ']],
                          index=df.index,
                          columns=df.columns[:4])
s.set_td_classes(cell_color)

Model:,Decision Tree,Decision Tree,Regression,Regression
Predicted:,Tumour,Non-Tumour,Tumour,Non-Tumour
Actual Label:,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Tumour (Positive),38,2,18,22
Non-Tumour (Negative),19,439,6,452


## Styler Functions
使用下列方法,傳遞styler function
- applymap()-elementwise
- apply() - (column/row/table-wise)


In [13]:
#Styler Functions
#處理資料

import numpy as np
np.random.seed(0)
df2 = pd.DataFrame(np.random.randn(10,4), columns=['A', 'B', 'C', 'D'])
display(df2.style)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [14]:
#applymap傳入的function,第1個參數必需是純值,必需傳出字串
def style_negative(v, props=''):
    return props if v < 0 else None

s2 = df2.style.applymap(style_negative, props='color:red;')\
        .applymap(lambda v:'opacity:20%' if(v<0.3) and (v>-0.3) else None)
s2

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [15]:
df2['A'] == np.nanmax(df2['A'].values)

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7    False
8    False
9    False
Name: A, dtype: bool

In [16]:
np.where(df2['A'] == np.nanmax(df2['A'].values),'color:red','')

array(['', '', '', '', '', '', 'color:red', '', '', ''], dtype='<U9')

In [17]:
#apply傳入function,第一個參數必需是series,傳出list-like的資料
def highlight_max(s, props=''):
    return np.where(s == np.nanmax(s.values), props, '')

s2.apply(highlight_max, props='color:white;background-color:darkblue',axis=0)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [18]:
s2.apply(highlight_max, props='color:white;background-color:pink;',axis=1)\
    .apply(highlight_max, props='color:white;background-color:purple', axis=None)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [19]:
#處理欄位和索引
s2.applymap_index(lambda v: "color:pink;" if v>4 else "color:darkblue;", axis=0)
s2.apply_index(lambda s: np.where(s.isin(["A", "B"]), "color:pink;", "color:darkblue;"), axis=1)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


## 內建的styles
- #### highlight_null()
- #### highlight_min(),highlight_max()
- #### highlight_between,highlight_quantile
- #### background_gradient
- #### bar

In [20]:
np.random.seed(0)
df2 = pd.DataFrame(np.random.randn(10,4), columns=['A','B','C','D'])
df2.style

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,0.978738,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,-0.854096
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [21]:
#hightlight Null
df2.iloc[0,2] = np.nan
df2.iloc[4,3] = np.nan
df2.loc[:4].style.highlight_null(color='yellow')

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,


In [22]:
#hightlight min or max
df2.loc[:4].style.highlight_max(axis=1,props="color:white; font-weight:bold; background-color:darkblue;")

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,


In [23]:
#highlight between
left = pd.Series([1.0, 0.0, 1.0], index=["A", "B", "D"])
df2.loc[:4].style.highlight_between(left=left, right=1.5, axis=1, props='color:white; background-color:purple;')


Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,


In [24]:
## HighLight Quantile

df2.loc[:4].style.highlight_quantile(q_left=0.85, axis=None, color='yellow')

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,


In [25]:
# background Gradient and Text Gradient
import seaborn as sns

cm = sns.light_palette("green", as_cmap=True)
df2.style.background_gradient(cmap=cm)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [26]:
df2.style.text_gradient(cmap=cm)

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [27]:
#set properties
df2.loc[:4].style.set_properties(**{'background-color':'black',
                                    'color':'lawngreen',
                                    'border-color':'white'})

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,


In [28]:
#bar charts
df2.style.bar(subset=['A', 'B'], color='#d65f5f')

Unnamed: 0,A,B,C,D
0,1.764052,0.400157,,2.240893
1,1.867558,-0.977278,0.950088,-0.151357
2,-0.103219,0.410599,0.144044,1.454274
3,0.761038,0.121675,0.443863,0.333674
4,1.494079,-0.205158,0.313068,
5,-2.55299,0.653619,0.864436,-0.742165
6,2.269755,-1.454366,0.045759,-0.187184
7,1.532779,1.469359,0.154947,0.378163
8,-0.887786,-1.980796,-0.347912,0.156349
9,1.230291,1.20238,-0.387327,-0.302303


In [29]:
df2.style.format('{:.3f}', na_rep="")\
         .bar(align=0, vmin=-2.5, vmax=2.5, cmap="bwr", height=50,
              width=60, props="width: 120px; border-right: 1px solid black;")\
         .text_gradient(cmap="bwr", vmin=-2.5, vmax=2.5)

Unnamed: 0,A,B,C,D
0,1.764,0.4,,2.241
1,1.868,-0.977,0.95,-0.151
2,-0.103,0.411,0.144,1.454
3,0.761,0.122,0.444,0.334
4,1.494,-0.205,0.313,
5,-2.553,0.654,0.864,-0.742
6,2.27,-1.454,0.046,-0.187
7,1.533,1.469,0.155,0.378
8,-0.888,-1.981,-0.348,0.156
9,1.23,1.202,-0.387,-0.302
