### Preparations

In [9]:
!python -V
import numpy as np
import pandas as pd
import unicodedata

Python 3.6.7


In [10]:
def u_hex(c):
  """Return the code point of character `c` as uppercase hexadecimal.

  The result is zero-padded to at least four digits ('0020' for a space).
  Code points above U+FFFF keep all of their digits ('1F600'); they are not
  cut down to the last four.
  """
  # '04X' pads to a minimum width of 4 but never truncates longer values.
  return format(ord(c), '04X')

def u_name_or_null(c):
  """Look up the Unicode name of character `c`; give back None if it has none."""
  # With a default supplied, unicodedata.name returns it instead of raising
  # ValueError for unnamed characters.
  return unicodedata.name(c, None)

def get_unicode_list(range):
  """Build one row of Unicode properties per code point in `range`.

  Each row is [hex, char, name, general category, East Asian width].
  """
  chars = [chr(code_point) for code_point in range]
  rows = []
  for char in chars:
    row = [
      u_hex(char),
      char,
      u_name_or_null(char),
      unicodedata.category(char),
      unicodedata.east_asian_width(char),
    ]
    rows.append(row)
  return rows

def get_unicode_df(range):
  """Return a DataFrame of Unicode properties for `range`, indexed by `range`.

  Columns are 'hex', 'char', 'name', 'gc' and 'eaw'.
  """
  column_names = ['hex', 'char', 'name', 'gc', 'eaw']
  rows = get_unicode_list(range)
  return pd.DataFrame(rows, index=range, columns=column_names)

# Code points to examine: 0x20 up to 0x1FFFE (range() excludes its stop value).
r = range(0x20, 0x1FFFF)
# One row per code point, indexed by the code point's integer value.
u = get_unicode_df(r)
u.head(11)

Unnamed: 0,hex,char,name,gc,eaw
32,0020,,SPACE,Zs,Na
33,0021,!,EXCLAMATION MARK,Po,Na
34,0022,"""",QUOTATION MARK,Po,Na
35,0023,#,NUMBER SIGN,Po,Na
36,0024,$,DOLLAR SIGN,Sc,Na
37,0025,%,PERCENT SIGN,Po,Na
38,0026,&,AMPERSAND,Po,Na
39,0027,',APOSTROPHE,Po,Na
40,0028,(,LEFT PARENTHESIS,Ps,Na
41,0029,),RIGHT PARENTHESIS,Pe,Na


### [Original feature file](http://blogs.adobe.com/CCJKType/files/2018/04/features.txt) from [Ken's CJK Type blog](https://blogs.adobe.com/CCJKType/2018/04/contextual-spacing.html)

In [11]:
# Class name -> code points that belong to that class.
d = {
  'open': [0x2018, 0x201C, 0x3008, 0x300A, 0x300C, 0x300E, 0x3010, 0x3014, 0x3016, 0x3018, 0x301A, 0x301D, 0xFF08, 0xFF3B, 0xFF5B, 0xFF5F],
  'close': [0x2019, 0x201D, 0x3009, 0x300B, 0x300D, 0x300F, 0x3011, 0x3015, 0x3017, 0x3019, 0x301B, 0x301F, 0xFF09, 0xFF3D, 0xFF5D, 0xFF60],
  'PeriodComma': [0x3001, 0x3002, 0xFF0C, 0xFF0E],
  'ColonExclamQuestion': [0xFF01, 0xFF1F],
  'ColonExclamQuestion.cn': [0xFF01, 0xFF1A, 0xFF1B, 0xFF1F],
  'center': [0x30FB, 0xFF1A, 0xFF1B],
  'center.tw': [0x3001, 0x3002, 0xFF0C, 0xFF0E],
  'vopen': [0x2018, 0x201C, 0x3018, 0x301A, 0xFE17, 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43, 0xFE47, 0xFF5F],
  'vclose': [0x2019, 0x201D, 0x3019, 0x301B, 0xFE18, 0xFE36, 0xFE38, 0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48, 0xFF60],
  'vPeriodComma': [0xFE10, 0xFE11, 0xFE12, 0xFF0E],
  'vColonExclamQuestion': [0xFE13, 0xFE14, 0xFE15, 0xFE16, 0xFF01, 0xFF1A, 0xFF1B, 0xFF1F],
  'vcenter': [0x30FB, 0xFF1A],
  'vcenter.tw': [0x3001, 0x3002, 0xFF0C, 0xFF0E],
}
# Group the class names by code point, keeping the order in which the classes
# appear in `d`. The loop variables are named for what they hold; binding a
# loop variable called `list` here would shadow the builtin for every later
# cell of the notebook.
class_names_by_cp = {}
for class_name, code_points in d.items():
  for cp in code_points:
    class_names_by_cp.setdefault(cp, []).append(class_name)
# One space-separated label per code point, ordered by code point.
s = pd.Series({cp: ' '.join(names) for cp, names in class_names_by_cp.items()},
              dtype=object).sort_index()
# assign() aligns `s` on the index of `u`; code points without a class get NaN.
u = u.assign(cspc=s)
# Show only rows that carry a label (missing values do not compare greater than "").
u.query('cspc > ""').head(10)


Unnamed: 0,hex,char,name,gc,eaw,cspc
8216,2018,‘,LEFT SINGLE QUOTATION MARK,Pi,A,open vopen
8217,2019,’,RIGHT SINGLE QUOTATION MARK,Pf,A,close vclose
8220,201C,“,LEFT DOUBLE QUOTATION MARK,Pi,A,open vopen
8221,201D,”,RIGHT DOUBLE QUOTATION MARK,Pf,A,close vclose
12289,3001,、,IDEOGRAPHIC COMMA,Po,W,PeriodComma center.tw vcenter.tw
12290,3002,。,IDEOGRAPHIC FULL STOP,Po,W,PeriodComma center.tw vcenter.tw
12296,3008,〈,LEFT ANGLE BRACKET,Ps,W,open
12297,3009,〉,RIGHT ANGLE BRACKET,Pe,W,close
12298,300A,《,LEFT DOUBLE ANGLE BRACKET,Ps,W,open
12299,300B,》,RIGHT DOUBLE ANGLE BRACKET,Pe,W,close


### [CSS Text 4 Character classes](https://drafts.csswg.org/css-text-4/#text-spacing-classes)

In [12]:
# Label every code point in `r` with the spacing classes it falls into.
# Labels are gathered in a plain dict and converted to a Series once, rather
# than enlarging a Series one element at a time inside the loop.
css4_labels = {}
for c in r:
  v = []
  gc = unicodedata.category(chr(c))
  eaw = unicodedata.east_asian_width(chr(c))
  # Brackets qualify only inside U+3000..U+303F or when East Asian Width is 'F'.
  in_bracket_scope = 0x3000 <= c <= 0x303F or eaw == 'F'
  if (gc == 'Ps' and in_bracket_scope) or c in (0x2018, 0x201C):
    v.append('open')
  if (gc == 'Pe' and in_bracket_scope) or c in (0x2019, 0x201D):
    v.append('close')
  if c in (0x00B7, 0x2027, 0x30FB):
    v.append('middle')
  if c in (0xFF1A, 0xFF1B):
    v.append('colon')
  if c in (0x3001, 0x3002, 0xFF0C, 0xFF0E):
    v.append('dot')
  if v:
    css4_labels[c] = ' '.join(v)
# dtype=object keeps the Series well-typed even if no code point matched.
s = pd.Series(css4_labels, dtype=object)
# assign() aligns `s` on the index of `u`; unlabelled code points get NaN.
u = u.assign(css4=s)
# Show only rows that carry a label (missing values do not compare greater than "").
u.query('css4 > ""').head(10)

Unnamed: 0,hex,char,name,gc,eaw,cspc,css4
183,00B7,·,MIDDLE DOT,Po,A,,middle
8216,2018,‘,LEFT SINGLE QUOTATION MARK,Pi,A,open vopen,open
8217,2019,’,RIGHT SINGLE QUOTATION MARK,Pf,A,close vclose,close
8220,201C,“,LEFT DOUBLE QUOTATION MARK,Pi,A,open vopen,open
8221,201D,”,RIGHT DOUBLE QUOTATION MARK,Pf,A,close vclose,close
8231,2027,‧,HYPHENATION POINT,Po,A,,middle
12289,3001,、,IDEOGRAPHIC COMMA,Po,W,PeriodComma center.tw vcenter.tw,dot
12290,3002,。,IDEOGRAPHIC FULL STOP,Po,W,PeriodComma center.tw vcenter.tw,dot
12296,3008,〈,LEFT ANGLE BRACKET,Ps,W,open,open
12297,3009,〉,RIGHT ANGLE BRACKET,Pe,W,close,close


### Comparisons

In [13]:
# Rows where at least one of CSPC / CSS4 has a value
pd.set_option('display.max_rows', None)
has_cspc = u['cspc'].notna()
has_css4 = u['css4'].notna()
u[has_cspc | has_css4]

Unnamed: 0,hex,char,name,gc,eaw,cspc,css4
183,00B7,·,MIDDLE DOT,Po,A,,middle
8216,2018,‘,LEFT SINGLE QUOTATION MARK,Pi,A,open vopen,open
8217,2019,’,RIGHT SINGLE QUOTATION MARK,Pf,A,close vclose,close
8220,201C,“,LEFT DOUBLE QUOTATION MARK,Pi,A,open vopen,open
8221,201D,”,RIGHT DOUBLE QUOTATION MARK,Pf,A,close vclose,close
8231,2027,‧,HYPHENATION POINT,Po,A,,middle
12289,3001,、,IDEOGRAPHIC COMMA,Po,W,PeriodComma center.tw vcenter.tw,dot
12290,3002,。,IDEOGRAPHIC FULL STOP,Po,W,PeriodComma center.tw vcenter.tw,dot
12296,3008,〈,LEFT ANGLE BRACKET,Ps,W,open,open
12297,3009,〉,RIGHT ANGLE BRACKET,Pe,W,close,close


In [14]:
# Rows that CSS4 classifies while CSPC has no value
u[u['cspc'].isna() & u['css4'].notna()]

Unnamed: 0,hex,char,name,gc,eaw,cspc,css4
183,00B7,·,MIDDLE DOT,Po,A,,middle
8231,2027,‧,HYPHENATION POINT,Po,A,,middle
12318,301E,〞,DOUBLE PRIME QUOTATION MARK,Pe,W,,close


In [15]:
# Rows that CSPC classifies while CSS4 has no value
u[u['cspc'].notna() & u['css4'].isna()]

Unnamed: 0,hex,char,name,gc,eaw,cspc,css4
65040,FE10,︐,PRESENTATION FORM FOR VERTICAL COMMA,Po,W,vPeriodComma,
65041,FE11,︑,PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA,Po,W,vPeriodComma,
65042,FE12,︒,PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FUL...,Po,W,vPeriodComma,
65043,FE13,︓,PRESENTATION FORM FOR VERTICAL COLON,Po,W,vColonExclamQuestion,
65044,FE14,︔,PRESENTATION FORM FOR VERTICAL SEMICOLON,Po,W,vColonExclamQuestion,
65045,FE15,︕,PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK,Po,W,vColonExclamQuestion,
65046,FE16,︖,PRESENTATION FORM FOR VERTICAL QUESTION MARK,Po,W,vColonExclamQuestion,
65047,FE17,︗,PRESENTATION FORM FOR VERTICAL LEFT WHITE LENT...,Ps,W,vopen,
65048,FE18,︘,PRESENTATION FORM FOR VERTICAL RIGHT WHITE LEN...,Pe,W,vclose,
65077,FE35,︵,PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS,Ps,W,vopen,


In [16]:
# Rows where both columns have a value and the two values disagree
both_present = u['cspc'].notna() & u['css4'].notna()
u[both_present & (u['cspc'] != u['css4'])]

Unnamed: 0,hex,char,name,gc,eaw,cspc,css4
8216,2018,‘,LEFT SINGLE QUOTATION MARK,Pi,A,open vopen,open
8217,2019,’,RIGHT SINGLE QUOTATION MARK,Pf,A,close vclose,close
8220,201C,“,LEFT DOUBLE QUOTATION MARK,Pi,A,open vopen,open
8221,201D,”,RIGHT DOUBLE QUOTATION MARK,Pf,A,close vclose,close
12289,3001,、,IDEOGRAPHIC COMMA,Po,W,PeriodComma center.tw vcenter.tw,dot
12290,3002,。,IDEOGRAPHIC FULL STOP,Po,W,PeriodComma center.tw vcenter.tw,dot
12312,3018,〘,LEFT WHITE TORTOISE SHELL BRACKET,Ps,W,open vopen,open
12313,3019,〙,RIGHT WHITE TORTOISE SHELL BRACKET,Pe,W,close vclose,close
12314,301A,〚,LEFT WHITE SQUARE BRACKET,Ps,W,open vopen,open
12315,301B,〛,RIGHT WHITE SQUARE BRACKET,Pe,W,close vclose,close
