In [1]:
import numpy as np, pandas as pd

# 11.2.1 .str访问器

In [4]:
# 对内容进行字符操作
s = pd.Series(['A','Boy','2',np.nan], dtype="string")
s

0       A
1     Boy
2       2
3    <NA>
dtype: string

In [5]:
# 对字符转换小写，忽略数字和NaN
s.str.lower()

0       a
1     boy
2       2
3    <NA>
dtype: string

In [6]:
df = pd.read_excel("../team2.xlsx")
df

Unnamed: 0,name,team,Q1,Q2,Q3,Q4,1city,what color
0,Liver,E,89,21,24,1,shanghai,yellew
1,Arry,C,55,37,37,2,nanjin,red
2,Ack,A,57,60,33,3,beijin,purple
3,Eorge,C,93,4,71,4,tianjin,blue
4,Oah,D,93,49,44,5,henan,black
5,Harlie,C,24,13,87,6,hebei,pink


In [8]:
# 对于非字符类型，可以先转换再使用

# 将Q1转为object对象
df.Q1.astype(str)

0    89
1    55
2    57
3    93
4    93
5    24
Name: Q1, dtype: object

In [10]:
df.team.dtype

dtype('O')

In [11]:
# 将object转为string
df.team.astype('string')

0    E
1    C
2    A
3    C
4    D
5    C
Name: team, dtype: string

In [19]:
# 对表头操作，转换成小写
df.columns.str.lower()

Index(['name', 'team', 'q1', 'q2', 'q3', 'q4', '1city', 'what color'], dtype='object')

In [17]:
df.columns

Index(['name', 'team', 'Q1', 'Q2', 'Q3', 'Q4', '1city', 'what color'], dtype='object')

In [23]:
# str连续操作字符串
df.columns.str.lower().str.replace('q','_')

Index(['name', 'team', '_1', '_2', '_3', '_4', '1city', 'what color'], dtype='object')

In [24]:
df.columns.str.lower().str.replace('q','_')[4]

'_3'

# 11.2.2 文本格式

In [25]:
s = pd.Series(['lower','CAPITALS','this is a sentence','SeApCaSe'])
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SeApCaSe
dtype: object

In [26]:
s.str.lower()

0                 lower
1              capitals
2    this is a sentence
3              seapcase
dtype: object

In [27]:
s.str.upper()

0                 LOWER
1              CAPITALS
2    THIS IS A SENTENCE
3              SEAPCASE
dtype: object

In [28]:
# 标题格式，每个单词大写
s.str.title()

0                 Lower
1              Capitals
2    This Is A Sentence
3              Seapcase
dtype: object

In [29]:
# 首字母大写
s.str.capitalize()

0                 Lower
1              Capitals
2    This is a sentence
3              Seapcase
dtype: object

In [30]:
# 大小写互换
s.str.swapcase()

0                 LOWER
1              capitals
2    THIS IS A SENTENCE
3              sEaPcAsE
dtype: object

In [31]:
# 转小写，支持其他语言
s.str.casefold()

0                 lower
1              capitals
2    this is a sentence
3              seapcase
dtype: object

# 11.2.3 文本对齐

In [32]:
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SeApCaSe
dtype: object

In [34]:
# 居中对齐，宽度50，用‘-’左右填充
s.str.center(50, fillchar='-')

0    ----------------------lower-------------------...
1    ---------------------CAPITALS-----------------...
2    ----------------this is a sentence------------...
3    ---------------------SeApCaSe-----------------...
dtype: object

In [35]:
# 左对齐
s.str.ljust(50, fillchar='-')

0    lower-----------------------------------------...
1    CAPITALS--------------------------------------...
2    this is a sentence----------------------------...
3    SeApCaSe--------------------------------------...
dtype: object

In [36]:
# 右对齐
s.str.rjust(50, fillchar='-')

0    ---------------------------------------------l...
1    ------------------------------------------CAPI...
2    --------------------------------this is a sent...
3    ------------------------------------------SeAp...
dtype: object

In [39]:
s.str.pad(width=50, side='left', fillchar='-')
# 等同于右对齐

0    ---------------------------------------------l...
1    ------------------------------------------CAPI...
2    --------------------------------this is a sent...
3    ------------------------------------------SeAp...
dtype: object

In [41]:
# 不足13位的前面加0
s.str.zfill(13)

0         00000000lower
1         00000CAPITALS
2    this is a sentence
3         00000SeApCaSe
dtype: object

# 11.2.4 计数和编码

In [44]:
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SeApCaSe
dtype: object

In [45]:
# 统计字符串中字母a的数量，注意大小写有区别
s.str.count('a')

0    0
1    0
2    1
3    1
dtype: int64

In [46]:
# 统计字符的长度
s.str.len()

0     5
1     8
2    18
3     8
dtype: int64

In [47]:
# 编码
s.str.encode('utf-8')

0                 b'lower'
1              b'CAPITALS'
2    b'this is a sentence'
3              b'SeApCaSe'
dtype: object

In [48]:
# 解码
s.str.decode('utf-8')

0   NaN
1   NaN
2   NaN
3   NaN
dtype: float64

# 11.2.5 格式判定

In [50]:
# 是否为字母
s.str.isalpha()

0     True
1     True
2    False
3     True
dtype: bool

In [51]:
# 是否为数字
s.str.isnumeric()

0    False
1    False
2    False
3    False
dtype: bool

In [52]:
# 是否有数字和字母组成
s.str.isalnum()

0     True
1     True
2    False
3     True
dtype: bool

In [53]:
# 是否为数字
s.str.isdigit()

0    False
1    False
2    False
3    False
dtype: bool

In [54]:
# 是否为小数
s.str.isdecimal()

0    False
1    False
2    False
3    False
dtype: bool

In [55]:
# 是否为空格
s.str.isspace()

0    False
1    False
2    False
3    False
dtype: bool

In [56]:
s.str.islower()

0     True
1    False
2     True
3    False
dtype: bool

In [57]:
s.str.isupper()

0    False
1     True
2    False
3    False
dtype: bool

In [58]:
# 是否标题格式
s.str.istitle()

0    False
1    False
2    False
3    False
dtype: bool