## 字符串操作

In [1]:
from __future__ import print_function
import numpy as np

In [2]:
# Attribution string: author handle followed by a repository URL.
author = "kyubyong. https://github.com/Kyubyong/numpy_exercises"

In [3]:
# Display the version string of the NumPy installation in use.
np.__version__

'1.15.0'

 1. [ ] 把`x1`与`x2`连接起来。

In [4]:
# Element-wise concatenation: each string in x1 is joined with the string at
# the same position in x2.
# dtype is np.str_ because the bare alias np.str was deprecated in NumPy 1.20
# and removed in 1.24; np.str_ works on old and new releases alike.
x1 = np.array(['Hello', 'Say'], dtype=np.str_)
x2 = np.array([' world', ' something'], dtype=np.str_)
out = np.char.add(x1, x2)
out

array(['Hello world', 'Say something'], dtype='<U15')

 2. [ ] 智能重复`x`三次。

In [5]:
# Element-wise repetition: every string in x is repeated three times.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello ', 'Say '], dtype=np.str_)
out = np.char.multiply(x, 3)
out

array(['Hello Hello Hello ', 'Say Say Say '], dtype='<U18')

 3. [ ] 对`x`完成如下五种操作：
  - 智能首字母大写操作。
  - 智能小写操作。
  - 智能大写操作。
  - 智能切换操作。
  - 智能标题操作。

In [6]:
# Five element-wise case transformations applied to the same input array.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['heLLo woRLd', 'Say sOmething'], dtype=np.str_)
capitalized = np.char.capitalize(x)  # first character upper, rest lower
lowered = np.char.lower(x)           # everything lower case
uppered = np.char.upper(x)           # everything upper case
swapcased = np.char.swapcase(x)      # upper <-> lower for every character
titlecased = np.char.title(x)        # first letter of each word upper case
print("capitalized =", capitalized)
print("lowered =", lowered)
print("uppered =", uppered)
print("swapcased =", swapcased)
print("titlecased =", titlecased)

capitalized = ['Hello world' 'Say something']
lowered = ['hello world' 'say something']
uppered = ['HELLO WORLD' 'SAY SOMETHING']
swapcased = ['HEllO WOrlD' 'sAY SoMETHING']
titlecased = ['Hello World' 'Say Something']


 4. [ ] 用下划线`_`来填充每个字符串，总宽度为`20`，分别进行居中对齐、左对齐、右对齐三种操作。

In [7]:
# Pad every string to a total width of 20 using '_' as the fill character,
# once centered, once left-justified and once right-justified.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['hello world', 'say something'], dtype=np.str_)
centered = np.char.center(x, 20, fillchar='_')
left = np.char.ljust(x, 20, fillchar='_')
right = np.char.rjust(x, 20, fillchar='_')

print("centered =", centered)
print("left =", left)
print("right =", right)

centered = ['____hello world_____' '___say something____']
left = ['hello world_________' 'say something_______']
right = ['_________hello world' '_______say something']


 5. [ ] 对`x`进行`utf-8`编码和解码操作。

In [8]:
# Round trip: encode each unicode string to UTF-8 bytes, then decode the bytes
# back to unicode strings.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['你好世界', '说些什么吧'], dtype=np.str_)
encoded = np.char.encode(x, 'utf-8')
decoded = np.char.decode(encoded, 'utf-8')
print("encoded =", encoded)
print("decoded =", decoded)

encoded = [b'\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c'
 b'\xe8\xaf\xb4\xe4\xba\x9b\xe4\xbb\x80\xe4\xb9\x88\xe5\x90\xa7']
decoded = ['你好世界' '说些什么吧']


 6. [ ] 在`x`的字符之间插入一个空格。

In [9]:
# Insert a single space between the characters of every string: the separator
# ' ' is joined over the characters of each element of x.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['hello world', 'say something'], dtype=np.str_)
# Bind the result to `out` (as the sibling cells do) and display it.
out = np.char.join(' ', x)
out

array(['h e l l o   w o r l d', 's a y   s o m e t h i n g'], dtype='<U25')

 7. [ ] 对`x`完成如下三项操作：
  - 智能移除首尾的空白符。
  - 智能移除头部空白符。
  - 智能移除尾部空白符。

In [10]:
# Element-wise whitespace removal: both ends, leading only, trailing only.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['   hello world   ', '\tsay something\n'], dtype=np.str_)
stripped = np.char.strip(x)     # leading and trailing whitespace removed
lstripped = np.char.lstrip(x)   # only leading whitespace removed
rstripped = np.char.rstrip(x)   # only trailing whitespace removed
print("stripped =", stripped)
print("lstripped =", lstripped)
print("rstripped =", rstripped)

stripped = ['hello world' 'say something']
lstripped = ['hello world   ' 'say something\n']
rstripped = ['   hello world' '\tsay something']


 8. [ ] 用空格来划分`x`中的元素。

In [11]:
# Split every element on whitespace; the result is an object array whose
# entries are Python lists of words.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello my name is John'], dtype=np.str_)
out = np.char.split(x)
out

array([list(['Hello', 'my', 'name', 'is', 'John'])], dtype=object)

 9. [ ] 根据换行符把`x`中的元素分解成多行形式。

In [12]:
# Break every element at line boundaries; the result is an object array whose
# entries are Python lists of lines.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello\nmy name is John'], dtype=np.str_)
out = np.char.splitlines(x)
out

array([list(['Hello', 'my name is John'])], dtype=object)

 10. [ ] 把数字字符串内容的`x`变成4位数字形式，用`0`进行左边补位。

In [13]:
# Left-pad the numeric string with '0' up to a width of 4 characters.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['34'], dtype=np.str_)
out = np.char.zfill(x, 4)
out

array(['0034'], dtype='<U4')

 11. [ ] 把`x`中的`John`替换成`Jim`。

In [14]:
# Replace every occurrence of the substring 'John' with 'Jim' in each element.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello my name is John'], dtype=np.str_)
out = np.char.replace(x, 'John', 'Jim')
out

array(['Hello my name is Jim'], dtype='<U20')

## 比较操作

 12. [ ] 智能比较 `x1 == x2` 的结果。

In [15]:
# Element-wise equality of two string arrays, computed twice: with the `==`
# operator and with np.char.equal. Both must yield the same boolean mask.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x1 = np.array(['Hello', 'my', 'name', 'is', 'John'], dtype=np.str_)
x2 = np.array(['Hello', 'my', 'name', 'is', 'Jim'], dtype=np.str_)
out1 = x1 == x2
out2 = np.char.equal(x1, x2)
# The masks are boolean, so compare them exactly rather than with the
# floating-point tolerance check np.allclose.
assert np.array_equal(out1, out2)
out2

array([ True,  True,  True,  True, False])

 13. [ ] 智能比较 `x1 != x2` 的结果。

In [16]:
# Element-wise inequality of two string arrays, computed twice: with the `!=`
# operator and with np.char.not_equal. Both must yield the same boolean mask.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x1 = np.array(['Hello', 'my', 'name', 'is', 'John'], dtype=np.str_)
x2 = np.array(['Hello', 'my', 'name', 'is', 'Jim'], dtype=np.str_)
out1 = x1 != x2
out2 = np.char.not_equal(x1, x2)
# The masks are boolean, so compare them exactly rather than with the
# floating-point tolerance check np.allclose.
assert np.array_equal(out1, out2)
out2

array([False, False, False, False,  True])

## 字符串信息

 14. [ ] 智能统计`x`中每个字符串里字母`"l"`出现的次数。

In [17]:
# Count, per element, the occurrences of the lower-case letter 'l'.
# The match is case-sensitive, so the capital 'L' in 'Lily' is not counted.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello', 'my', 'name', 'is', 'Lily'], dtype=np.str_)
out = np.char.count(x, 'l')
out

array([2, 0, 0, 0, 1])

 15. [ ] 智能查找`x`中每个字符串里字母`"l"`第一次出现的索引位置。

In [18]:
# Find, per element, the lowest index at which the lower-case letter 'l'
# occurs; elements that do not contain it yield -1.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello', 'my', 'name', 'is', 'Lily'], dtype=np.str_)
out = np.char.find(x, 'l')
out

array([ 2, -1, -1, -1,  2])

 16. [ ] 对`x`进行如下三项操作：
  - 智能检查`x`中的每个元素是否只是数字内容。
  - 智能检查`x`中的每个元素是否只是小写形式。
  - 智能检查`x`中的每个元素是否只是全大写内容。

In [19]:
# Three element-wise predicates on the same input array.
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['Hello', 'I', 'am', '20', 'years', 'old'], dtype=np.str_)
out1 = np.char.isdigit(x)   # True where the element consists only of digits
out2 = np.char.islower(x)   # True where all cased characters are lower case
out3 = np.char.isupper(x)   # True where all cased characters are upper case
print("Digits only =", out1)
print("Lower cases only =", out2)
print("Upper cases only =", out3)

Digits only = [False False False  True False False]
Lower cases only = [False False  True False  True  True]
Upper cases only = [False  True False False False False]


 17. [ ] 智能检查`x`中每个元素的内容是否都是以`"hi"`作为开始的。

In [20]:
# Check, per element, whether the string begins with the prefix "hi".
# dtype is np.str_ because the alias np.str was removed in NumPy 1.24.
x = np.array(['he', 'his', 'him', 'his'], dtype=np.str_)
out = np.char.startswith(x, "hi")
out

array([False,  True,  True,  True])