# 字符串与文本操作

## 字符串是一个线性结构

In [1]:
s = 'i love python'

In [2]:
s[0]

'i'

In [3]:
s[-1]

'n'

In [4]:
s[3:8]

'ove p'

In [5]:
s[::-1]

'nohtyp evol i'

In [6]:
# Iterating over a str yields its characters one at a time, in order.
for c in s:
    print(c)

i
 
l
o
v
e
 
p
y
t
h
o
n


### 字符串是不可变的

In [7]:
s[1]

' '

In [8]:
s[1] = '_'

TypeError: 'str' object does not support item assignment

In [9]:
t = (1, 2)

In [10]:
t[0] = 3

TypeError: 'tuple' object does not support item assignment

## 字符串的格式化

### printf-style format

In [12]:
# A one-element tuple supplies the single positional value consumed by %s.
'I love %s' % ('Python', )

'I love Python'

In [13]:
'I love %(name)s' % {'name': 'Python'}

'I love Python'

In [14]:
'I love %(name)s, %(name)s is my first lang' % {'name': 'Python'}

'I love Python, Python is my first lang'

In [15]:
'I love %s, %s is my first lang' % ('Python', 'Python')

'I love Python, Python is my first lang'

#### Conversion

In [16]:
'%d' % 4

'4'

In [17]:
'%d' % 'str'

TypeError: %d format: a number is required, not str

In [18]:
'%d' % 3.4

'3'

In [19]:
'%E' % 0.000000001

'1.000000E-09'

In [20]:
'%E' % 100000000000000000000000000

'1.000000E+26'

In [21]:
'%g' % 0.001

'0.001'

In [22]:
'%g' % 0.0000000001

'1e-10'

#### flags

In [23]:
'%10d' % 1

'         1'

In [24]:
'%010d' % 1

'0000000001'

### format方法

In [26]:
'{0}, {name}'.format('hello', name='world')

'hello, world'

## 字符串的常用操作

In [27]:
help(str)

Help on class str in module builtins:

class str(object)
 |  str(object='') -> str
 |  str(bytes_or_buffer[, encoding[, errors]]) -> str
 |  
 |  Create a new string object from the given object. If encoding or
 |  errors is specified, then the object must expose a data buffer
 |  that will be decoded using the given encoding and error handler.
 |  Otherwise, returns the result of object.__str__() (if defined)
 |  or repr(object).
 |  encoding defaults to sys.getdefaultencoding().
 |  errors defaults to 'strict'.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __format__(...)
 |      S.__format__(format_spec) -> str
 |      
 |      Return a formatted version of S as described by format_spec.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  ... (output truncated)

In [28]:
lst = ['I', 'love', 'Python']

' '.join(lst)

'I love Python'

In [29]:
'_'.join(lst)

'I_love_Python'

In [30]:
# Manual concatenation, for contrast with ' '.join(lst): a separator is
# appended after every element, so the result ends with a trailing space.
# NOTE: the loop variable reuses the notebook-level name `s`, so `s` is
# rebound to the last element of `lst` once this cell has run.
ret = ''
for s in lst:
    ret += s
    ret += ' '
ret

'I love Python '

In [31]:
help(str.split)

Help on method_descriptor:

split(...)
    S.split(sep=None, maxsplit=-1) -> list of strings
    
    Return a list of the words in S, using sep as the
    delimiter string.  If maxsplit is given, at most maxsplit
    splits are done. If sep is not specified or is None, any
    whitespace string is a separator and empty strings are
    removed from the result.



In [32]:
help(str.rsplit)

Help on method_descriptor:

rsplit(...)
    S.rsplit(sep=None, maxsplit=-1) -> list of strings
    
    Return a list of the words in S, using sep as the
    delimiter string, starting at the end of the string and
    working to the front.  If maxsplit is given, at most maxsplit
    splits are done. If sep is not specified, any whitespace string
    is a separator.



In [33]:
s

'Python'

In [34]:
s = 'I love Python'

In [35]:
s.split()

['I', 'love', 'Python']

In [36]:
s.split('o')

['I l', 've Pyth', 'n']

In [37]:
s.split(' ', 1)

['I', 'love Python']

In [39]:
s = 'root:x:0:0:root:/root:/bin/bash'

In [40]:
s.split(':', 1)

['root', 'x:0:0:root:/root:/bin/bash']

In [41]:
username, _ = s.split(':', 1)

In [42]:
username

'root'

In [43]:
s = 'URL:http://www.magedu.com'

In [44]:
key, value = s.split(':', 1)

In [45]:
print(key)
print(value)

URL
http://www.magedu.com


In [46]:
s.split(':')

['URL', 'http', '//www.magedu.com']

In [47]:
s.rsplit(':')

['URL', 'http', '//www.magedu.com']

In [48]:
s.rsplit(':', 1)

['URL:http', '//www.magedu.com']

In [49]:
help(s.splitlines)

Help on built-in function splitlines:

splitlines(...) method of builtins.str instance
    S.splitlines([keepends]) -> list of strings
    
    Return a list of the lines in S, breaking at line boundaries.
    Line breaks are not included in the resulting list unless keepends
    is given and true.



In [50]:
s = '''
I love Python
I also love linux
'''

In [51]:
s

'\nI love Python\nI also love linux\n'

In [52]:
s.splitlines()

['', 'I love Python', 'I also love linux']

In [53]:
s.splitlines(True)

['\n', 'I love Python\n', 'I also love linux\n']

In [54]:
s.splitlines(False)

['', 'I love Python', 'I also love linux']

In [55]:
help(s.partition)

Help on built-in function partition:

partition(...) method of builtins.str instance
    S.partition(sep) -> (head, sep, tail)
    
    Search for the separator sep in S, and return the part before it,
    the separator itself, and the part after it.  If the separator is not
    found, return S and two empty strings.



In [56]:
s = 'root:x:0:0:root:/root:/bin/bash'

In [57]:
s

'root:x:0:0:root:/root:/bin/bash'

In [58]:
s.partition(':')

('root', ':', 'x:0:0:root:/root:/bin/bash')

In [59]:
h, _, t = s.partition(':')

In [60]:
t

'x:0:0:root:/root:/bin/bash'

In [64]:
h, _, t = t.partition(':')

In [65]:
t

'0:0:root:/root:/bin/bash'

In [63]:
# NOTE(review): stale cell — its execution count (63) precedes In [64], so the
# output below shows `t` as it was before the second partition.
t

'x:0:0:root:/root:/bin/bash'

In [66]:
s.rpartition(':')

('root:x:0:0:root:/root', ':', '/bin/bash')

In [67]:
s = 'I love Python'

In [68]:
s.capitalize()

'I love python'

In [69]:
s = 'i love python'

In [70]:
s.capitalize()

'I love python'

In [71]:
s.title()

'I Love Python'

In [72]:
s.upper()

'I LOVE PYTHON'

In [73]:
s.lower()

'i love python'

In [74]:
s = s.title()

In [75]:
s

'I Love Python'

In [76]:
s.swapcase()

'i lOVE pYTHON'

In [77]:
help(s.center)

Help on built-in function center:

center(...) method of builtins.str instance
    S.center(width[, fillchar]) -> str
    
    Return S centered in a string of length width. Padding is
    done using the specified fill character (default is a space)



In [78]:
s = 'Python'

In [79]:
s.center(20)

'       Python       '

In [80]:
s.center(20, '*')

'*******Python*******'

In [81]:
help(s.ljust)

Help on built-in function ljust:

ljust(...) method of builtins.str instance
    S.ljust(width[, fillchar]) -> str
    
    Return S left-justified in a Unicode string of length width. Padding is
    done using the specified fill character (default is a space).



In [82]:
s.ljust(20)

'Python              '

In [83]:
s.ljust(20, '*')

'Python**************'

In [84]:
s.rjust(20, '*')

'**************Python'

In [85]:
s = s.center(20)

In [86]:
s

'       Python       '

In [87]:
s.strip()

'Python'

In [88]:
s = 'abc\n'

In [89]:
s

'abc\n'

In [90]:
s.strip()

'abc'

In [91]:
f =  open('/etc/passwd', 'r')

In [92]:
f.readline()

'root:x:0:0:root:/root:/bin/bash\n'

In [93]:
f.readline().strip()

'bin:x:1:1:bin:/bin:/sbin/nologin'

In [94]:
f.close()

In [95]:
f =  open('/etc/passwd', 'r')

In [96]:
# Print the login shell (the last ':'-separated field) of the root entry.
# `with f` guarantees the handle opened in the previous cell is closed, even
# if parsing raises; iterating the file directly avoids loading every line
# into a list the way readlines() does.
with f:
    for line in f:
        line = line.strip()
        if line.startswith('root:'):
            _, shell = line.rsplit(':', 1)
            print(shell)


/bin/bash


In [97]:
help(s.startswith)

Help on built-in function startswith:

startswith(...) method of builtins.str instance
    S.startswith(prefix[, start[, end]]) -> bool
    
    Return True if S starts with the specified prefix, False otherwise.
    With optional start, test S beginning at that position.
    With optional end, stop comparing S at that position.
    prefix can also be a tuple of strings to try.



In [98]:
help(s.endswith)

Help on built-in function endswith:

endswith(...) method of builtins.str instance
    S.endswith(suffix[, start[, end]]) -> bool
    
    Return True if S ends with the specified suffix, False otherwise.
    With optional start, test S beginning at that position.
    With optional end, stop comparing S at that position.
    suffix can also be a tuple of strings to try.



In [99]:
s

'abc\n'

In [100]:
s = 'root:x:0:0:root:/root:/bin/bash\n'

In [101]:
s

'root:x:0:0:root:/root:/bin/bash\n'

In [102]:
s.count('r')

3

In [103]:
s.count('root')

3

In [104]:
s.index('r')

0

In [105]:
s.index('s')

29

In [106]:
s.index('root')

0

In [107]:
s.index('bash')

27

In [108]:
help(s.find)

Help on built-in function find:

find(...) method of builtins.str instance
    S.find(sub[, start[, end]]) -> int
    
    Return the lowest index in S where substring sub is found,
    such that sub is contained within S[start:end].  Optional
    arguments start and end are interpreted as in slice notation.
    
    Return -1 on failure.



In [109]:
s.find('root')

0

In [110]:
s.find('root', 3)

11

In [111]:
s.find('comyn')

-1

In [112]:
s.index('comyn')

ValueError: substring not found

In [113]:
help(s.replace)

Help on built-in function replace:

replace(...) method of builtins.str instance
    S.replace(old, new[, count]) -> str
    
    Return a copy of S with all occurrences of substring
    old replaced by new.  If the optional argument count is
    given, only the first count occurrences are replaced.



In [114]:
s

'root:x:0:0:root:/root:/bin/bash\n'

In [115]:
s.replace('root', 'comyn')

'comyn:x:0:0:comyn:/comyn:/bin/bash\n'

In [116]:
s.replace('root', 'comyn', 1)

'comyn:x:0:0:root:/root:/bin/bash\n'

In [117]:
s.replace('root', 'comyn', -1)

'comyn:x:0:0:comyn:/comyn:/bin/bash\n'

In [118]:
s.replace('root', 'comyn', -2)

'comyn:x:0:0:comyn:/comyn:/bin/bash\n'