In [25]:
import string

### 1. Matching text at the start or end of a string
* __str.startswith()__ or __str.endswith()__
* convenient way to perform basic prefix and suffix checking

In [1]:
filename = 'spam.txt'
filename.endswith('.txt')

True

In [2]:
filename.startswith('file:')

False

In [3]:
url = 'http://www.python.org'
url.startswith('http:')

True

* if we need to check against multiple choices, simply provide a tuple of possibilities to startswith() or endswith()

In [7]:
filenames = ['Makefile', 'foo.c', 'bar.py', 'spam.c', 'spam.h']

In [8]:
[name for name in filenames if name.endswith(('.c', '.h'))]

['foo.c', 'spam.c', 'spam.h']

In [9]:
any(name.endswith('.py') for name in filenames)

True

In [10]:
from urllib.request import urlopen

In [11]:
def read_data(name):
    """Return the contents of ``name``, which is either a URL or a local path.

    Names starting with ``http:``, ``https:`` or ``ftp:`` are fetched with
    ``urlopen`` and returned as raw ``bytes``; any other name is opened as a
    local text file and returned as ``str``.
    """
    if name.startswith(('http:', 'https:', 'ftp:')):
        # Use the response as a context manager so the connection is closed
        # as soon as the body is read (or if read() raises), rather than
        # being left open until garbage collection.
        with urlopen(name) as response:
            return response.read()
    with open(name) as f:
        return f.read()

In [12]:
choices = ['http:', 'ftp:']
url = 'http://www.python.org'

In [13]:
url.starswith(choices)

AttributeError: 'str' object has no attribute 'starswith'

In [14]:
url.startswith(tuple(choices))

True

### 2. Matching and searching for text patterns

In [19]:
text = 'yeah, all right, but yeah, but ok, yeah'

In [16]:
# exact match
text =='yeah'

False

In [17]:
# match at start or end
text.startswith('yeah')

True

In [20]:
# search for the location of the first occurrence
# for more complicated matching, use regular expressions, e.g. __re.match()__
text.find('ok')

31

### 3. Interpolating variables in string
* format()
* format_map(), vars()

In [12]:
# using format() method
s = '{name} has {n} messages.'
s.format(name='Guido', n=37)

'Guido has 37 messages.'

In [14]:
# alternative
name = 'Guido'
n = 37
s.format_map(vars())

'Guido has 37 messages.'

In [15]:
class Info:
    """Simple record holding a ``name`` and a count ``n`` as instance attributes."""

    def __init__(self, name, n):
        # name is assigned before n, so vars(self) lists them in that order.
        self.name, self.n = name, n

In [16]:
a = Info('Guido', 37)
s.format_map(vars(a))

'Guido has 37 messages.'

In [17]:
# one downside of format and format_map is that they do not deal gracefully with missing values
s.format(name='Guido')

KeyError: 'n'

In [18]:
# one way to avoid this is to define a dictionary class with a __missing__() method
class safesub(dict):
    """dict subclass that maps a missing key to that key wrapped in braces."""

    def __missing__(self, key):
        # Invoked by dict lookup when key is absent; the placeholder is
        # returned without being stored in the dict.
        return ''.join(('{', key, '}'))

In [24]:
del n  #make sure n is undefined
s.format_map(safesub(vars()))

'Guido has {n} messages.'

In [32]:
name = 'Guido'
n = 37
s = string.Template('$name has $n messages.')
s.substitute(vars())

'Guido has 37 messages.'

### 4. Reformatting text to a fixed number of columns

In [6]:
# Sample paragraph for the textwrap examples. Adjacent string literals are
# joined by the parser; each fragment ends with an explicit space so that no
# two words are glued together at a line break.
s = (
    "It was the best of times, it was the worst of times, it was the age of wisdom, "
    "it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, "
    "it was the season of Light, it was the season of Darkness, it was the spring of hope, "
    "it was the winter of despair, we had everything before us, we had nothing before us, "
    "we were all going direct to Heaven, we were all going direct the other way – in short, "
    "the period was so far like the present period, that some of its noisiest authorities "
    "insisted on its being received, for good or for evil, in the superlative degree of "
    "comparison only."
)

In [2]:
import textwrap

In [7]:
print(textwrap.fill(s, 70))

It was the best of times, it was the worst of times, it was the age of
wisdom, it was the age of foolishness, it was the epoch of belief, it
was the epoch of incredulity, it was the season of Light, it was the
season of Darkness, it was the spring of hope, it was the winter of
despair, we had everything before us, we had nothing before us, we
were all going direct to Heaven, we were all going direct the other
way – in short, the period was so far like the present period, that
some of its noisiest authorities insisted on its being received, for
good or for evil, in the superlative degree of comparison only.


In [8]:
print(textwrap.fill(s, 40))

It was the best of times, it was the
worst of times, it was the age of
wisdom, it was the age of foolishness,
it was the epoch of belief, it was the
epoch of incredulity, it was the season
of Light, it was the season of Darkness,
it was the spring of hope, it was the
winter of despair, we had everything
before us, we had nothing before us, we
were all going direct to Heaven, we were
all going direct the other way – in
short, the period was so far like the
present period, that some of its
noisiest authorities insisted on its
being received, for good or for evil, in
the superlative degree of comparison
only.


In [9]:
print(textwrap.fill(s, 40, initial_indent="    "))

    It was the best of times, it was the
worst of times, it was the age of
wisdom, it was the age of foolishness,
it was the epoch of belief, it was the
epoch of incredulity, it was the season
of Light, it was the season of Darkness,
it was the spring of hope, it was the
winter of despair, we had everything
before us, we had nothing before us, we
were all going direct to Heaven, we were
all going direct the other way – in
short, the period was so far like the
present period, that some of its
noisiest authorities insisted on its
being received, for good or for evil, in
the superlative degree of comparison
only.


In [10]:
print(textwrap.fill(s, 40, subsequent_indent="    "))

It was the best of times, it was the
    worst of times, it was the age of
    wisdom, it was the age of
    foolishness, it was the epoch of
    belief, it was the epoch of
    incredulity, it was the season of
    Light, it was the season of
    Darkness, it was the spring of hope,
    it was the winter of despair, we had
    everything before us, we had nothing
    before us, we were all going direct
    to Heaven, we were all going direct
    the other way – in short, the period
    was so far like the present period,
    that some of its noisiest
    authorities insisted on its being
    received, for good or for evil, in
    the superlative degree of comparison
    only.


### 5. Stripping unwanted characters from strings
* strip()
* lstrip(), rstrip()

In [33]:
# whitespace stripping
s = '   hello world  \n'
s.strip()

'hello world'

In [34]:
s.lstrip()

'hello world  \n'

In [35]:
s.rstrip()

'   hello world'

In [36]:
# character stripping
t = '----hello====='
t.lstrip('-')

'hello====='

In [37]:
t.strip('-=')

'hello'

### 6. Aligning text strings

In [38]:
text = 'Hello World'
text.ljust(20)

'Hello World         '

In [39]:
text.rjust(20)

'         Hello World'

In [40]:
text.center(20)

'    Hello World     '

In [41]:
text.rjust(20, '=')

'=========Hello World'

In [42]:
text.center(20,'*')

'****Hello World*****'

In [43]:
format(text, '>20')

'         Hello World'

In [44]:
format(text, '<20')

'Hello World         '

In [45]:
format(text, '^20')

'    Hello World     '

In [47]:
format(text, '=>20s')

'=========Hello World'

In [48]:
format(text, '*^20s')

'****Hello World*****'

In [49]:
'{:>10s} {:>10s}'.format('Hello', 'World')

'     Hello      World'

In [51]:
x =1.2345
format(x, '>10')

'    1.2345'

In [52]:
format(x, '^10.2f')

'   1.23   '

### 7. Combining and concatenating strings

In [53]:
parts = ['is', 'San Jose', 'not', 'Los Angeles?']
' '.join(parts)

'is San Jose not Los Angeles?'

In [54]:
','.join(parts)

'is,San Jose,not,Los Angeles?'

In [55]:
''.join(parts)

'isSan JosenotLos Angeles?'

In [56]:
a = 'is San Jose'
b = 'not Los Angeles?'

In [57]:
a+' '+b

'is San Jose not Los Angeles?'

In [58]:
print("{} {}".format(a,b))

is San Jose not Los Angeles?


In [59]:
print(a+' '+b)

is San Jose not Los Angeles?


In [60]:
# when building output from lots of small strings, you can write that code as a generator function,
# using yield to emit fragments
def sample():
    """Generator that yields the four text fragments one at a time, in order."""
    yield from ('Is', 'San Jose', 'Not', 'Los Angeles?')

In [62]:
# to assemble the fragments together, simply join them using join():
text = ' '.join(sample())
text

'Is San Jose Not Los Angeles?'