# STRINGS

In [5]:
# A string is a sequence of characters; indexing starts at 0.
fruit = 'banana'
length = len(fruit)
letter = fruit[1]  # the second character, because the first one is fruit[0]

# To get the last letter of a string, you might be tempted to try something like this.
# It fails on purpose: valid indices run from 0 to length - 1, so fruit[length] is out of range.
last = fruit[length]
last

IndexError: string index out of range

In [6]:
# Intentional error: the + operator cannot combine a string with an integer.
a = 'test' + 3

TypeError: Can't convert 'int' object to str implicitly

In [16]:
# The reason for the IndexError is that there is no letter in 'banana' with the index 6.
# Since indexing starts at 0, the last letter is at index length - 1.

last = fruit [length - 1]
last

'a'

In [13]:
# Sometimes you need to combine a string with something that isn't a string.
# str() converts the number to a string first, so that + can join the pieces.

print("I have " + str(2) + " coconuts!")

I have 2 coconuts!


## Traversal through a string with a loop

In [26]:
# Traversal = processing a string one character at a time.
# One way to write a traversal is with a while loop:

index = 0
while index < len(fruit):
    letter = fruit[index]
    print(letter)
    index += 1

# The loop condition is index < len(fruit), so once index equals the length of
# the string the condition is false and the body of the loop is not executed.

b
a
n
a
n
a


In [27]:
# Another way of writing a traversal is with a for loop,
# which hands us each character in turn without an explicit index:
for i in fruit:
    print(i)

b
a
n
a
n
a


## String slices

In [29]:
# The operator [n:m] returns the part of the string from the n-th character to the m-th character,
# including the first but excluding the last.
# Omitting n starts the slice at the beginning of the string; an m larger than
# the length of the string (as in the second slice below) simply stops at the end.

s = 'Monty Python'
print(s[:5])
print(s[6:13])

Monty
Python


In [1]:
# A string slice can take a third index that specifies the "step size", that is,
# the number of spaces between successive characters.
# A step size of 2 means every other character:

fruit = 'banana'
fruit[:5:2]

'bnn'

In [2]:
# A step size of -1 goes through the word backwards, generating a reversed string.
# Leaving out the start and end indices makes the slice cover the whole string.

fruit[::-1]

'ananab'

## Strings are immutable

In [32]:
# Strings are IMMUTABLE, which means you can't change an existing string.
# The item assignment below fails on purpose to demonstrate this.

greeting = 'Hello, world!'
greeting[0] = 'J'

TypeError: 'str' object does not support item assignment

In [33]:
# The best you can do is build a new string that is a variation on the original:
# here, a new first letter followed by everything after the old first letter.

greeting = 'Hello, world!'
new_greeting = 'J' + greeting[1:]
print(new_greeting)

Jello, world!


## Looping and counting

In [35]:
# The following program counts the number of times the letter "a" appears in a string.
# It demonstrates a pattern of computation called a counter: the variable count
# starts at 0 and is incremented each time an "a" is found.

word = 'banana'
count = 0
for letter in word:
    if letter == 'a':
        count += 1
print(count)

# When the loop exits, count contains the result: the total number of "a"s.

3


In [6]:
# The following function prints all the letters from word1 that also appear in word2:
def in_both(word1, word2):
    """Print each letter of word1 that also occurs in word2, one per line.

    Letters are printed in the order they appear in word1. A letter that
    occurs several times in word1 is printed once for each occurrence.
    """
    for letter in word1:
        if letter in word2:
            print(letter)


x = 'abcd'
y = 'aabteiu'
in_both(x, y)

a
b


## String comparison

In [39]:
# The in operator is True when the first string appears as a substring of the second.
'an' in 'banana'

True

In [40]:
# The == operator checks whether two strings are equal.
word = 'banana'
if word == 'banana':
    print('All right')

All right


In [1]:
# For example, comparison operations are useful for putting words in alphabetical order.
# input() reads a line of text from the user (it is called raw_input() in Python 2).
# The comparison is deliberately case-sensitive: the word is used exactly as typed.

word = input('Write a name: ')
if word < 'banana':
    print('Your word, ' + word + ', comes before banana.')
elif word > 'banana':
    print('Your word, ' + word + ', comes after banana.')
else:
    # The word is exactly 'banana'.
    print('All right.')


Write a name: Lemon
Your word, Lemon, comes before banana.


In [None]:
# But "lemon" does not come before "banana"!
# Python does not handle uppercase and lowercase letters the same way that people do.
# All the uppercase letters come before all the lowercase letters.
# A common way to address this problem is to convert strings to a standard format, such as all lowercase

## String METHODS

In [None]:
# Strings are an example of Python objects. An object contains both data (the actual string itself) as well as methods, 
# which are effectively functions that are built into the object and are available to any instance of the object.

In [4]:
# The dir function lists all the methods available for an object.
# The type function shows the type of an object.

stuff = 'Hello world'
type(stuff)


str

In [5]:
# List the names of the attributes and methods available on the string object.
dir(stuff)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getslice__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_formatter_field_name_split',
 '_formatter_parser',
 'capitalize',
 'center',
 'count',
 'decode',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'index',
 'isalnum',
 'isalpha',
 'isdigit',
 'islower',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']

In [7]:
# upper is a string method: it returns a new string with all letters in uppercase.
# The original string stored in word is left unchanged.
word = 'banana'
new_word = word.upper()
print(new_word)

BANANA


In [8]:
# The empty parentheses indicate that this method takes no argument.

In [25]:
# There is a string method named find that is remarkably similar to the function we wrote before.
# In this example, we invoke find on word and pass the letter we are looking for as a parameter.
# find returns the index at which the letter first appears.

word = 'banana'
index = word.find('b')
print(index)

0


In [26]:
# Actually, the find method is more general than our function; it can find substrings, not just characters.
# The result is the index at which the first occurrence of the substring starts:
word.find('na')

2

In [27]:
# It can take as a second argument the index where it should start.
# Starting at index 3 skips the 'na' at index 2, so the next occurrence is found instead:
word.find('na', 3)

4

In [None]:
'''The documentation uses a syntax that might be confusing. For example, in find(sub[, start[, end]]), the brackets indicate 
optional arguments. So sub is required, but start is optional, and if you include start, then end is optional.'''

In [12]:
# One common task is to remove white space (spaces, tabs, or newlines) from the
# beginning and end of a string using the strip method.
# White space between the words is left alone.

line = ' Here we go    '
line.strip()

'Here we go'

In [16]:
# Some methods such as startswith return boolean values.
# The check is case-sensitive, so lowercase 'please' does not match the capital 'P' in line.

line = 'Please have a nice day'
line.startswith('please')

False

In [17]:
# startswith requires case to match, so sometimes we take a line and map it all
# to lowercase with the lower method before checking:

line.lower()

'please have a nice day'

In [28]:
# Method calls can be chained: lower() returns a string, and startswith is then called on that result.
line.lower().startswith('p')

True

## Parsing strings

In [None]:
# Often, we want to look into a string and find a substring.
# For example, suppose we want to pull out only the second half of the address (i.e. uct.ac.za)
# from the line below. We can do this by using the find method and string slicing.
''' From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008 '''
# 1) First, we will find the position of the at-sign in the string.
# 2) Then we will find the position of the first space after the at-sign.
# 3) Finally, we will use string slicing to extract the characters between those two positions.

In [21]:
# Locate the at-sign that separates the user name from the host.
data = 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'
atpos = data.find('@')
print(atpos)

21


In [30]:
# Find the first space at or after the at-sign; it marks the end of the address.
sppos = data.find(' ', atpos)
print(sppos)

31


In [31]:
# Slice from just after the at-sign up to (but not including) the space.
host = data[atpos + 1:sppos]
print(host)

uct.ac.za


## Format operator

In [14]:
# The % operator after a string is used to combine a string with variables.
# The % operator will replace a %s in the string with the string variable that comes after it.
# With two %s placeholders, the values are supplied as a tuple and filled in from left to right.

string_1 = "Camelot"
string_2 = "place"
print("Let's not go to %s. 'Tis a silly %s." % (string_1, string_2))

Let's not go to Camelot. 'Tis a silly place.


In [32]:
# The format operator, %, allows us to construct strings, replacing parts of the
# strings with the data stored in variables.

camels = 42
'%d' %camels

# The format sequence '%d' means that the second operand should be formatted as an
# integer (d stands for "decimal"). The result is the string '42', not the integer 42.

'42'

In [33]:
# A format sequence can appear anywhere in the string, so a value can be embedded in a sentence:
camels = 42
'I have spotted %d camels.' % camels

'I have spotted 42 camels.'

In [1]:
# This prints the value of total rounded to exactly two digits after the decimal point.

total = 54.6354
print('%.2f' % total)

54.64


In [2]:
# If there is more than one format sequence in the string, the second argument has to be a tuple.
# Each format sequence is matched with an element of the tuple, in order, so the tuple
# needs one element per format sequence.
# The following example uses '%d' to format an integer, '%g' to format a floating point number and '%s' to format a string:

'In %d years I have spotted %g %s.' % (3, 0.1, 'camels')

'In 3 years I have spotted 0.1 camels.'

In [5]:
# An apostrophe inside a string written with single quotes would end the string too early.
# Use the backslash to fix the problem, like this:

'There\'s a snake in my boot!'

"There's a snake in my boot!"