# Chapter 7: Mangle Data Like a Pro

In [1]:
def unicode_test(value):
    import unicodedata
    name = unicodedata.name(value)
    value2 = unicodedata.lookup(name)
    print('value="%s", name="%s", value2="%s"' % (value, name, value2))

In [2]:
unicode_test('A')

value="A", name="LATIN CAPITAL LETTER A", value2="A"


In [3]:
unicode_test('$')

value="$", name="DOLLAR SIGN", value2="$"


In [4]:
unicode_test('\u00a2')

value="¢", name="CENT SIGN", value2="¢"


In [5]:
unicode_test('\u20ac')

value="€", name="EURO SIGN", value2="€"


In [6]:
unicode_test('\u2603')

value="☃", name="SNOWMAN", value2="☃"


In [7]:
place = 'caf\u00e9'
place

'café'

In [8]:
u_umlaut = '\N{LATIN SMALL LETTER U WITH DIAERESIS}'
u_umlaut

'ü'

In [9]:
snowman = '\u2603'
len(snowman)

1

In [10]:
ds = snowman.encode('utf-8')
len(ds)

3

In [11]:
ds

b'\xe2\x98\x83'

In [12]:
place = 'caf\u00e9'
place

'café'

In [13]:
type(place)

str

In [14]:
place_bytes = place.encode('utf-8')
place_bytes

b'caf\xc3\xa9'

In [15]:
place2 = place_bytes.decode('utf-8')
place2

'café'

In [16]:
import re
source = 'Young Frankenstein'
m = re.match('You', source)
if m:
    print(m.group())

You


In [17]:
m = re.search('Frank', source)
if m:
    print(m.group())

Frank


In [18]:
m = re.search('.*Frank', source)
if m:
    print(m.group())

Young Frank


In [19]:
source2 = 'Young Frankenstein Frankly'
m = re.match('.*Frank', source2)
if m:
    print(m.group())

Young Frankenstein Frank


In [20]:
m = re.search('Frank', source2)
if m:
    print(m.group())

Frank


In [21]:
print(re.split('n', source2))

['You', 'g Fra', 'ke', 'stei', ' Fra', 'kly']


In [22]:
print(re.findall('\w', source2))

['Y', 'o', 'u', 'n', 'g', 'F', 'r', 'a', 'n', 'k', 'e', 'n', 's', 't', 'e', 'i', 'n', 'F', 'r', 'a', 'n', 'k', 'l', 'y']


In [23]:
print(re.findall('ly$', source2))

['ly']


### 7.1

In [24]:
import unicodedata
mystery = '\U0001f4a9'
name = unicodedata.name(mystery)
print(name)
print(mystery)

PILE OF POO
💩


### 7.2

In [25]:
pop_bytes = mystery.encode('utf-8')
print(pop_bytes)

b'\xf0\x9f\x92\xa9'


### 7.3

In [26]:
pop_string = pop_bytes.decode('utf-8')
print(pop_string)

💩


### 7.4

In [27]:
print('My kitty cat likes %s,\n\
      My kitty cat likes %s,\n\
      My kitty cat fell on his %s,\n\
      And now thinks he''s a %s.'% ('roast beef', 'ham', 'head', 'clam'))

My kitty cat likes roast beef,
      My kitty cat likes ham,
      My kitty cat fell on his head,
      And now thinks hes a clam.


### 7.5

In [28]:
letter = '''
Dear {salutation} {name},
Thank you for your letter. We are sorry that our {product} {verbed} in your {room}. Please note that it should never be used in a {room}, especially near any {animals}.

Send us your recipt and {amount} for shipping and handling. We will send you another {product} that, in our tests, is {percent}% less likely to have {verbed}.

Thank you for your support.
Sincerely,
{spokesman}
{job_title}
'''

### 7.6

In [29]:
response = {'salutation': 'Mr.',
            'name': 'Charlie',
            'product': 'NB',
            'verbed': 'broke',
            'room': 'A',
            'animals': 'dogs',
            'amount': '3',
            'percent': '10',
            'spokesman': 'Dr. Evil',
            'job_title': 'Cleaner'}
print(letter.format(**response))


Dear Mr. Charlie,
Thank you for your letter. We are sorry that our NB broke in your A. Please note that it should never be used in a A, especially near any dogs.

Send us your recipt and 3 for shipping and handling. We will send you another NB that, in our tests, is 10% less likely to have broke.

Thank you for your support.
Sincerely,
Dr. Evil
Cleaner



### 7.7

In [30]:
mammoth = '''
We have seen the Queen of cheese, 
Laying quietly at your ease, 
Gently fanned by evening breeze -- 
Thy fair form no flies dare seize. 

All gaily dressed soon you'll go 
To the great Provincial Show, 
To be admired by many a beau 
In the city of Toronto. 

Cows numerous as a swarm of bees -- 
Or as the leaves upon the trees -- 
It did require to make thee please, 
And stand unrivalled Queen of Cheese. 

May you not receive a scar as 
We have heard that Mr. Harris 
Intends to send you off as far as 
The great World's show at Paris. 

Of the youth -- beware of these -- 
For some of them might rudely squeeze 
And bite your cheek; then songs or glees 
We could not sing o' Queen of Cheese. 

We'rt thou suspended from baloon, 
You'd cast a shade, even at noon; 
Folks would think it was the moon 
About to fall and crush them soon.
'''

### 7.8

In [31]:
a = re.findall('(?<=' ')c\w*', mammoth)
a

['cheese', 'cial', 'city', 'ceive', 'car', 'cheek', 'could', 'cast', 'crush']

In [32]:
b = re.findall(r'\bc\w*', mammoth)
b

['cheese', 'city', 'cheek', 'could', 'cast', 'crush']

### 7.9

In [33]:
c = re.findall(r'\bc\w{3}\b', mammoth)
c

['city', 'cast']

### 7.10

In [34]:
d = re.findall(r'\b[\w\']*l\b', mammoth)
d

['All', "you'll", 'Provincial', 'fall']

### 7.11

In [35]:
e = re.findall(r'\b\w*[aeiou]{3}[^aeiou\s]*\w*\b', mammoth)
e

['Queen', 'quietly', 'beau', 'Queen', 'squeeze', 'Queen']

### 7.12

In [36]:
a = '47494638396101000100800000000000ffffff21f90401000000002c000000000100010000020144003b'
import binascii
gif = binascii.unhexlify(a)
len(gif)

42

### 7.13

In [37]:
gif[:6]

b'GIF89a'

### 7.14

In [38]:
import struct
width, height = struct.unpack('>HH', gif[6:10])
width, height

(256, 256)