# 6.1 Dictionaries

## 6.1.3 Example: Polynomials as Dictionaries

In [1]:
# Dictionary comprehension: build {k: (-1)**k / k!} for k = 0, ..., 4.
# Read as a polynomial in the sense of this section's title (presumably
# key = exponent, value = coefficient), these are the first five Taylor
# coefficients of exp(-x).
from math import factorial
d = {k: (-1)**k / factorial(k) for k in range(5)}
d

{0: 1.0, 1: -1.0, 2: 0.5, 3: -0.16666666666666666, 4: 0.041666666666666664}

# Dictionaries with default values

In [5]:
p1 = {'a': 1, 'b': 2}
p1.get('c', 0)    # 'c' is missing, so get() returns the fallback 0; p1 itself is not modified

0

In [6]:
from collections import defaultdict
p2 = defaultdict(lambda: 0)    # the factory is called without arguments whenever a missing key is looked up
p2.update(p1)
p2['c']    # 'c' is missing, so the factory supplies (and stores) 0

0

In [7]:
from collections import defaultdict
p3 = defaultdict(float)    # float() is called if key is not in dict; float() returns 0.0
p3.update(p1)
p3['c']

0.0

In [8]:
p3.keys()    # 'c' is now a key of the dictionary, because looking up p3['c'] inserted the default value!

dict_keys(['a', 'b', 'c'])

In [9]:
sorted(p3)    # iterating over a dict yields its keys, so this is the sorted list of keys

['a', 'b', 'c']

# 6.2 Strings

## 6.2.1 Common Operations on Strings

In [23]:
s = "großartig!"    # lowercase sample text containing the sharp s (ß)

In [24]:
s2 = s.upper()    # upper() turns ß into the two letters "SS", so s2 is one character longer than s
s2

'GROSSARTIG!'

In [25]:
s3 = s2.lower()    # "SS" lowers to "ss", not back to ß, so s3 differs from the original s
s3

'grossartig!'

## 6.2.2 Example: Reading Pairs of Numbers

In [39]:
# Read the whole file first; it is expected to hold whitespace-separated
# pairs written as "(a,b)" with no whitespace inside a pair.
with open("stuff/read_pairs1.dat") as f:
    text = f.read()

pairs = []
for word in text.split():
    inner = word[1:-1]    # strip the first and last character (the parentheses)
    pairs.append(tuple(float(x) for x in inner.split(',')))
pairs

[(1.3, 0.0),
 (-1.0, 2.0),
 (3.0, -1.5),
 (0.0, 1.0),
 (1.0, 0.0),
 (1.0, 1.0),
 (0.0, -0.01),
 (10.5, -1.0),
 (2.5, -2.5)]

In [54]:
# Variant that tolerates whitespace anywhere in the file: remove all
# whitespace first, then cut the text at the ")(" boundaries between pairs.
with open("stuff/read_pairs2.dat") as f:
    contents = f.read()

squeezed = ''.join(contents.split())    # the file text without any whitespace
text = squeezed[1:-1].split(')(')       # drop the outermost "(" and ")" and split into "a,b" items
pairs = []
for word in text:
    pairs.append(tuple(float(x) for x in word.split(',')))
pairs

[(1.3, 0.0),
 (-1.0, 2.0),
 (3.0, -1.5),
 (0.0, 1.0),
 (1.0, 0.0),
 (1.0, 1.0),
 (0.0, -0.01),
 (10.5, -1.0),
 (2.5, -2.5)]

# 6.3 Reading Data from Web Pages

## 6.3.2 How to Access Web Pages in Programs

In [55]:
from urllib.request import urlopen

In [68]:
# urlopen() returns a response object whose read() yields the raw body as
# bytes (hence the b'...' in the output). Without a timeout an unresponsive
# server would block the notebook indefinitely, so one is given explicitly.
with urlopen("http://pota.to/", timeout=10) as f:
    print(f.read())

b'<html>\n\n\n\n<head>\n\n<title>Pota.to</title>\n\n</head>\n\n\n\n<body background="images/potato-repeated-mini.jpg">\n\n\n\n<p align="left">&nbsp;</p>\n\n\n\n<p align="left">&nbsp;</p>\n\n<div align="center"><center>\n\n\n\n<table border="0" width="50%" height="50%" cellspacing="0" cellpadding="7">\n\n  <tr>\n\n    <td width="33%"></td>\n\n    <td width="33%"></td>\n\n    <td width="34%"></td>\n\n  </tr>\n\n  <tr>\n\n    <td width="33%"></td>\n\n    <td width="33%" bgcolor="#FFFFFF"><p align="center">Nothing&nbsp;yet,&nbsp;nosy.</td>\n\n    <td width="34%"></td>\n\n  </tr>\n\n  <tr>\n\n    <td width="33%"></td>\n\n    <td width="33%"></td>\n\n    <td width="34%"></td>\n\n  </tr>\n\n</table>\n\n</center></div>\n\n</body>\n\n</html>\n\n'


In [66]:
# with Requests (high-level API, supporting authentication, etc.):
import requests
# Requests never times out unless told to, so pass an explicit timeout to
# keep an unresponsive server from hanging the notebook.
# .text is the response body decoded to str (compare the bytes printed above).
print(requests.get("http://pota.to/", timeout=10).text)

<html>



<head>

<title>Pota.to</title>

</head>



<body background="images/potato-repeated-mini.jpg">



<p align="left">&nbsp;</p>



<p align="left">&nbsp;</p>

<div align="center"><center>



<table border="0" width="50%" height="50%" cellspacing="0" cellpadding="7">

  <tr>

    <td width="33%"></td>

    <td width="33%"></td>

    <td width="34%"></td>

  </tr>

  <tr>

    <td width="33%"></td>

    <td width="33%" bgcolor="#FFFFFF"><p align="center">Nothing&nbsp;yet,&nbsp;nosy.</td>

    <td width="34%"></td>

  </tr>

  <tr>

    <td width="33%"></td>

    <td width="33%"></td>

    <td width="34%"></td>

  </tr>

</table>

</center></div>

</body>

</html>




## 6.3.5 Handling Non-English Text

In [88]:
# -*- coding: utf-8 -*-

In [130]:
# ascii: print the character for every code point from 0 through 127,
# each followed by ", " (the first 32 and the last one are control characters)
for character in map(chr, range(128)):
    print(character, end=", ")

 , , , , , , , , , 	, 
, , , , , , , , , , , , , , , , , , , , , ,  , !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z, [, \, ], ^, _, `, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, {, |, }, ~, , 

In [89]:
def check(s):
    """Print s, the name of its class, its repr() and its length on one line.

    The line has the form ``<str(s)>, <class name>: <repr(s)> (<len(s)>)``.
    """
    summary = "{!s}, {!s}: {!r} ({:d})".format(s, s.__class__.__name__, s, len(s))
    print(summary)

In [90]:
# print the Unicode code point of every character, one per line
for code_point in map(ord, "C. F. Gauß"):
    print(code_point)

67
46
32
70
46
32
71
97
117
223


In [126]:
print("\xc3\x9f")    # but utf-8 _is_ unicode, right?
print("\xdf")        # why then, should this be "unicode" and the above "utf-8"??
# also, "DF" is smaller than 256, so I really don't get where C3 9F comes from.
# for the exciting answers to some of this, see a few cells below :)
# Short answer: in a str literal, \xNN is the code point with hex value NN, not a
# byte. The first line therefore prints the two characters U+00C3 and U+009F,
# while the second prints the single character U+00DF (ß). C3 9F is the two-byte
# UTF-8 *encoding* of U+00DF.

Ã
ß


In [111]:
"ß".encode('utf-8')

b'\xc3\x9f'

In [117]:
"ß".encode('latin-1')    # huh?

b'\xdf'

In [127]:
# okay. again
s = "Gauß"    # same as u"Gauß" in python 3
print(s)
print(ascii(s))    # python 2 representation: non-ASCII characters are shown as escapes
print(repr(s))     # python 3 representation: printable non-ASCII characters are kept as they are
print(s.encode('utf-8'))    # the UTF-8 bytes of s; ß becomes the two bytes C3 9F

Gauß
'Gau\xdf'
'Gauß'
b'Gau\xc3\x9f'


In [125]:
s = b"Gau\xc3\x9f"    # same as s = "Gau\xc3\x9f" in python 2
# For a bytes object all three of the following print the same b'...' text.
print(s)
print(ascii(s))    # python 2 representation
print(repr(s))     # python 3 representation
s2 = s.decode('utf-8')    # interpret the bytes as UTF-8 to obtain a str; C3 9F decodes to ß
print(s2)
print(ascii(s2))
print(repr(s2))

b'Gau\xc3\x9f'
b'Gau\xc3\x9f'
b'Gau\xc3\x9f'
Gauß
'Gau\xdf'
'Gauß'


In [132]:
# Summarizing: the UTF-8 table gives the following information about the ß character:
# unicode code point: U+00DF
# character: ß
# utf-8: c3 9f
# name: LATIN SMALL LETTER SHARP S

In [147]:
# so this should also work:
"\u00df"    # \u needs a 16-bit hex number (exactly 4 hex digits)

'ß'


In [148]:
# same as
"\U000000df"    # \U needs a 32-bit hex number (exactly 8 hex digits)

'ß'

In [160]:
# same as
"\xdf"    # in a str literal, \x takes exactly 2 hex digits and denotes the code point with that value.
# <=> u"\xdf"

'ß'

In [158]:
# same as
b"\xc3\x9f".decode()    # bytes carry no encoding of their own; decode() assumes UTF-8 by default.
# <=> b"\xc3\x9f".decode('utf-8')

'ß'

In [153]:
# same as
"\N{LATIN SMALL LETTER SHARP S}"    # \N{...} selects the character by its Unicode name

'ß'