In [8]:
# 6.1. Reading and Writing CSV Data
# Read CSV rows as lists
import csv
with open('files/stocks.csv') as f:
    f_csv = csv.reader(f)
    headers = next(f_csv)
    print(headers)
    for row in f_csv:
        print(row)
print(type(row))
print(headers[4],': ',row[4])

['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
['AA', '39.48', '6/11/2007', '9:36am', '-0.18', '181800']
['AIG', '71.38', '6/11/2007', '9:36am', '-0.15', '195500']
['AXP', '62.58', '6/11/2007', '9:36am', '-0.46', '935000']
['BA', '98.31', '6/11/2007', '9:36am', '+0.12', '104800']
['C', '53.08', '6/11/2007', '9:36am', '-0.25', '360900']
['CAT', '78.29', '6/11/2007', '9:36am', '-0.23', '225400']
<class 'list'>
Change :  -0.23


In [20]:
# 6.1.
# Read CSV rows as namedtules (the column headers are valid Python identifiers)
from collections import namedtuple
with open('files/stocks.csv') as f:
    f_csv = csv.reader(f)
    headings = next(f_csv)
    Row = namedtuple('Row', headings)
    for r in f_csv:
        row = Row(*r)
        print(row)
print(row.Symbol)

Row(Symbol='AA', Price='39.48', Date='6/11/2007', Time='9:36am', Change='-0.18', Volume='181800')
Row(Symbol='AIG', Price='71.38', Date='6/11/2007', Time='9:36am', Change='-0.15', Volume='195500')
Row(Symbol='AXP', Price='62.58', Date='6/11/2007', Time='9:36am', Change='-0.46', Volume='935000')
Row(Symbol='BA', Price='98.31', Date='6/11/2007', Time='9:36am', Change='+0.12', Volume='104800')
Row(Symbol='C', Price='53.08', Date='6/11/2007', Time='9:36am', Change='-0.25', Volume='360900')
Row(Symbol='CAT', Price='78.29', Date='6/11/2007', Time='9:36am', Change='-0.23', Volume='225400')
CAT


In [26]:
# 6.1.
# Read CSV rows as dictionaries
import csv
with open('files/stocks.tsv') as f:
    f_csv = csv.DictReader(f)
    for row in f_csv:
        print(row)

{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'AA\t39.48\t"6/11/2007"\t"9:36am"\t-0.18\t181800'}
{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'AIG\t71.38\t"6/11/2007"\t"9:36am"\t-0.15\t195500'}
{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'AXP\t62.58\t"6/11/2007"\t"9:36am"\t-0.46\t935000'}
{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'BA\t98.31\t"6/11/2007"\t"9:36am"\t+0.12\t104800'}
{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'C\t53.08\t"6/11/2007"\t"9:36am"\t-0.25\t360900'}
{'Symbol\tPrice\tDate\tTime\tChange\tVolume': 'CAT\t78.29\t"6/11/2007"\t"9:36am"\t-0.23\t225400'}


In [32]:
# 6.1.
# Example of reading tab-separated values
with open('files/stocks.tsv') as f:
    f_tsv = csv.reader(f, delimiter='\t')
    for row in f_tsv:
            print(row)

['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
['AA', '39.48', '6/11/2007', '9:36am', '-0.18', '181800']
['AIG', '71.38', '6/11/2007', '9:36am', '-0.15', '195500']
['AXP', '62.58', '6/11/2007', '9:36am', '-0.46', '935000']
['BA', '98.31', '6/11/2007', '9:36am', '+0.12', '104800']
['C', '53.08', '6/11/2007', '9:36am', '-0.25', '360900']
['CAT', '78.29', '6/11/2007', '9:36am', '-0.23', '225400']


In [22]:
# 6.1.
# write lists to CSV file
headers = ['Symbol','Price','Date','Time','Change','Volume']
rows = [('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800),
        ('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500),
        ('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000),
        ]
with open('files/stocks.csv','w') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(rows)

In [23]:
# 6.1.
# write dictionaries to CSV file
headers = ['Symbol', 'Price', 'Date', 'Time', 'Change', 'Volume']
rows = [{'Symbol':'AA', 'Price':39.48, 'Date':'6/11/2007',
        'Time':'9:36am', 'Change':-0.18, 'Volume':181800},
        {'Symbol':'AIG', 'Price': 71.38, 'Date':'6/11/2007',
        'Time':'9:36am', 'Change':-0.15, 'Volume': 195500},
        {'Symbol':'AXP', 'Price': 62.58, 'Date':'6/11/2007',
        'Time':'9:36am', 'Change':-0.46, 'Volume': 935000},
        ]
with open('stocks.csv','w') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()
    f_csv.writerows(rows)

In [33]:
# 6.1.
# Read CSV rows with nonvalid Python identifiers
import re
with open('files/stocks.csv') as f:
    f_csv = csv.reader(f)
    headers = [ re.sub('[^a-zA-Z_]', '_', h) for h in next(f_csv) ]
    Row = namedtuple('Row', headers)
    for r in f_csv:
        row = Row(*r)
        print(row)

Row(Symbol='AA', Price='39.48', Date='6/11/2007', Time='9:36am', Change='-0.18', Volume='181800')
Row(Symbol='AIG', Price='71.38', Date='6/11/2007', Time='9:36am', Change='-0.15', Volume='195500')
Row(Symbol='AXP', Price='62.58', Date='6/11/2007', Time='9:36am', Change='-0.46', Volume='935000')


In [36]:
# 6.1.
# Read CSV rows with data type conversions
col_types = [str, float, str, str, float, int]
with open('files/stocks.csv') as f:
    f_csv = csv.reader(f)
    headers = next(f_csv)
    for row in f_csv:
        # Apply conversions to the row items
        row = tuple(convert(value) for convert, value in zip(col_types, row))
        print(row)
print()

# Reading as dicts with type conversion
field_types = [ ('Price', float),
                ('Change', float),
                ('Volume', int) ]
with open('files/stocks.csv') as f:
    for row in csv.DictReader(f):
        row.update((key, conversion(row[key])) for key, conversion in field_types)
        print(row)

('AA', 39.48, '6/11/2007', '9:36am', -0.18, 181800)
('AIG', 71.38, '6/11/2007', '9:36am', -0.15, 195500)
('AXP', 62.58, '6/11/2007', '9:36am', -0.46, 935000)

{'Price': 39.48, 'Symbol': 'AA', 'Time': '9:36am', 'Change': -0.18, 'Date': '6/11/2007', 'Volume': 181800}
{'Price': 71.38, 'Symbol': 'AIG', 'Time': '9:36am', 'Change': -0.15, 'Date': '6/11/2007', 'Volume': 195500}
{'Price': 62.58, 'Symbol': 'AXP', 'Time': '9:36am', 'Change': -0.46, 'Date': '6/11/2007', 'Volume': 935000}


In [9]:
# 6.2. Reading and Writing JSON Data
import json
data = {
    'name' : 'ACME',
    'shares' : 100,
    'price' : 542.23
    }
json_str = json.dumps(data)
print(data)
print(json_str)

# Writing JSON data
with open('files/data.json', 'w') as f:
    json.dump(data, f)

# Reading data back
with open('files/data.json', 'r') as f:
    data = json.load(f)
print(data)

{'price': 542.23, 'shares': 100, 'name': 'ACME'}
{"price": 542.23, "shares": 100, "name": "ACME"}
{'price': 542.23, 'shares': 100, 'name': 'ACME'}


In [41]:
# 6.2. 
# JSON mapping
d= {'a': True, 'b': False, 'c': None}
print(json.dumps(d))
print()

# JSON from website:
from urllib.request import urlopen
import json
u = urlopen('https://www.onet.pl/manifest.json')
resp = json.loads(u.read().decode('utf-8'))
from pprint import pprint
pprint(resp)
print()

{"a": true, "b": false, "c": null}

{'display': 'standalone',
 'gcm_sender_id': '211794955562',
 'gcm_user_visible_only': True,
 'name': 'Onet.pl',
 'short_name': 'Onet'}



In [46]:
# 6.2.
# decode JSON data, preserving its order in an OrderedDict:
s=  '{"name": "ACME", "shares": 50, "price": 490.1}'
from collections import OrderedDict
data = json.loads(s, object_pairs_hook=OrderedDict)
print(data)
print()

# turn a JSON dictionary into a Python object:
class JSONObject:
    def __init__(self, d):
        self.__dict__ = d
data = json.loads(s, object_hook=JSONObject)
print('name:', data.name, '\tshares:', data.shares,'\tprice:', data.price)
print()

# output nicely formatted - somtethingnot working for variable: data ???
print(json.dumps(s))
print(json.dumps(s, indent=4))
print(json.dumps(s, sort_keys=True))

OrderedDict([('name', 'ACME'), ('shares', 50), ('price', 490.1)])

name: ACME 	shares: 50 	price: 490.1

"{\"name\": \"ACME\", \"shares\": 50, \"price\": 490.1}"
"{\"name\": \"ACME\", \"shares\": 50, \"price\": 490.1}"
"{\"name\": \"ACME\", \"shares\": 50, \"price\": 490.1}"


In [87]:
# 6.2.
# (c) Encoding instances
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y

def serialize_instance(obj):
    d = { '__classname__' : type(obj).__name__ }
    print('d:',d)
    d.update(vars(obj))
    print('d:',d)
    return d

p = Point(2,3)
s = json.dumps(p, default=serialize_instance)
print("serialise:",s)

# (d) Decoding instances
classes = {
    'Point' : Point
}

def unserialize_object(d):
    clsname = d.pop('__classname__', None)
    print('clsname:', clsname)
    if clsname:
        cls = classes[clsname]
        obj = cls.__new__(cls)
        for key, value in d.items():
            setattr(obj, key, value)
            print('key', key,'value', value)
        return obj
    else:
        return d

a = json.loads(s, object_hook=unserialize_object)
print('a.x:',a.x,'a.y:',a.y)


d: {'__classname__': 'Point'}
d: {'__classname__': 'Point', 'y': 3, 'x': 2}
serialise: {"__classname__": "Point", "y": 3, "x": 2}
clsname: Point
key y value 3
key x value 2
a.x: 2 a.y: 3


In [101]:
# 6.3. Parsing Simple XML Data
from urllib.request import urlopen
from xml.etree.ElementTree import parse
# Download the RSS feed and parse it
u = urlopen('http://planet.python.org/rss20.xml')
doc = parse(u)
print('doc:', doc)
e = doc.find('channel/title')
print('e:', e)
print('e.tag:', e.tag, 'e.text:', e.text)
print(e.get('some_attribute'))
print('-'*20)

# Extract and output tags of interest
for item in doc.iterfind('channel/item'):
    title = item.findtext('title')
    date = item.findtext('pubDate')
    link = item.findtext('link')
    print(title)
    print(date)
    print(link)
    print()

doc: <xml.etree.ElementTree.ElementTree object at 0x7f8930145eb8>
e: <Element 'title' at 0x7f89300eadb8>
e.tag: title e.text: Planet Python
None
--------------------
Possbility and Probability: Debugging Flask, requests, curl, and form data
Mon, 28 Aug 2017 14:09:40 +0000
https://ironboundsoftware.com/blog/2017/08/28/debugging-flask-requests-curl-and-form-data/

Mike Driscoll: Back to School Python Book Sale 2017
Mon, 28 Aug 2017 13:52:54 +0000
http://www.blog.pythonlibrary.org/2017/08/28/back-to-school-python-book-sale-2017/

Chris Moffitt: Building a Bullet Graph in Python
Mon, 28 Aug 2017 13:38:00 +0000
http://pbpython.com/bullet-graph.html

Doug Hellmann: smtplib — Simple Mail Transfer Protocol Client — PyMOTW 3
Mon, 28 Aug 2017 13:00:40 +0000
http://feeds.doughellmann.com/~r/doughellmann/python/~3/pE7PsR_Vd1c/

Mike Driscoll: PyDev of the Week: Shannon Turner
Mon, 28 Aug 2017 12:30:36 +0000
http://www.blog.pythonlibrary.org/2017/08/28/pydev-of-the-week-shannon-turner/

Import Pyth

In [103]:
# 6.4. Parsing Huge XML Files Incrementally
from xml.etree.ElementTree import iterparse
def parse_and_remove(filename, path):
    path_parts = path.split('/')
    doc = iterparse(filename, ('start', 'end'))
    # Skip the root element
    next(doc)

    tag_stack = []
    elem_stack = []
    for event, elem in doc:
        if event == 'start':
            tag_stack.append(elem.tag)
            elem_stack.append(elem)
        elif event == 'end':
            if tag_stack == path_parts:
                yield elem
                elem_stack[-2].remove(elem)
            try:
                tag_stack.pop()
                elem_stack.pop()
            except IndexError:
                pass

# Find zip code with most potholes

from collections import Counter
potholes_by_zip = Counter()

data = parse_and_remove('files/potholes.xml', 'row/row')
for pothole in data:
    potholes_by_zip[pothole.findtext('zip')] += 1

for zipcode, num in potholes_by_zip.most_common():
    print(zipcode, num)


60617 13
60626 8
60651 7
60623 6
60647 6
60613 4
60625 4
60636 4
60609 4
60628 4
60622 3
60641 3
60619 3
60629 3
60657 3
60649 2
60618 2
60644 2
60638 2
60656 2
60654 2
60652 1
60634 1
60643 1
60612 1
60631 1
60614 1
60707 1
60632 1
60616 1
60630 1
60639 1
60660 1
60637 1


In [107]:
# 6.5. Turning a Dictionary into XML
from xml.etree.ElementTree import Element
def dict_to_xml(tag, d):
    ''' Turn a simple dict of key/value pairs into XML '''
    elem = Element(tag)
    for key, val in d.items():
        child = Element(key)
        child.text = str(val)
        elem.append(child)
    return elem
s = { 'name': 'GOOG', 'shares': 100, 'price':490.1 }
e = dict_to_xml('stock', s)
from xml.etree.ElementTree import tostring
print(tostring(e))
e.set('_id','1234') # attach attributes to an element
print(tostring(e))

b'<stock><price>490.1</price><shares>100</shares><name>GOOG</name></stock>'
b'<stock _id="1234"><price>490.1</price><shares>100</shares><name>GOOG</name></stock>'


In [114]:
# 6.5. 
def dict_to_xml_str(tag, d):
    ''' Turn a simple dict of key/value pairs into XML '''
    parts = ['<{}>'.format(tag)]
    for key, val in d.items():
        parts.append('<{0}>{1}</{0}>'.format(key,val))
    parts.append('</{}>'.format(tag))
    return ''.join(parts)
d = { 'name' : '<spam>' }
print(dict_to_xml_str('item',d))   # String creation -> BUG

# Proper XML creation
e = dict_to_xml('item',d)
print(tostring(e))
print()

from xml.sax.saxutils import escape, unescape
print(escape('<spam>'))
print(unescape('<spam>'))

<item><name><spam></name></item>
b'<item><name>&lt;spam&gt;</name></item>'

&lt;spam&gt;
<spam>


In [119]:
# 6.6. Parsing, Modifying, and Rewriting XML
from xml.etree.ElementTree import parse, Element
doc = parse('files/pred.xml')
root = doc.getroot()
print(root)
# Remove a few elements
root.remove(root.find('sri'))
root.remove(root.find('cr'))

# Insert a new element after <nm>...</nm>
nm_index = root.getchildren().index(root.find('nm'))
print(nm_index)

e = Element('spam')
e.text = 'This is a test'
root.insert(nm_index + 1, e)

# Write back to a file
doc.write('files/newpred.xml', xml_declaration=True)

<Element 'stop' at 0x7f8930102098>
1


In [25]:
# 6.7. Parsing XML Documents with Namespaces
from xml.etree.ElementTree import parse, Element
doc= parse('files/author.xml')
# Some queries that work
print(doc.findtext('author'))
print(doc.find('content'))
# A query involving a namespace (doesn't work)
print(doc.find('content/html'))
# Works if fully qualified
print(doc.find('content/{http://www.w3.org/1999/xhtml}html'))
# Doesn't work
print(doc.findtext('content/{http://www.w3.org/1999/xhtml}html/head/title'))
# Fully qualified
print(doc.findtext('content/{http://www.w3.org/1999/xhtml}html/'
                   '{http://www.w3.org/1999/xhtml}head/'
                   '{http://www.w3.org/1999/xhtml}title'))
print('to są ''połączone teksty')

David Beazley
<Element 'content' at 0x7ffa54318a48>
None
<Element '{http://www.w3.org/1999/xhtml}html' at 0x7ffa54318b38>
None
Hello World
to są połączone teksty


In [37]:
# 6.7.
# wrapping namespace
class XMLNamespaces:
    def __init__(self, **kwargs):
        self.namespaces = {}
        for name, uri in kwargs.items():
            self.register(name, uri)
            print('item.name:', name, 'item.uri:', uri)
        print('namespaces:', self.namespaces)
    def register(self, name, uri):
        self.namespaces[name] = '{'+uri+'}'
    def __call__(self, path):
        print('format_map:', path.format_map(self.namespaces))
        return path.format_map(self.namespaces)
ns = XMLNamespaces(html='http://www.w3.org/1999/xhtml')
print()
print(doc.find(ns('content/{html}html')))
print()
print(doc.findtext(ns('content/{html}html/{html}head/{html}title')))

item.name: html item.uri: http://www.w3.org/1999/xhtml
namespaces: {'html': '{http://www.w3.org/1999/xhtml}'}

format_map: content/{http://www.w3.org/1999/xhtml}html
<Element '{http://www.w3.org/1999/xhtml}html' at 0x7ffa54318b38>

format_map: content/{http://www.w3.org/1999/xhtml}html/{http://www.w3.org/1999/xhtml}head/{http://www.w3.org/1999/xhtml}title
Hello World


In [41]:
# 6.7.
from xml.etree.ElementTree import iterparse
for evt, elem in iterparse('files/author.xml', ('end', 'start-ns', 'end-ns')): print(evt, elem)

print(elem)   # This is the topmost element

end <Element 'author' at 0x7ffa5430bc28>
start-ns ('', 'http://www.w3.org/1999/xhtml')
end <Element '{http://www.w3.org/1999/xhtml}title' at 0x7ffa542b19f8>
end <Element '{http://www.w3.org/1999/xhtml}head' at 0x7ffa542b16d8>
end <Element '{http://www.w3.org/1999/xhtml}h1' at 0x7ffa542b1228>
end <Element '{http://www.w3.org/1999/xhtml}body' at 0x7ffa542b1138>
end <Element '{http://www.w3.org/1999/xhtml}html' at 0x7ffa542b17c8>
end-ns None
end <Element 'content' at 0x7ffa5430bd68>
end <Element 'top' at 0x7ffa5430bb38>
<Element 'top' at 0x7ffa5430bb38>


In [43]:
# 6.8. Interacting with a Relational Database
stocks = [
    ('GOOG', 100, 490.1),
    ('AAPL', 50, 545.75),
    ('FB', 150, 7.45),
    ('HPQ', 75, 33.2),
    ]
import sqlite3
# connect to the database
db = sqlite3.connect('files/database.db')
# create a cursor
c = db.cursor()
c.execute('create table portfolio (symbol text, shares integer, price real)')
db.commit()
# insert a sequence of rows into the data
c.executemany('insert into portfolio values (?,?,?)', stocks)
db.commit()

('GOOG', 100, 490.1)
('AAPL', 50, 545.75)
('FB', 150, 7.45)
('HPQ', 75, 33.2)


In [46]:
# 6.8.
# perform a query
for row in db.execute('select * from portfolio'): print(row)
print()
min_price = 100
for row in db.execute('select * from portfolio where price >= ?', (min_price,)): print(row)

('GOOG', 100, 490.1)
('AAPL', 50, 545.75)
('FB', 150, 7.45)
('HPQ', 75, 33.2)

('GOOG', 100, 490.1)
('AAPL', 50, 545.75)


In [53]:
# 6.9. Decoding and Encoding Hexadecimal Digits
# Initial byte string
s = b'hello'
# Encode as hex
import binascii
h = binascii.b2a_hex(s)
print(h)
# Decode back to bytes
print(binascii.a2b_hex(h))
print()

import base64
h = base64.b16encode(s)
print(h)
print(h.decode('ascii'))
print(base64.b16decode(h))


b'68656c6c6f'
b'hello'

b'68656C6C6F'
68656C6C6F
b'hello'


In [58]:
# 6.10. Decoding and Encoding Base64
# Some byte data
s = b'hello'
import base64
# Encode as Base64
a = base64.b64encode(s)
print(a)
print(a.decode('ascii'))
# Decode from Base64
print(base64.b64decode(a))

b'aGVsbG8='
aGVsbG8=
b'hello'


In [59]:
# 6.11. Reading and Writing Binary Arrays of Structures
from struct import Struct
def write_records(records, format, f):
    ''' Write a sequence of tuples to a binary file of structures. '''
    record_struct = Struct(format)
    for r in records:
        print(r)
        print(*r)
        f.write(record_struct.pack(*r))
records = [ (1, 2.3, 4.5),
            (6, 7.8, 9.0),
            (12, 13.4, 56.7) ]
with open('files/data.b', 'wb') as f:
    write_records(records, '<idd', f)

(1, 2.3, 4.5)
1 2.3 4.5
(6, 7.8, 9.0)
6 7.8 9.0
(12, 13.4, 56.7)
12 13.4 56.7


In [68]:
# 6.11.
# read the file incrementally in chunks
from struct import Struct
def read_records(format, f):
    # '<idd' - Little endian 32-bit integer, two double precision floats
    record_struct = Struct(format)
    chunks = iter(lambda: f.read(record_struct.size), b'')
    return (record_struct.unpack(chunk) for chunk in chunks)
with open('files/data.b','rb') as f:
    for rec in read_records('<idd', f):
        print(rec)
print()

# read the file entirely 
def unpack_records(format, data):
    record_struct = Struct(format)
    return (record_struct.unpack_from(data, offset) 
            for offset in range(0, len(data), record_struct.size))
with open('files/data.b', 'rb') as f:
    data = f.read()
    for rec in unpack_records('<idd', data):
        print(rec)

(1, 2.3, 4.5)
(6, 7.8, 9.0)
(12, 13.4, 56.7)

(1, 2.3, 4.5)
(6, 7.8, 9.0)
(12, 13.4, 56.7)


In [77]:
# 6.11.
from struct import Struct
record_struct = Struct('<idd')
print(record_struct)
print(record_struct.size)
print(record_struct.pack(1, 2.0, 3.0))
print(record_struct.unpack(_))
import struct
print(struct.pack('<idd', 1, 2.0, 3.0))
print(struct.unpack('<idd', _))

<Struct object at 0x7ffa543c4810>
20
b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@'
(1, 2.0, 3.0)
b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@'
(1, 2.0, 3.0)


In [80]:
# 6.11.
f = open('files/data.b', 'rb')
chunks = iter(lambda: f.read(20), b'')
for chk in chunks:
    print(chk)

b'\x01\x00\x00\x00ffffff\x02@\x00\x00\x00\x00\x00\x00\x12@'
b'\x06\x00\x00\x00333333\x1f@\x00\x00\x00\x00\x00\x00"@'
b'\x0c\x00\x00\x00\xcd\xcc\xcc\xcc\xcc\xcc*@\x9a\x99\x99\x99\x99YL@'


In [83]:
# 6.11.
from collections import namedtuple
Record = namedtuple('Record', ['kind','x','y'])
with open('files/data.b', 'rb') as f:
    records = (Record(*r) for r in read_records('<idd', f))
    for r in records:
        print(r.kind, r.x, r.y)

1 2.3 4.5
6 7.8 9.0
12 13.4 56.7


In [86]:
# 6.11.
import numpy as np
f = open('files/data.b', 'rb')
records = np.fromfile(f, dtype='<i,<d,<d')
print(records)
print(records[0])

[( 1,   2.3,   4.5) ( 6,   7.8,   9. ) (12,  13.4,  56.7)]
(1,  2.3,  4.5)


In [93]:
# 6.12. Reading Nested and Variable-Sized Binary Structures
# READ IT AGAIN !!!
import struct
import itertools
polys = [
    [ (1.0, 2.5), (3.5, 4.0), (2.5, 1.5) ],
    [ (7.0, 1.2), (5.1, 3.0), (0.5, 7.5), (0.8, 9.0) ],
    [ (3.4, 6.3), (1.2, 0.5), (4.6, 9.2) ],
    ]
print(list(itertools.chain(*polys)))
def write_polys(filename, polys):
    # Determine bounding box
    flattened = list(itertools.chain(*polys))
    min_x = min(x for x, y in flattened)
    max_x = max(x for x, y in flattened)
    min_y = min(y for x, y in flattened)
    max_y = max(y for x, y in flattened)
    with open(filename, 'wb') as f:
        f.write(struct.pack('<iddddi', 0x1234, min_x, min_y, max_x, max_y,len(polys)))
        for poly in polys:
            size = len(poly) * struct.calcsize('<dd')
            f.write(struct.pack('<i', size+4))
            for pt in poly:
                f.write(struct.pack('<dd', *pt))
# Call it with our polygon data
write_polys('files/polys.bin', polys)
print()

def read_polys(filename):
    with open(filename, 'rb') as f:
        # Read the header
        header = f.read(40)
        file_code, min_x, min_y, max_x, max_y, num_polys = \
            struct.unpack('<iddddi', header)
        polys = []
        for n in range(num_polys):
            pbytes, = struct.unpack('<i', f.read(4))
            poly = []
            for m in range(pbytes // 16):
                pt = struct.unpack('<dd', f.read(16))
                poly.append(pt)
            polys.append(poly)
    return polys
p = read_polys('files/polys.bin')
print(p)

[(1.0, 2.5), (3.5, 4.0), (2.5, 1.5), (7.0, 1.2), (5.1, 3.0), (0.5, 7.5), (0.8, 9.0), (3.4, 6.3), (1.2, 0.5), (4.6, 9.2)]

[[(1.0, 2.5), (3.5, 4.0), (2.5, 1.5)], [(7.0, 1.2), (5.1, 3.0), (0.5, 7.5), (0.8, 9.0)], [(3.4, 6.3), (1.2, 0.5), (4.6, 9.2)]]


In [1]:
# 6.12. 
# READ IT AGAIN !!!
# Example 1
import struct
class StructField:
    ''' Descriptor representing a simple structure field '''
    def __init__(self, format, offset):
        self.format = format
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            r = struct.unpack_from(self.format, instance._buffer, self.offset)
            return r[0] if len(r) == 1 else r

class Structure:
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)
        
class PolyHeader(Structure):
    file_code = StructField('<i', 0)
    min_x = StructField('<d', 4)
    min_y = StructField('<d', 12)
    max_x = StructField('<d', 20)
    max_y = StructField('<d', 28)
    num_polys = StructField('<i', 36)

f = open('files/polys.bin', 'rb')
phead = PolyHeader(f.read(40))
print(phead.file_code == 0x1234)
print(phead.min_x)
print(phead.min_y)
print(phead.max_x)
print(phead.max_y)
print(phead.num_polys)

True
0.5
0.5
7.0
9.2
3


In [100]:
# 6.12.
# READ IT AGAIN !!!
# Example 2: Introduction of a metaclass

import struct

class StructField:
    def __init__(self, format, offset):
        self.format = format
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            r =  struct.unpack_from(self.format, instance._buffer, self.offset)
            return r[0] if len(r) == 1 else r
        
class StructureMeta(type):
    ''' Metaclass that automatically creates StructField descriptors '''
    def __init__(self, clsname, bases, clsdict):
        fields = getattr(self, '_fields_', [])
        byte_order = ''
        offset = 0
        for format, fieldname in fields:
            if format.startswith(('<','>','!','@')):
                byte_order = format[0]
                format = format[1:]
            format = byte_order + format
            setattr(self, fieldname, StructField(format, offset))
            offset += struct.calcsize(format)
        setattr(self, 'struct_size', offset)

class Structure(metaclass=StructureMeta):
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)

    @classmethod
    def from_file(cls, f):
        return cls(f.read(cls.struct_size))

if __name__ == '__main__':
    class PolyHeader(Structure):classmethod
        _fields_ = [
            ('<i', 'file_code'),
            ('d', 'min_x'),
            ('d', 'min_y'),
            ('d', 'max_x'),
            ('d', 'max_y'),
            ('i', 'num_polys')
            ]

    f = open('files/polys.bin','rb')
    phead = PolyHeader.from_file(f)
    print(phead.file_code == 0x1234)
    print('min_x=', phead.min_x)
    print('max_x=', phead.max_x)
    print('min_y=', phead.min_y)
    print('max_y=', phead.max_y)
    print('num_polys=', phead.num_polys)

True
min_x= 0.5
max_x= 7.0
min_y= 0.5
max_y= 9.2
num_polys= 3


In [3]:
# 6.12.
# Example 3: Nested structure support

import struct

class StructField:
    ''' Descriptor representing a simple structure field '''
    def __init__(self, format, offset):
        self.format = format
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            r =  struct.unpack_from(self.format, instance._buffer, self.offset)
            return r[0] if len(r) == 1 else r

class NestedStruct:
    ''' Descriptor representing a nested structure '''
    def __init__(self, name, struct_type, offset):
        self.name = name
        self.struct_type = struct_type
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            data = instance._buffer[self.offset:
                                    self.offset+self.struct_type.struct_size]
            result = self.struct_type(data)
            # Save resulting structure back on instance to avoid
            # further recomputation of this step
            setattr(instance, self.name, result)
            return result
        
class StructureMeta(type):
    ''' Metaclass that automatically creates StructField descriptors '''
    def __init__(self, clsname, bases, clsdict):
        fields = getattr(self, '_fields_', [])
        byte_order = ''
        offset = 0
        for format, fieldname in fields:
            if isinstance(format, StructureMeta):
                setattr(self, fieldname, NestedStruct(fieldname, format, offset))
                offset += format.struct_size
            else:
                if format.startswith(('<','>','!','@')):
                    byte_order = format[0]
                    format = format[1:]
                format = byte_order + format
                setattr(self, fieldname, StructField(format, offset))
                offset += struct.calcsize(format)
        setattr(self, 'struct_size', offset)

class Structure(metaclass=StructureMeta):
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)

    @classmethod
    def from_file(cls, f):
        return cls(f.read(cls.struct_size))

if __name__ == '__main__':
    class Point(Structure):
        _fields_ = [
            ('<d', 'x'),
            ('d', 'y')
            ]

    class PolyHeader(Structure):
        _fields_ = [
            ('<i', 'file_code'),
            (Point, 'min'),
            (Point, 'max'),
            ('i', 'num_polys')
            ]

    f = open('files/polys.bin','rb')
    phead = PolyHeader.from_file(f)
    print(phead.file_code == 0x1234)
    print('min.x=', phead.min.x)
    print('max.x=', phead.max.x)
    print('min.y=', phead.min.y)
    print('max.y=', phead.max.y)
    print('num_polys=', phead.num_polys)

True
min.x= 0.5
max.x= 7.0
min.y= 0.5
max.y= 9.2
num_polys= 3


In [6]:
# 6.12.
# Example 4: Variable sized chunks

import struct

class StructField:
    ''' Descriptor representing a simple structure field '''
    def __init__(self, format, offset):
        self.format = format
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            r =  struct.unpack_from(self.format, 
                                    instance._buffer, self.offset)
            return r[0] if len(r) == 1 else r

class NestedStruct:
    '''Descriptor representing a nested structure '''
    def __init__(self, name, struct_type, offset):
        self.name = name
        self.struct_type = struct_type
        self.offset = offset
    def __get__(self, instance, cls):
        if instance is None:
            return self
        else:
            data = instance._buffer[self.offset:
                                    self.offset+self.struct_type.struct_size]
            result = self.struct_type(data)
            setattr(instance, self.name, result)
            return result
        
class StructureMeta(type):
    ''' Metaclass that automatically creates StructField descriptors '''
    def __init__(self, clsname, bases, clsdict):
        fields = getattr(self, '_fields_', [])
        byte_order = ''
        offset = 0
        for format, fieldname in fields:
            if isinstance(format, StructureMeta):
                setattr(self, fieldname, NestedStruct(fieldname, format, offset))
                offset += format.struct_size
            else:
                if format.startswith(('<','>','!','@')):
                    byte_order = format[0]
                    format = format[1:]
                format = byte_order + format
                setattr(self, fieldname, StructField(format, offset))
                offset += struct.calcsize(format)
        setattr(self, 'struct_size', offset)

class Structure(metaclass=StructureMeta):
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)

    @classmethod
    def from_file(cls, f):
        return cls(f.read(cls.struct_size))

class SizedRecord:
    def __init__(self, bytedata):
        self._buffer = memoryview(bytedata)
        
    @classmethod
    def from_file(cls, f, size_fmt, includes_size=True):
        sz_nbytes = struct.calcsize(size_fmt)
        sz_bytes = f.read(sz_nbytes)
        sz, = struct.unpack(size_fmt, sz_bytes)
        buf = f.read(sz - includes_size * sz_nbytes)
        return cls(buf)

    def iter_as(self, code):
        if isinstance(code, str):
            s = struct.Struct(code)
            for off in range(0, len(self._buffer), s.size):
                yield s.unpack_from(self._buffer, off)
        elif isinstance(code, StructureMeta):
            size = code.struct_size
            for off in range(0, len(self._buffer), size):
                data = self._buffer[off:off+size]
                yield code(data)

if __name__ == '__main__':
    class Point(Structure):
        _fields_ = [
            ('<d', 'x'),
            ('d', 'y')
            ]

    class PolyHeader(Structure):
        _fields_ = [
            ('<i', 'file_code'),
            (Point, 'min'),
            (Point, 'max'),
            ('i', 'num_polys')
            ]

    def read_polys(filename):
        polys = []
        with open(filename, 'rb') as f:
            phead = PolyHeader.from_file(f)
            for n in range(phead.num_polys):
                rec = SizedRecord.from_file(f, '<i')
                poly = [ (p.x, p.y)
                         for p in rec.iter_as(Point) ]
                polys.append(poly)
        return polys

    polys = read_polys('files/polys.bin')
    print(polys)


[[(1.0, 2.5), (3.5, 4.0), (2.5, 1.5)], [(7.0, 1.2), (5.1, 3.0), (0.5, 7.5), (0.8, 9.0)], [(3.4, 6.3), (1.2, 0.5), (4.6, 9.2)]]


In [52]:
# 6.13. Summarizing Data and Performing Statistics
import pandas
# Read a CSV file, skipping last line
rats = pandas.read_csv('files/rats.csv', skipfooter=1, engine='python')
print(rats)
print()
# Investigate range of values for a certain field
print('Current Activity unique:\n', rats['Current Activity'].unique())
print()
# Filter the data
crew_dispatched = rats[rats['Current Activity'] == 'Completion Date']
print('crew_dispatched:\n', crew_dispatched)
print()
# Find 10 most rat-infested ZIP codes in Chicago
print('ZIP Code:\n', rats['ZIP Code'].value_counts()[:10])
print()
# Group by completion date
dates = rats.groupby('Current Activity')
print('len(dates):', len(dates))
print()
# Determine counts on each day
date_counts = dates.size()
print('date_counts:\n', date_counts[0:10])
print()
# Sort the counts
# date_counts.sort()
# print(date_counts[-10:])
print()

          Current Activity  ZIP Code            value
0            Creation Date     74055  non-null values
1                   Status     74055  non-null values
2          Completion Date     72154  non-null values
3          Completion Date     72154  non-null values
4   Service Request Number     74055  non-null values
5  Type of Service Request     74055  non-null values
6                 Latitude     74043  non-null values
7                Longitude     74043  non-null values
8                 Location     74043  non-null values

Current Activity unique:
 ['Creation Date' 'Status' 'Completion Date' 'Service Request Number'
 'Type of Service Request' 'Latitude' 'Longitude' 'Location']

crew_dispatched:
   Current Activity  ZIP Code            value
2  Completion Date     72154  non-null values
3  Completion Date     72154  non-null values

ZIP Code:
 74055    4
74043    3
72154    2
Name: ZIP Code, dtype: int64

len(dates): 8

date_counts:
 Current Activity
Completion Date         

AttributeError: 'Series' object has no attribute 'sort'