# Iterators

In [1]:
# Data

import csv
f = open('Data/portfolio.csv')
f_csv = csv.reader(f)
headers = next(f_csv)
rows = list(f_csv)

In [2]:
# Basic iteration

for row in rows:
    print(row)

['AA', '100', '32.20']
['IBM', '50', '91.10']
['CAT', '150', '83.44']
['MSFT', '200', '51.23']
['GE', '95', '40.37']
['MSFT', '50', '65.10']
['IBM', '100', '70.44']


In [3]:
# Unpacking

for name, shares, price in rows:
    print(name, shares, price)

AA 100 32.20
IBM 50 91.10
CAT 150 83.44
MSFT 200 51.23
GE 95 40.37
MSFT 50 65.10
IBM 100 70.44


In [4]:
# Throwaway

for _, _, price in rows:
    print(price)

32.20
91.10
83.44
51.23
40.37
65.10
70.44


In [5]:
# Unpacking with wildcard

for name, *values in rows:
    print(name, values)

# ** vs. *

# def foo(*args):
#     for a in args:
#         print(a)        

# foo(1)
# # 1

# foo(1, 2, 3)
# # 1
# # 2
# # 3

# def bar(**kwargs):
#     for a in kwargs:
#         print(a, kwargs[a])  

# bar(name='one', age=27)
# # name one
# # age 27

AA ['100', '32.20']
IBM ['50', '91.10']
CAT ['150', '83.44']
MSFT ['200', '51.23']
GE ['95', '40.37']
MSFT ['50', '65.10']
IBM ['100', '70.44']


In [10]:
# Group by name
from collections import defaultdict

byname = defaultdict(list)
for name, *data in rows:
    byname[name].append(data)

for shares, price in byname['IBM']:
    print(shares, price)

byname

50 91.10
100 70.44


defaultdict(list,
            {'AA': [['100', '32.20']],
             'IBM': [['50', '91.10'], ['100', '70.44']],
             'CAT': [['150', '83.44']],
             'MSFT': [['200', '51.23'], ['50', '65.10']],
             'GE': [['95', '40.37']]})

In [11]:
# Enumerate

for i, row in enumerate(rows):
    print(i, row)

0 ['AA', '100', '32.20']
1 ['IBM', '50', '91.10']
2 ['CAT', '150', '83.44']
3 ['MSFT', '200', '51.23']
4 ['GE', '95', '40.37']
5 ['MSFT', '50', '65.10']
6 ['IBM', '100', '70.44']


In [13]:
# Enumerate with unpacking

for i, (name, *data) in enumerate(rows):
    print(i, name, data)

0 AA ['100', '32.20']
1 IBM ['50', '91.10']
2 CAT ['150', '83.44']
3 MSFT ['200', '51.23']
4 GE ['95', '40.37']
5 MSFT ['50', '65.10']
6 IBM ['100', '70.44']


In [15]:
# zip to combine lists

for col, val in zip(headers, rows[0]):
    print(col, val)

name AA
shares 100
price 32.20


In [16]:
# zip to dict

dict(zip(headers, rows[0]))

{'name': 'AA', 'shares': '100', 'price': '32.20'}

In [17]:
# Sequence dictionaries

for row in rows:
    print(dict(zip(headers, row)))

{'name': 'AA', 'shares': '100', 'price': '32.20'}
{'name': 'IBM', 'shares': '50', 'price': '91.10'}
{'name': 'CAT', 'shares': '150', 'price': '83.44'}
{'name': 'MSFT', 'shares': '200', 'price': '51.23'}
{'name': 'GE', 'shares': '95', 'price': '40.37'}
{'name': 'MSFT', 'shares': '50', 'price': '65.10'}
{'name': 'IBM', 'shares': '100', 'price': '70.44'}


In [18]:
# Generator generates one time results without storing to list
# Prints just once as generator empty after first loop
# Get manually using next() function

nums = [12,21,321,312,34]
squares = (x*x for x in nums)
for n in squares:
    print(n)

for n in squares:
    print(n)


144
441
103041
97344
1156


In [21]:
# In generator functions we'll use yield instead of return

def square(nums):
    for x in nums:
        yield x*x

for n in square(nums):
    print(n)

144
441
103041
97344
1156


In [26]:
# Useful with sum() min() etc function feeding

from readport import read_portfolio

data = read_portfolio('Data/portfolio.csv')
summed = sum(s['shares'] * s['price'] for s in data)
# Instead of 
summed = [s['shares'] * s['price'] for s in data]
sum(summed)

print(min(s['shares'] for s in data))
print(any(s['name'] == 'IBM' for s in data))
print(all(s['name'] == 'IBM' for s in data))
print(sum(s['shares'] for s in data if s['name'] == 'IBM'))

50
True
False
150


In [31]:
# Generator for joining string

s = ('GOOG',100,490.10)
# Fails as gets int
# ','.join(s)
','.join(str(x) for x in s)

'GOOG,100,490.1'

In [37]:
# Memory usage should be lot less when using generator

import tracemalloc
tracemalloc.start()

import readrides
rows = readrides.read_rides_as_dict('Data/ctabus.csv')
rt22 = [row for row in rows if row['route'] == '22']
max(rt22, key=lambda row: row['rides'])
# {'date': '06/11/2008', 'route': '22', 'daytype': 'W', 'rides': 26896}
tracemalloc.get_traced_memory()

# (217 809 480, 433921319) 217MB

(217809480, 433921319)

In [38]:
import tracemalloc
tracemalloc.start()

import csv
f = open('Data/ctabus.csv')
f_csv = csv.reader(f)
headers = next(f_csv)
# Generator is done here
rows = (dict(zip(headers,row)) for row in f_csv)
# As well as here - just one time looping around, not saving raw dataset to memory
# Saving just filtered results
rt22 = (row for row in rows if row['route'] == '22')
max(rt22, key=lambda row: int(row['rides']))
# {'date': '06/11/2008', 'route': '22', 'daytype': 'W', 'rides': 26896}
tracemalloc.get_traced_memory()

# (1 732 040, 433921319) 1.7MB 

(1732040, 433921319)