In [4]:
# readport.py

import csv
from pprint import pprint

# A function that reads a file into a list of dicts
def read_portfolio(filename):
    '''
    Read a portfolio CSV file into a list of dicts.

    The first row of the file is treated as a header and discarded. Every
    following non-blank row is read positionally as (name, shares, price).

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one dict per data row, with the keys 'name' (str),
        'shares' (int) and 'price' (float). An empty file gives [].

    Raises:
        ValueError: if a shares or price field cannot be converted.
        IndexError: if a non-blank row has fewer than three columns.
    '''
    portfolio = []
    # newline='' leaves line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are kept exactly as written.
    with open(filename, newline='') as f:
        rows = csv.reader(f)
        next(rows, None)          # Skip the header row; tolerate an empty file
        for row in rows:
            if not row:           # csv.reader yields [] for a blank line
                continue
            record = {
                'name' : row[0],
                'shares' : int(row[1]),
                'price' : float(row[2])
            }
            portfolio.append(record)
    return portfolio

# Load the sample portfolio into a list of dicts and pretty-print it
portfolio = read_portfolio('Data/portfolio.csv')
pprint(portfolio)

[{'name': 'AA', 'price': 32.2, 'shares': 100},
 {'name': 'IBM', 'price': 91.1, 'shares': 50},
 {'name': 'CAT', 'price': 83.44, 'shares': 150},
 {'name': 'MSFT', 'price': 51.23, 'shares': 200},
 {'name': 'GE', 'price': 40.37, 'shares': 95},
 {'name': 'MSFT', 'price': 65.1, 'shares': 50},
 {'name': 'IBM', 'price': 70.44, 'shares': 100}]


In [9]:
# Holdings with more than 100 shares
[holding for holding in portfolio if holding['shares'] > 100]

# Total cost of the portfolio (shares * price, summed over all holdings)
sum(holding['shares'] * holding['price'] for holding in portfolio)

# Unique stock names (set comprehension)
{holding['name'] for holding in portfolio}

# Total shares held per stock name: seed every name with 0, then accumulate
totals = dict.fromkeys((holding['name'] for holding in portfolio), 0)
for holding in portfolio:
    totals[holding['name']] += holding['shares']

# The same tally with Counter, which needs no seeding because a missing key counts as 0
from collections import Counter
totals = Counter()
for holding in portfolio:
    totals[holding['name']] += holding['shares']

# The two stock names with the largest share totals
totals.most_common(2)

In [10]:
# A plain dict has no default value: looking up a key that was never stored
# raises KeyError, so the very first append (for 'AA') fails.
# The error is caught and printed so that this demonstration does not stop
# a "Restart & Run All" of the notebook.

byname = {}
try:
    for s in portfolio:
        byname[s['name']].append(s)
except KeyError as err:
    # str() of a KeyError is the repr of the missing key, e.g. 'AA'
    print('KeyError:', err)

KeyError: 'AA'

In [13]:
# defaultdict(list) creates an empty list the first time a key is looked up,
# so holdings can be appended without initialising each name first

from collections import defaultdict

byname = defaultdict(list)
for holding in portfolio:
    stock_name = holding['name']
    byname[stock_name].append(holding)

# All holdings recorded under one stock name
byname['MSFT']

[{'name': 'MSFT', 'shares': 200, 'price': 51.23},
 {'name': 'MSFT', 'shares': 50, 'price': 65.1}]

In [14]:
import csv

def read_rides_as_dict(filename):
    '''
    Read the bus ride data as a list of dicts.

    The first row of the file is treated as a header and discarded. Every
    following non-blank row is read positionally as
    (route, date, daytype, rides).

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one dict per data row, with the keys 'route', 'date'
        and 'daytype' (kept as strings) and 'rides' (int). An empty file
        gives [].

    Raises:
        ValueError: if a rides field cannot be converted to int.
        IndexError: if a non-blank row has fewer than four columns.
    '''
    records = []
    # newline='' leaves line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are kept exactly as written.
    with open(filename, newline='') as f:
        rows = csv.reader(f)
        next(rows, None)          # Skip headers; tolerate an empty file
        for row in rows:
            if not row:           # csv.reader yields [] for a blank line
                continue
            record = {
                'route': row[0],
                'date': row[1],
                'daytype': row[2],
                'rides': int(row[3]),
                }
            records.append(record)
    return records

# Load the ride records into `rows` when this code runs as the top-level module
if __name__ == '__main__':
    rows = read_rides_as_dict('Data/ctabus.csv')

In this exercise, your task is to write a program that answers the following four questions:

- How many bus routes exist in Chicago?
- How many people rode the number 22 bus on February 2, 2011? What about any route on any date of your choosing?
- What is the total number of rides taken on each bus route?
- What five bus routes had the greatest ten-year increase in ridership from 2001 to 2011?

You are free to use any technique whatsoever to answer the above questions as long as it's part of the Python standard library (i.e., built-in datatypes, standard library modules, etc.).

In [30]:
# Using list of dicts

# How many bus routes exist in Chicago? / 181
len({ row['route'] for row in rows })

# How many people rode the number 22 bus on February 2, 2011?
# The question is about a single route, so filter on the route as well as the
# date (route values are kept as strings, hence '22').
# NOTE(review): the figure 218422 noted here earlier was presumably recorded
# while the sum had no route filter -- re-run to record the route-22 value.
sum( row['rides'] for row in rows
     if row['route'] == '22' and row['date'] == '02/02/2011' )

# What about any route on any date of your choosing?
# Chosen here: the same route one year earlier.
sum( row['rides'] for row in rows
     if row['route'] == '22' and row['date'] == '02/02/2010' )

# What is the total number of rides taken on each bus route?
from collections import Counter
from pprint import pprint

totals = Counter()
for row in rows:
    totals[row['route']] += row['rides']

totals
#pprint(totals)

# Testing
# '290S': 7308
# sum([ row['rides'] for row in rows if row['route'] == '290S' ])

# What five bus routes had the greatest ten-year increase in ridership from 2001 to 2011?
# Add each route's 2011 rides and subtract its 2001 rides, so every count ends
# up as (2011 total - 2001 total); rows from other years are ignored.
totals = Counter()
for row in rows:
    year = row['date'][-4:]       # last four characters of the date string
    if year == '2011':
        totals[row['route']] += row['rides']
    elif year == '2001':
        totals[row['route']] -= row['rides']

totals.most_common(5)

# Testing
# ('15', 2732209) -- route 15 has no 2001 rides, so its whole 2011 total counts as increase
# sum([ row['rides'] for row in rows if row['date'][-4:] == '2001' and row['route'] == '15' ]) # 0
# sum([ row['rides'] for row in rows if row['date'][-4:] == '2011' and row['route'] == '15' ]) # 2732209
# sum([ row['rides'] for row in rows if row['date'][-4:] == '2001' and row['route'] == '147' ]) # 2748180
# sum([ row['rides'] for row in rows if row['date'][-4:] == '2011' and row['route'] == '147' ]) # 4856090 (4856090 - 2748180 = 2107910)


4856090