In [15]:
from pprint import pprint
import csv
from collections import defaultdict

In [1]:
def parse_row(input_row, parsers):
    """pair each element of input_row with the parser in the same position
    and apply it; a parser of None leaves its element unchanged.
    pairing uses zip, so the result is as long as the shorter input"""
    parsed = []
    for value, parser in zip(input_row, parsers):
        if parser is None:
            parsed.append(value)
        else:
            parsed.append(parser(value))
    return parsed

def parse_rows_with(reader, parsers):
    """generator: for every row produced by reader, yield the result of
    parse_row(row, parsers); rows are parsed lazily, one per iteration"""
    for raw_row in reader:
        parsed_row = parse_row(raw_row, parsers)
        yield parsed_row

In [2]:
def try_or_none(f):
    """wraps f to return None if f raises an exception
    assumes f takes only one input

    only Exception subclasses are swallowed: KeyboardInterrupt, SystemExit
    and other BaseException-only errors still propagate, so wrapping a
    parser can never hide an interrupt or an exit request"""
    def f_or_none(x):
        try:
            return f(x)
        except Exception:
            # best-effort parse: any ordinary failure becomes None
            return None
    return f_or_none

In [3]:
def parse_row(input_row, parsers):
    """like a plain positional parse, but every non-None parser is wrapped
    with try_or_none before being applied to its element.
    a parser of None passes its element through untouched;
    pairing uses zip, so the result is as long as the shorter input"""
    parsed = []
    for value, parser in zip(input_row, parsers):
        if parser is None:
            parsed.append(value)
        else:
            safe_parser = try_or_none(parser)
            parsed.append(safe_parser(value))
    return parsed

In [7]:
import dateutil.parser
data = []

with open("comma_delimited_stock_prices.csv", "rb") as f:
    reader = csv.reader(f)
    # first column goes through the date parser, second is kept as read,
    # third goes through float
    data.extend(parse_rows_with(reader, [dateutil.parser.parse, None, float]))

pprint(data)

[[datetime.datetime(2014, 6, 20, 0, 0), 'AAPL', 90.91],
 [datetime.datetime(2014, 6, 20, 0, 0), 'MSFT', 41.68],
 [datetime.datetime(3014, 6, 20, 0, 0), 'FB', 64.5],
 [datetime.datetime(2014, 6, 19, 0, 0), 'AAPL', 91.86],
 [datetime.datetime(2014, 6, 19, 0, 0), 'MSFT', None],
 [datetime.datetime(2014, 6, 19, 0, 0), 'FB', 64.34]]


In [8]:
# show every row that contains a None, i.e. a field whose parser failed.
# print(row) with a single argument prints the same thing on Python 2 and
# Python 3; the bare `print row` statement is a SyntaxError on Python 3
for row in data:
    if any(x is None for x in row):
        print(row)

[datetime.datetime(2014, 6, 19, 0, 0), 'MSFT', None]


In [9]:
def try_parse_field(field_name, value, parser_dict):
    """look up the parser registered for field_name in parser_dict and
    apply it to value through try_or_none; if there is no entry (or the
    entry is None) value is returned unchanged"""
    parser = parser_dict.get(field_name)
    if parser is None:
        # nothing to apply for this field
        return value
    return try_or_none(parser)(value)

def parse_dict(input_dict, parser_dict):
    """return a new dict with the same keys as input_dict, where each value
    has been passed through try_parse_field with parser_dict;
    input_dict itself is not modified"""
    # .items() instead of the Python 2-only .iteritems(), so this works on
    # both Python 2 and Python 3
    return { field_name : try_parse_field(field_name, value, parser_dict)
             for field_name, value in input_dict.items() }


In [12]:
data = []

# parsers for the columns that need converting; any other column is
# left exactly as the reader produced it
field_parsers = { 'date' : dateutil.parser.parse,
                  'closing_price' : float }

with open("stocks.txt", "rb") as f:
    reader = csv.DictReader(f, delimiter="\t")
    data = [parse_dict(row, field_parsers) for row in reader]

pprint(data[:10])

[{'closing_price': 112.98,
  'date': datetime.datetime(2015, 1, 23, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 112.4,
  'date': datetime.datetime(2015, 1, 22, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 109.55,
  'date': datetime.datetime(2015, 1, 21, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 108.72,
  'date': datetime.datetime(2015, 1, 20, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 105.99,
  'date': datetime.datetime(2015, 1, 16, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 106.82,
  'date': datetime.datetime(2015, 1, 15, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 109.8,
  'date': datetime.datetime(2015, 1, 14, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 110.22,
  'date': datetime.datetime(2015, 1, 13, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 109.25,
  'date': datetime.datetime(2015, 1, 12, 0, 0),
  'symbol': 'AAPL'},
 {'closing_price': 112.01,
  'date': datetime.datetime(2015, 1, 9, 0, 0),
  'symbol': 'AAPL'}]


In [13]:
# closing prices of the AAPL rows only
aapl_closing_prices = [row["closing_price"]
                       for row in data
                       if row["symbol"] == "AAPL"]
max_aapl_price = max(aapl_closing_prices)

max_aapl_price

119.0

In [17]:
# group rows by symbol
by_symbol = defaultdict(list)
for row in data:
    by_symbol[row["symbol"]].append(row)

# use a dict comprehension to find the max for each symbol
# (.items() rather than the Python 2-only .iteritems(), so the cell also
# runs on Python 3)
max_price_by_symbol = { symbol : max(row["closing_price"]
                                     for row in grouped_rows)
                        for symbol, grouped_rows in by_symbol.items() }
max_price_by_symbol

{'AAPL': 119.0, 'FB': 81.45, 'MSFT': 49.3}

In [19]:
def picker(field_name):
    """build and return a one-argument function that looks up
    field_name in whatever row it is given (row[field_name])"""
    def pick(row):
        return row[field_name]
    return pick

def pluck(field_name, rows):
    """turn a list of dicts into the list of field_name values

    always returns a real list, in the order of rows; a row without
    field_name raises KeyError"""
    # a list comprehension instead of map(): on Python 3 map() returns a
    # lazy one-shot iterator, which is not the list the docstring promises
    return [row[field_name] for row in rows]



In [20]:
def group_by(grouper, rows, value_transform=None):
    """group rows by the key grouper(row)

    grouper:         function called on each row to produce its group key
    rows:            iterable of rows
    value_transform: optional function applied to each group's list of rows

    returns a defaultdict(list) mapping key -> list of rows when
    value_transform is None, otherwise a plain dict mapping
    key -> value_transform(list of rows)"""
    # key is output of grouper, value is list of rows
    grouped = defaultdict(list)
    for row in rows:
        grouped[grouper(row)].append(row)

    if value_transform is None:
        return grouped

    # .items() instead of the Python 2-only .iteritems(); the loop variable
    # is named group_rows so it does not shadow the rows parameter
    return { key : value_transform(group_rows)
             for key, group_rows in grouped.items() }

In [21]:
def _max_closing_price(rows):
    """largest closing_price among rows"""
    return max(pluck("closing_price", rows))

max_price_by_symbol_2 = group_by(picker("symbol"), data, _max_closing_price)

max_price_by_symbol_2

{'AAPL': 119.0, 'FB': 81.45, 'MSFT': 49.3}

#### largest and smallest one-day percent changes 

In [23]:
def percent_price_change(yesterday, today):
    """change in closing_price from yesterday's row to today's row,
    as a fraction of yesterday's price (0.5 means +50%, -0.5 means -50%)"""
    price_today = today["closing_price"]
    price_yesterday = yesterday["closing_price"]
    return price_today / price_yesterday - 1

def day_over_day_changes(grouped_rows):
    """sort grouped_rows by their "date" field and return one "change" dict
    per pair of consecutive rows, holding the later row's symbol and date
    plus the percent_price_change between the two rows.
    fewer than two rows gives an empty list"""
    # sort the rows by date
    ordered = sorted(grouped_rows, key=picker("date"))

    # walk the consecutive (earlier, later) pairs
    changes = []
    for yesterday, today in zip(ordered[:-1], ordered[1:]):
        changes.append({ "symbol" : today["symbol"],
                         "date" : today["date"],
                         "change" : percent_price_change(yesterday, today) })
    return changes

In [24]:
# group the rows by symbol, then replace each group with its list of
# day-over-day "change" dicts
changes_by_symbol = group_by(grouper=picker("symbol"),
                             rows=data,
                             value_transform=day_over_day_changes)
changes_by_symbol

{'AAPL': [{'change': -0.045454545454545525,
   'date': datetime.datetime(1980, 12, 15, 0, 0),
   'symbol': 'AAPL'},
  {'change': -0.0714285714285714,
   'date': datetime.datetime(1980, 12, 16, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.025641025641025772,
   'date': datetime.datetime(1980, 12, 17, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.02499999999999991,
   'date': datetime.datetime(1980, 12, 18, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.07317073170731714,
   'date': datetime.datetime(1980, 12, 19, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.045454545454545414,
   'date': datetime.datetime(1980, 12, 22, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.04347826086956519,
   'date': datetime.datetime(1980, 12, 23, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.04166666666666674,
   'date': datetime.datetime(1980, 12, 24, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.10000000000000009,
   'date': datetime.datetime(1980, 12, 26, 0, 0),
   'symbol': 'AAPL'},
  {'change': 0.0181818181818183,
 

In [31]:
from random import sample
# flatten the per-symbol lists of "change" dicts into one big list
all_changes = []
for changes in changes_by_symbol.values():
    all_changes.extend(changes)
sample(all_changes, 5)

[{'change': 0.058495821727019504,
  'date': datetime.datetime(2000, 1, 25, 0, 0),
  'symbol': 'AAPL'},
 {'change': -0.005392799883399024,
  'date': datetime.datetime(2013, 1, 18, 0, 0),
  'symbol': 'AAPL'},
 {'change': -0.022900763358778664,
  'date': datetime.datetime(1995, 4, 13, 0, 0),
  'symbol': 'AAPL'},
 {'change': 0.00512820512820511,
  'date': datetime.datetime(1992, 12, 28, 0, 0),
  'symbol': 'MSFT'},
 {'change': -0.005638340716874635,
  'date': datetime.datetime(2008, 5, 2, 0, 0),
  'symbol': 'MSFT'}]

In [32]:
# the "change" dict with the largest one-day change
max(all_changes, key=lambda change_row: change_row["change"])

{'change': 0.3283582089552237,
 'date': datetime.datetime(1997, 8, 6, 0, 0),
 'symbol': 'AAPL'}

In [33]:
# the "change" dict with the smallest (most negative) one-day change
min(all_changes, key=lambda change_row: change_row["change"])

{'change': -0.5193370165745856,
 'date': datetime.datetime(2000, 9, 29, 0, 0),
 'symbol': 'AAPL'}

In [35]:
def combine_pct_changes(pct_change1, pct_change2):
    """compound two fractional changes into one:
    e.g. +50% followed by +50% is +125% overall"""
    growth1 = 1 + pct_change1
    growth2 = 1 + pct_change2
    return growth1 * growth2 - 1

def overall_change(changes):
    """compound the "change" value of every dict in changes, in order,
    into a single overall fractional change using combine_pct_changes.
    changes must be non-empty: reduce() without an initial value raises
    TypeError on an empty sequence"""
    # reduce is a builtin only on Python 2; functools.reduce is available
    # on both Python 2 and Python 3
    from functools import reduce
    return reduce(combine_pct_changes, pluck("change", changes))

def _month_of(row):
    """month number of the row's date"""
    return row['date'].month

overall_change_by_month = group_by(_month_of, all_changes, overall_change)

overall_change_by_month

{1: 19.972214514609067,
 2: 0.11858483359214822,
 3: 2.8188500486033203,
 4: 6.934196784984757,
 5: 1.2555660890321572,
 6: -0.5977889232201739,
 7: -0.17183091713561516,
 8: 6.220055959879486,
 9: -0.7411625436428328,
 10: 21.55254125167998,
 11: 3.758572336791728,
 12: 2.2895332950255884}