# Data Files and Summary Statistics

In [1]:
import csv

%precision 2 # set the decimal places to 2 in the output

with open('./datasets/mpg.csv') as csvfile:
    mpg = list(csv.DictReader(csvfile))
# By using DictReader, we convert the data into a dictionary 
# and then use list() on that to convert it into a list of dictionaries
# Now, mpg is an OrderedDict which is a list of dictionaries

mpg[:3] # the first three dictionaries in the list

[{'': '1',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'auto(l5)',
  'drv': 'f',
  'cty': '18',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '2',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'manual(m5)',
  'drv': 'f',
  'cty': '21',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '3',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '2',
  'year': '2008',
  'cyl': '4',
  'trans': 'manual(m6)',
  'drv': 'f',
  'cty': '20',
  'hwy': '31',
  'fl': 'p',
  'class': 'compact'}]

In [2]:
len(mpg) # number of rows read from the CSV (mpg holds one dictionary per row)

234

In [3]:
mpg[0].keys() # column names, taken from the first row's dictionary; the '' key is a column whose header cell is empty

dict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

In [5]:
# Average city fuel economy: the 'cty' values are strings, so convert each one
# to float before summing, then divide by the number of rows.
sum(map(float, (row['cty'] for row in mpg))) / len(mpg)

16.86

In [6]:
# Average highway fuel economy: the 'hwy' values are strings, so convert each
# one to float before summing, then divide by the number of rows.
sum(map(float, (row['hwy'] for row in mpg))) / len(mpg)

23.44

In [7]:
# Distinct cylinder counts found in the data (kept as strings, as read from the CSV).
cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

In [9]:
# Average city mpg for each cylinder count, as a list of
# (cylinder, average city mpg) tuples.
CtyMpgByCyl = []

for c in cylinders: # iterate over all the cylinder levels
    # city mpg of every car whose cylinder level matches c
    cty_values = [float(d['cty']) for d in mpg if d['cyl'] == c]
    # c was taken from mpg itself, so cty_values always has at least one entry
    CtyMpgByCyl.append((c, sum(cty_values) / len(cty_values))) # append the tuple ('cylinder', 'avg mpg')

# The cylinder levels are strings, so compare them as integers: a plain string
# sort would place a value such as '10' before '4'.
CtyMpgByCyl.sort(key=lambda x: int(x[0])) # sort based on 'cylinder' in increasing order
CtyMpgByCyl

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

For each cylinder level, we average the city mpg of all cars with that many cylinders and store the result as a `(cylinder, average mpg)` tuple in the list `CtyMpgByCyl`. We then sort the list with a `key` function that picks the first item of each tuple (`x[0]`), so the results are ordered by cylinder level.

In [10]:
# Distinct vehicle classes found in the data.
vehicleclass = {row['class'] for row in mpg}
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [11]:
# Average highway mpg for each vehicle class, as a list of
# (class, average highway mpg) tuples.
HwyMpgByClass = []

for t in vehicleclass:
    # highway mpg of every car that belongs to class t
    hwy_values = [float(d['hwy']) for d in mpg if d['class'] == t]
    HwyMpgByClass.append((t, sum(hwy_values) / len(hwy_values)))

# Order the classes by their average mpg, lowest first.
HwyMpgByClass.sort(key=lambda pair: pair[1])
HwyMpgByClass

[('pickup', 16.88),
 ('suv', 18.13),
 ('minivan', 22.36),
 ('2seater', 24.80),
 ('midsize', 27.29),
 ('subcompact', 28.14),
 ('compact', 28.30)]