# Data files and Summary Statistics

In [1]:
import csv

%precision 2  # sets the floating point precision to 2

# Read the whole CSV into memory as a list of dicts. csv.DictReader takes the
# first row as the column names; every following row becomes one dict mapping
# column name -> cell value (all values are strings).
# newline='' is what the csv module documentation asks for when opening the
# file, so that newlines embedded in quoted fields are parsed correctly.
with open('mpg.csv', newline='') as csvfile:
    mpg = list(csv.DictReader(csvfile))

mpg[:3]  # preview the first three rows

ValueError: Precision must be int or format string, not '2  # sets the floating point precision to 2'

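__Note:__ This error occurred because I added the comment on the same line as `%precision 2`; the magic treats everything after its name as its argument, so the comment text was passed to it as the precision value.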

In [3]:
import csv

%precision 2  

# sets the floating point precision to 2

# Read the whole CSV into memory as a list of dicts. csv.DictReader takes the
# first row as the column names; every following row becomes one dict mapping
# column name -> cell value (all values are strings).
# newline='' is what the csv module documentation asks for when opening the
# file, so that newlines embedded in quoted fields are parsed correctly.
with open('mpg.csv', newline='') as csvfile:
    mpg = list(csv.DictReader(csvfile))

mpg[:3]  # preview the first three rows

[{'': '1',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'auto(l5)',
  'drv': 'f',
  'cty': '18',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '2',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'manual(m5)',
  'drv': 'f',
  'cty': '21',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '3',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '2',
  'year': '2008',
  'cyl': '4',
  'trans': 'manual(m6)',
  'drv': 'f',
  'cty': '20',
  'hwy': '31',
  'fl': 'p',
  'class': 'compact'}]

In [4]:
# Number of rows loaded into `mpg` (one dict per CSV data row).
len(mpg)

234

The CSV file has data on 234 vehicles.

In [5]:
# The keys of the first row dict are the CSV column names
# (DictReader takes them from the header row).
mpg[0].keys()

dict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

This gives us the column names of the CSV file. The first key is an empty string because the first column of the file has no header.

In [6]:
# Average city MPG across all vehicles in the CSV file.
# The generator expression converts each row's 'cty' string to a float on the
# fly; sum() adds the values up and the total is divided by the number of rows.
sum(float(row['cty']) for row in mpg) / len(mpg)

16.86

__Note:__ Since all the values read from the CSV file are strings, we convert each one to a float inside the generator expression before summing.

In [7]:
# Average highway MPG: convert each row's 'hwy' string to a float, sum the
# values, and divide by the number of rows.
sum(float(row['hwy']) for row in mpg) / len(mpg)

23.44

In [8]:
# Distinct values of the 'cyl' column (still strings, as read from the CSV).
cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

In [9]:
# print() shows the set in its own iteration order, which is arbitrary
# because sets are unordered.
print(cylinders)

{'5', '8', '6', '4'}


In [10]:
# Confirm that `cylinders` is a set.
type(cylinders)

set

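So we created a set: a data type that, like a dictionary, is written with curly braces `{}`, but holds only unique values with no keys. Sets are unordered, which is why `print` can show the elements in a different order than the cell output does.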

In [11]:
# Average city MPG for each cylinder count.
# The groups are discovered and accumulated in a single pass over the rows,
# instead of rescanning the whole dataset once per cylinder value. This also
# means the cell depends only on `mpg`, not on a set built in another cell.
cty_total_by_cyl = {}   # cylinder value -> running sum of city MPG
row_count_by_cyl = {}   # cylinder value -> number of vehicles in the group

for row in mpg:
    cyl = row['cyl']
    cty_total_by_cyl[cyl] = cty_total_by_cyl.get(cyl, 0) + float(row['cty'])
    row_count_by_cyl[cyl] = row_count_by_cyl.get(cyl, 0) + 1

# One (cylinder, average) tuple per group, ordered by cylinder value.
# The cylinder values are still strings, so this ordering is lexicographic.
CtyMpgByCyl = sorted(
    ((cyl, cty_total_by_cyl[cyl] / row_count_by_cyl[cyl]) for cyl in cty_total_by_cyl),
    key=lambda pair: pair[0],
)
CtyMpgByCyl

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

In [12]:
# Distinct values of the 'class' column.
vehicleclass = {row['class'] for row in mpg}
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [15]:
# Average highway MPG for each vehicle class.
# The groups are discovered and accumulated in a single pass over the rows,
# instead of rescanning the whole dataset once per class. This also means the
# cell depends only on `mpg`, not on a set built in another cell.
hwy_total_by_class = {}   # vehicle class -> running sum of highway MPG
row_count_by_class = {}   # vehicle class -> number of vehicles in the group

for row in mpg:
    vehicle_class = row['class']
    hwy_total_by_class[vehicle_class] = (
        hwy_total_by_class.get(vehicle_class, 0) + float(row['hwy'])
    )
    row_count_by_class[vehicle_class] = row_count_by_class.get(vehicle_class, 0) + 1

# One (class, average) tuple per group, ordered from lowest to highest average.
# sorted() is stable, so classes with equal averages keep the order in which
# they first appear in the data rather than an arbitrary set order.
HwyMpgByClass = sorted(
    (
        (vehicle_class, hwy_total_by_class[vehicle_class] / row_count_by_class[vehicle_class])
        for vehicle_class in hwy_total_by_class
    ),
    key=lambda pair: pair[1],
)
HwyMpgByClass

[('pickup', 16.88),
 ('suv', 18.13),
 ('minivan', 22.36),
 ('2seater', 24.80),
 ('midsize', 27.29),
 ('subcompact', 28.14),
 ('compact', 28.30)]