<br>
Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.

* mpg : miles per gallon
* class : car classification
* cty : city mpg
* cyl : # of cylinders
* displ : engine displacement in liters
* drv : f = front-wheel drive, r = rear wheel drive, 4 = 4wd
* fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)
* hwy : highway mpg
* manufacturer : automobile manufacturer
* model : model of car
* trans : type of transmission
* year : model year

In [1]:
import csv

%precision 2

# newline='' is what the csv module documentation asks for when a file is handed
# to csv.reader/DictReader, so newlines embedded in quoted fields are parsed correctly.
with open('mpg.csv', newline='') as csvfile:
    # DictReader yields one dictionary per data row, keyed by the header line.
    mpg = list(csv.DictReader(csvfile))

mpg[:1] # the first row of the list (one dictionary per CSV row)

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')])]

## csv.DictReader has read in each row of our csv file as a dictionary. 
##### `len` shows that our list has 234 dictionaries

In [2]:
# help() writes the documentation to the output itself and returns None,
# so wrapping it in print() only adds a stray "None" line.
help(csv.DictReader)
len(mpg)  # number of row dictionaries in the list

Help on class DictReader in module csv:

class DictReader(builtins.object)
 |  DictReader(f, fieldnames=None, restkey=None, restval=None, dialect='excel', *args, **kwds)
 |  
 |  Methods defined here:
 |  
 |  __init__(self, f, fieldnames=None, restkey=None, restval=None, dialect='excel', *args, **kwds)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __iter__(self)
 |  
 |  __next__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  fieldnames

None


234

In [3]:
# Show the keys (column names) carried by the first row dictionary.
first_row = mpg[0]
first_row.keys()

odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

In [4]:
## Each row is a dictionary, so a column is looked up by its key. The values are
## strings, so they are converted to float before being summed; dividing the
## total by the number of rows gives the average.
row_count = len(mpg)
city_total = sum(float(row['cty']) for row in mpg)
print(city_total / row_count)


### Likewise for hwy
highway_total = sum(float(row['hwy']) for row in mpg)
print(highway_total / row_count)

16.858974358974358
23.44017094017094


### Use `set` to return the unique values for the number of cylinders the cars in our dataset have.

In [5]:
# A set comprehension keeps each distinct 'cyl' string exactly once.
cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

In [6]:
### Group the cars by number of cylinders and find the average cty mpg for each group

# Accumulate the per-group total and row count in a single pass over the rows,
# instead of rescanning the whole list once per cylinder value.
cty_sum = {}
cty_count = {}
for row in mpg:
    cyl = row['cyl']
    cty_sum[cyl] = cty_sum.get(cyl, 0) + float(row['cty'])
    cty_count[cyl] = cty_count.get(cyl, 0) + 1

# 'cyl' values are strings; sort on their integer value so that a two-digit
# count such as '10' or '12' would come after '8' instead of before '4'.
avg = sorted(
    ((cyl, cty_sum[cyl] / cty_count[cyl]) for cyl in cty_sum),
    key=lambda pair: int(pair[0]),
)
avg

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

In [7]:
# Print the distinct values found in the second and third columns of the rows.
selected_columns = list(mpg[0])[1:3]
for column in selected_columns:
    distinct_values = {row[column] for row in mpg}
    print(column, " : ", distinct_values)

manufacturer  :  {'ford', 'dodge', 'chevrolet', 'honda', 'jeep', 'land rover', 'nissan', 'pontiac', 'toyota', 'mercury', 'subaru', 'volkswagen', 'audi', 'lincoln', 'hyundai'}
model  :  {'sonata', 'forester awd', 'jetta', 'dakota pickup 4wd', 'corolla', 'toyota tacoma 4wd', 'mountaineer 4wd', 'camry', 'a4 quattro', 'gti', 'grand cherokee 4wd', 'k1500 tahoe 4wd', 'f150 pickup 4wd', '4runner 4wd', 'durango 4wd', 'mustang', 'civic', 'altima', 'tiburon', 'expedition 2wd', 'land cruiser wagon 4wd', 'passat', 'ram 1500 pickup 4wd', 'corvette', 'explorer 4wd', 'caravan 2wd', 'navigator 2wd', 'new beetle', 'a4', 'malibu', 'a6 quattro', 'pathfinder 4wd', 'range rover', 'impreza awd', 'camry solara', 'c1500 suburban 2wd', 'maxima', 'grand prix'}


### Unique values for vehicle class

In [8]:
# Collect each distinct 'class' string exactly once.
vehicleclass = {row['class'] for row in mpg}
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [9]:
### Average hwy mpg for each class of vehicle in our dataset.
HwyMpgByClass = []

for vclass in vehicleclass:  # one pass per vehicle class
    # hwy values (as floats) of every row whose class matches this one
    hwy_values = [float(row['hwy']) for row in mpg if row['class'] == vclass]
    # store the tuple (class, average hwy mpg)
    HwyMpgByClass.append((vclass, sum(hwy_values) / len(hwy_values)))

# order the classes from lowest to highest average hwy mpg
HwyMpgByClass.sort(key=lambda entry: entry[1])
HwyMpgByClass

[('pickup', 16.88),
 ('suv', 18.13),
 ('minivan', 22.36),
 ('2seater', 24.80),
 ('midsize', 27.29),
 ('subcompact', 28.14),
 ('compact', 28.30)]