In [2]:
import csv
import pandas as pd
pd.set_option('display.precision', 2)

%precision %.2f

# newline='' is what the csv module asks for when opening a file for a reader,
# so that the module handles line endings itself.
with open('mpg.csv', newline='') as csvfile:
    mpg = list(csv.DictReader(csvfile))

# mpg is now a list of dictionaries, one per row of the CSV, each keyed by column name
mpg[:3]

[{'': '1',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'auto(l5)',
  'drv': 'f',
  'cty': '18',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '2',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '1.8',
  'year': '1999',
  'cyl': '4',
  'trans': 'manual(m5)',
  'drv': 'f',
  'cty': '21',
  'hwy': '29',
  'fl': 'p',
  'class': 'compact'},
 {'': '3',
  'manufacturer': 'audi',
  'model': 'a4',
  'displ': '2',
  'year': '2008',
  'cyl': '4',
  'trans': 'manual(m6)',
  'drv': 'f',
  'cty': '20',
  'hwy': '31',
  'fl': 'p',
  'class': 'compact'}]

In [3]:
# Number of rows read from the CSV (one dict per row)
len(mpg)

234

In [4]:
# Finding the columns of the CSV file with the dict keys() method: each row is
# a dict keyed by column name, so the keys of a single row are the column names.

mpg[133].keys()

dict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

# Finding the average miles per gallon in the city and on the highway:

In [5]:
# Mean city mpg: convert each row's 'cty' string to float, sum, divide by the row count
sum(map(lambda row: float(row['cty']), mpg)) / len(mpg)

16.86

In [6]:
# Mean highway mpg: convert each row's 'hwy' string to float, sum, divide by the row count
sum(map(lambda row: float(row['hwy']), mpg)) / len(mpg)

23.44

In [7]:
# Every element of mpg is a dictionary, so row['cyl'] looks up that row's
# 'cyl' value; collecting the values into a set keeps only the distinct ones.

cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

In [8]:
#The following raises a TypeError: mpg is a list (of dicts), and list indices must be integers or slices, not strings

#mpg['cyl']

# Finding the average mpg by cylinder in the city

In [9]:
# Average city mpg for each cylinder count, gathered in a single pass over mpg
# rather than rescanning the whole list once per cylinder value.
cty_by_cyl = {}  # 'cyl' value -> [sum of 'cty', number of rows]

for d in mpg:
    totals = cty_by_cyl.setdefault(d['cyl'], [0.0, 0])
    totals[0] += float(d['cty'])
    totals[1] += 1

CtyMpgByCyl = [(cyl, total / count) for cyl, (total, count) in cty_by_cyl.items()]

# The 'cyl' values are strings; sort on their numeric value so that a
# two-digit count such as '10' would come after '8' instead of before '4'.
CtyMpgByCyl.sort(key=lambda x: int(x[0]))


print(CtyMpgByCyl)
            
    

[('4', 21.012345679012345), ('5', 20.5), ('6', 16.21518987341772), ('8', 12.571428571428571)]


# Finding the average city mpg per vehicle class

In [10]:
# Distinct values of the 'class' column
vehicleclasses = {row['class'] for row in mpg}
vehicleclasses



{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [11]:
# Average city mpg for each vehicle class, gathered in a single pass over mpg.
# Grouping in a dict keeps the classes in order of first appearance, so classes
# with equal averages come out of the (stable) sort in the same order every run.
cty_by_class = {}  # 'class' value -> [sum of 'cty', number of rows]

for d in mpg:
    totals = cty_by_class.setdefault(d['class'], [0.0, 0])
    totals[0] += float(d['cty'])
    totals[1] += 1

CtyMpgByClass = [(vc, total / count) for vc, (total, count) in cty_by_class.items()]

# Order from the lowest average city mpg to the highest
CtyMpgByClass.sort(key=lambda x: x[1])

CtyMpgByClass

[('pickup', 13.00),
 ('suv', 13.50),
 ('2seater', 15.40),
 ('minivan', 15.82),
 ('midsize', 18.76),
 ('compact', 20.13),
 ('subcompact', 20.37)]

# Functional programming and the map() function

In [15]:
people = ['Dr. Albert Einstein', 'Dr. Werner Heisenberg', 'Dr. Emily Noether']

def split_title_and_lastname(person):
    """Return the first and last whitespace-separated words of `person`, joined by a space."""
    words = person.split()
    return ' '.join((words[0], words[-1]))

# map() applies the function to every element; list() materialises the result
list(map(split_title_and_lastname, people))

['Dr. Einstein', 'Dr. Heisenberg', 'Dr. Noether']

# Lambdas as little functions

They cannot have too much complexity: the body of a lambda is limited to a single expression.

In [18]:

# A lambda is an anonymous function whose body is one expression;
# this one takes three arguments and returns (a + b) * c.
first_lambda = lambda a, b, c : (a + b)*c

first_lambda(1, 2, 3)

9

In [19]:
# Same list and helper as in the map() section, repeated here as the
# reference implementation for the lambda version that follows.
people = [
    'Dr. Albert Einstein',
    'Dr. Werner Heisenberg',
    'Dr. Emily Noether',
]

def split_title_and_lastname(person):
    """Keep only the title (first word) and the last name (last word) of `person`."""
    parts = person.split()
    return f'{parts[0]} {parts[-1]}'

list(map(split_title_and_lastname, people))

['Dr. Einstein', 'Dr. Heisenberg', 'Dr. Noether']

In [None]:
# Converting split_title_and_lastname to a lambda function

FirstTry = lambda x : x.split()[0] + ' ' + x.split()[-1]

# An inline lambda has to be *called* with the person. Comparing the string
# to the lambda object itself (without calling it) is always False.
for person in people:
    print( split_title_and_lastname(person) == (lambda person : person.split()[0] + ' ' + person.split()[-1])(person) )

test = lambda person : person.split()[0] + ' ' + person.split()[-1]
print(test)  # shows the function object itself, not the result of calling it

# Same check, using the lambda that was bound to a name
for person in people:
    print( split_title_and_lastname(person) == FirstTry(person))

print('2nd test')

# Compare the two implementations over the whole list at once
list(map(split_title_and_lastname, people)) == list( map(FirstTry, people) )




# List comprehension

Compact form to create lists

In [43]:
# Even numbers from 0 to 100, built with an explicit loop.
# The list is created, filled and printed under one name, so the cell
# runs on a fresh kernel and starts from an empty list on every run.
even_numbers = []

for number in range(0, 101):
    if number % 2 == 0:
        even_numbers.append(number)

print(even_numbers)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100]


In [46]:
# The even numbers from 0 to 100 again, this time from a single list comprehension
compact_even = [n for n in range(101) if not n % 2]

print(compact_even)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100]


In [60]:
#example

def times_table1():
    """Return every product of two factors from 1 to 9 as one flat list, using nested loops."""
    factors = range(1, 10)
    products = []

    for left in factors:
        for right in factors:
            products.append(left * right)

    return products

# The same 81 products, in the same order, from one list comprehension
times_table2 = [left * right for left in range(1, 10) for right in range(1, 10)]

print(times_table1() == times_table2)

print(times_table1())

True
[1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 4, 6, 8, 10, 12, 14, 16, 18, 3, 6, 9, 12, 15, 18, 21, 24, 27, 4, 8, 12, 16, 20, 24, 28, 32, 36, 5, 10, 15, 20, 25, 30, 35, 40, 45, 6, 12, 18, 24, 30, 36, 42, 48, 54, 7, 14, 21, 28, 35, 42, 49, 56, 63, 8, 16, 24, 32, 40, 48, 56, 64, 72, 9, 18, 27, 36, 45, 54, 63, 72, 81]


In [62]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

# Every id made of two lowercase letters followed by two digits,
# in order from 'aa00' to 'zz99'.
answer = [
    ''.join((first, second, tens, ones))
    for first in lowercase
    for second in lowercase
    for tens in digits
    for ones in digits
]
answer[:50] # Display first 50 ids

['aa00',
 'aa01',
 'aa02',
 'aa03',
 'aa04',
 'aa05',
 'aa06',
 'aa07',
 'aa08',
 'aa09',
 'aa10',
 'aa11',
 'aa12',
 'aa13',
 'aa14',
 'aa15',
 'aa16',
 'aa17',
 'aa18',
 'aa19',
 'aa20',
 'aa21',
 'aa22',
 'aa23',
 'aa24',
 'aa25',
 'aa26',
 'aa27',
 'aa28',
 'aa29',
 'aa30',
 'aa31',
 'aa32',
 'aa33',
 'aa34',
 'aa35',
 'aa36',
 'aa37',
 'aa38',
 'aa39',
 'aa40',
 'aa41',
 'aa42',
 'aa43',
 'aa44',
 'aa45',
 'aa46',
 'aa47',
 'aa48',
 'aa49']