Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.

* mpg : miles per gallon
* class : car classification
* cty : city mpg
* cyl : # of cylinders
* displ : engine displacement in liters
* drv : f = front-wheel drive, r = rear wheel drive, 4 = 4wd
* fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)
* hwy : highway mpg
* manufacturer : automobile manufacturer
* model : model of car
* trans : type of transmission
* year : model year

In [10]:
import csv
%precision 2

'%.2f'

In [19]:
with open('mpg.csv') as csvFile:
    mpg = list(csv.DictReader(csvFile))

mpg[:3]

               

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '3'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '2'),
              ('year', '2008'),
              ('cyl', '4'),
              ('trans', 'manual(m6)'),
              ('drv',

In [20]:
len(mpg)

234

In [21]:
mpg[0].keys()

odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

* This is how to find the average cty fuel economy across all cars. All values in the dictionaries are strings, so we need to convert to float.

In [23]:
sum(float(d['cty']) for d in mpg) / len(mpg)

16.86

### Similarly this is how to find the average hwy fuel economy across all cars.

In [24]:
sum(float(d['hwy']) for d in mpg) / len(mpg)

23.44

### Use set to return the unique values for the number of cylinders the cars in our dataset have.

In [26]:
cylinders = set(d['cyl'] for d in mpg)
cylinders

{'4', '5', '6', '8'}

### Here's a more complex example where we are grouping the cars by number of cylinder, and finding the average cty mpg for each group

In [35]:
CtyMpgByCyl = []

for c in cylinders: # iterate over all the cylinder levels
    summpg = 0
    cyltypecount = 0
    for d in mpg: # iterate over all dictionaries
        if d['cyl'] == c: # if the cylinder level type matches,
            summpg += float(d['cty']) # add the cty mpg
            cyltypecount += 1 # increment the count
    CtyMpgByCyl.append((c, summpg / cyltypecount)) # append the tuple ('cylinder', 'avg mpg')

CtyMpgByCyl.sort(key=lambda x: x[0])
CtyMpgByCyl

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

### Use set to return the unique values for the class types in our dataset.

In [37]:
vehicleclass = set(d['class'] for d in mpg)
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

### And here's an example of how to find the average hwy mpg for each class of vehicle in our dataset.

In [38]:
HwyMpgByClass = []

for t in vehicleclass: # iterate over all the vehicle classes
    summpg = 0
    vclasscount = 0
    for d in mpg: # iterate over all dictionaries
        if d['class'] == t: # if the cylinder amount type matches,
            summpg += float(d['hwy']) # add the hwy mpg
            vclasscount += 1 # increment the count
    HwyMpgByClass.append((t, summpg / vclasscount)) # append the tuple ('class', 'avg mpg')

HwyMpgByClass.sort(key=lambda x: x[1])
HwyMpgByClass

[('pickup', 16.88),
 ('suv', 18.13),
 ('minivan', 22.36),
 ('2seater', 24.80),
 ('midsize', 27.29),
 ('subcompact', 28.14),
 ('compact', 28.30)]

#  Dates and Times

In [39]:
import datetime as dt
import time as tm

### time returns the current time in seconds since the Epoch. (January 1st, 1970)

In [40]:
tm.time()

1549465259.24

### Convert the timestamp to datetime.

In [42]:
dtnow = dt.datetime.fromtimestamp(tm.time())
dtnow

datetime.datetime(2019, 2, 6, 20, 33, 19, 669935)

In [56]:
print(dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second, dtnow.microsecond)

2019 2 6 20 33 19 669935


### timedelta is a duration expressing the difference between two dates

In [65]:
delta = dt.timedelta(days = 100)
print(delta)

100 days, 0:00:00


### date.today() returns the current local date.

In [61]:
today = dt.date.today()
today

datetime.date(2019, 2, 6)

In [62]:
today - delta

datetime.date(2018, 10, 29)

In [63]:
today > today - delta

True

# The Python Programming Language: Objects and map()

In [68]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
        namef = person.split()
        return namef[0] + " "+ namef[2]

list(map(split_title_and_name, people))


['Dr.', 'Christopher', 'Brooks']
['Dr.', 'Kevyn', 'Collins-Thompson']
['Dr.', 'VG', 'Vinod', 'Vydiswaran']
['Dr.', 'Daniel', 'Romero']


['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vinod', 'Dr. Romero']

### An example of a class in python

In [69]:
class Person:
    department = 'School of Information' #a class variable

    def set_name(self, new_name): #a method
        self.name = new_name
    def set_location(self, new_location):
        self.location = new_location

In [70]:
person = Person()
person.set_name('Christopher Brooks')
person.set_location('Ann Arbor, MI, USA')
print('{} live in {} and works in the department {}'.format(person.name, person.location, person.department))

Christopher Brooks live in Ann Arbor, MI, USA and works in the department School of Information


### Here's an example of mapping the min function between two lists.

In [71]:
store1 = [10.00, 11.00, 12.34, 2.34]
store2 = [9.00, 11.10, 12.34, 2.01]
cheapest = map(min, store1, store2)
cheapest

<map at 0x2a5e7d15a20>

In [72]:
for i in cheapest:
    print(i)

9.0
11.0
12.34
2.01


In [75]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

answer = [ l1+l2+d1+d2 for l1 in lowercase for l2 in lowercase for d1 in digits for d2 in digits  ]

#  Lambda and List Comprehensions

### Here's an example of lambda that takes in three parameters and adds the first two.

In [76]:
my_function = lambda a,b,c : a+b

In [80]:
my_function(1,2,3)

3

In [85]:
my_list = []
for number in range(0, 1000):
    if number % 2 == 0:
        my_list.append(number)
my_list[:10]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [84]:
my_list = [num for num in range(1000) if num % 2 ==0]
my_list[:10]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [1]:
['a'] + [1]

['a', 1]

In [6]:
a = lambda x : x**2
a

<function __main__.<lambda>>

In [13]:
import numpy as np
x = np.arange(36)
x = x.reshape(6,6)
x

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [20]:
x[:,::7]

array([[ 0],
       [ 6],
       [12],
       [18],
       [24],
       [30]])

In [27]:
x.reshape(36)[::-7]

array([35, 28, 21, 14,  7,  0])

In [24]:
x

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [25]:
x[2:4,2:4]

array([[14, 15],
       [20, 21]])

# numpy 

In [28]:
import numpy as np

### creating arrays


* Create a list and convert it to a numpy array

In [29]:
my_list = [1,2,3,4]
np.array(my_list)

array([1, 2, 3, 4])

* Or just pass in a list directly

In [30]:
y = np.array([4,5,6])
y

array([4, 5, 6])

* Pass in a list of lists to create a multidimensional array.

In [32]:
m = np.array([[4,5,6],[7,8,9]])
m

array([[4, 5, 6],
       [7, 8, 9]])

* Use the shape method to find the dimensions of the array. (rows, columns)

In [33]:
m.shape

(2, 3)

In [35]:
n = np.arange(0,30,2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [40]:
n = n.reshape(3,5)
n

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

* linspace returns evenly spaced numbers over a specified interval.

In [48]:
o = np.linspace(0,4,9)
o

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. ])

* resize changes the shape and size of array in-place.

In [50]:
o.resize(3,3)
o

array([[0. , 0.5, 1. ],
       [1.5, 2. , 2.5],
       [3. , 3.5, 4. ]])

* ones returns a new array of given shape and type, filled with ones.

In [52]:
np.ones((5,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [53]:
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

* eye returns a 2-D array with ones on the diagonal and zeros elsewhere.

In [54]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

* diag extracts a diagonal or constructs a diagonal array.

In [55]:
y

array([4, 5, 6])

In [56]:
np.diag(y)

array([[4, 0, 0],
       [0, 5, 0],
       [0, 0, 6]])

In [59]:
x = np.array([y,y])
x

array([[4, 5, 6],
       [4, 5, 6]])

In [62]:
t =np.diag(x)
t

array([4, 5])

In [63]:
np.diag(t)

array([[4, 0],
       [0, 5]])

* Create an array using repeating list

In [64]:
np.array([1,2,3] * 3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

* Repeat elements of an array using repeat.

In [67]:
np.repeat([1,2,3],3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

* Combining Arrays

In [68]:
p = np.ones([2,3],int)
p

array([[1, 1, 1],
       [1, 1, 1]])

In [71]:
np.ones([2,3])

array([[1., 1., 1.],
       [1., 1., 1.]])

* Use vstack to stack arrays in sequence vertically (row wise).

In [74]:
np.vstack([p,p*2])

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

* Use hstack to stack arrays in sequence horizontally (column wise).

In [75]:
np.hstack([p,p*2])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

## Operations¶

* Use +, -, *, / and ** to perform element wise addition, subtraction, multiplication, division and power.

In [76]:
x

array([[4, 5, 6],
       [4, 5, 6]])

In [77]:
y

array([4, 5, 6])

In [78]:
x + y

array([[ 8, 10, 12],
       [ 8, 10, 12]])

<br>
**Dot Product:**  

$ \begin{bmatrix}x_1 \ x_2 \ x_3\end{bmatrix}
\cdot
\begin{bmatrix}y_1 \\ y_2 \\ y_3\end{bmatrix}
= x_1 y_1 + x_2 y_2 + x_3 y_3$

In [80]:
x.dot(y)

array([77, 77])

In [82]:
x =np.array([1,2,3])
y =np.array([4,5,6])

In [83]:
x.dot(y)

32

* number of rows of array

In [85]:
z = np.array([y,y**2])
z
len(z)

2

In [86]:
z

array([[ 4,  5,  6],
       [16, 25, 36]])

* Use .T to get the transpose.

In [87]:
z.T

array([[ 4, 16],
       [ 5, 25],
       [ 6, 36]])

In [88]:
z.T.shape

(3, 2)

* Use .dtype to see the data type of the elements in the array.

In [89]:
z.dtype

dtype('int32')

* Use .astype to cast to a specific type.

In [90]:
z.astype('f')

array([[ 4.,  5.,  6.],
       [16., 25., 36.]], dtype=float32)

### Math Functions
Numpy has many built in math functions that can be performed on arrays.

In [97]:
x = np.random.randint(0,10,(1,5))
x

array([[6, 1, 2, 8, 0]])

In [100]:
x.max()

8

In [101]:
x.sum()

17

In [102]:
x.min()

0

In [103]:
x.mean()

3.4

In [104]:
x.std()

3.072458299147443

### argmax() and argmin() return the index of the maximum and minimum values in the array.

In [108]:
x.argmax()


3

In [109]:
x.argmin()

4

# Indexing / Slicing

In [1]:
import numpy as np

s = np.arange(13)**2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144],
      dtype=int32)

### [ ] Use bracket notation to get the value at a specific index. Remember that indexing starts at 0.

In [2]:
s[0], s[1], s[-1]

(0, 1, 144)

### array[start:stop] Use : to indicate a range. array[start:stop]

### Leaving start or stop empty will default to the beginning/end of the array.

In [3]:
s[1:9]

array([ 1,  4,  9, 16, 25, 36, 49, 64], dtype=int32)

### Use (-) negatives to count from the back.

In [4]:
s[-3:]

array([100, 121, 144], dtype=int32)

## array[start:stop:stepsize]
### A second : can be used to indicate step-size

### Here we are starting 5th element from the end, and counting backwards by 2 until the beginning of the array is reached.

In [10]:
print(s)

s[-5::-2]

[  0   1   4   9  16  25  36  49  64  81 100 121 144]


array([64, 36, 16,  4,  0], dtype=int32)

## multidimensional array.
### Let's look at a multidimensional array.

In [19]:
r = np.arange(36)
r.resize((6,6))
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

## array[row, column]
### Use bracket notation to slice: array[row, column]

In [20]:
r[2,2]

14

### And use : to select a range of rows or columns

In [24]:
r[5,2:5]
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

### Here we are selecting all the rows up to (and not including) row 2, and all the columns up to (and not including) the last column.

In [23]:
r
r[1:4,:-2]

array([[ 6,  7,  8,  9],
       [12, 13, 14, 15],
       [18, 19, 20, 21]])

### This is a slice of the last row, and only every other element.

arr[row, start:end:step]

In [27]:
r[-1,::-2]

array([35, 33, 31])

## conditional indexing
### We can also perform conditional indexing. Here we are selecting values from the array that are greater than 30

In [28]:
r[r>30]

array([31, 32, 33, 34, 35])

### np.where(condition) returns row index and column index


In [39]:
np.where(r>25)

(array([4, 4, 4, 4, 5, 5, 5, 5, 5, 5], dtype=int64),
 array([2, 3, 4, 5, 0, 1, 2, 3, 4, 5], dtype=int64))

## Copying Data

### Be careful with copying and modifying arrays in NumPy!

## r2 is a slice of r

In [42]:
r2 = r[:3, :2]
r2
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

#### Set this slice's values to zero ([:] selects the entire array)

In [44]:
r2[:] = 0
r2

array([[0, 0],
       [0, 0],
       [0, 0]])

In [45]:
r

array([[ 0,  0,  2,  3,  4,  5],
       [ 0,  0,  8,  9, 10, 11],
       [ 0,  0, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

### r has also been changed!

## .copy()
### To avoid this, use r.copy to create a copy that will not affect the original array

In [56]:
r_c = r.copy()
r_c

array([[ 0,  0,  2,  3,  4,  5],
       [ 0,  0,  8,  9, 10, 11],
       [ 0,  0, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

### Now when r_copy is modified, r will not be changed.

In [49]:
r_c[:] = 101
r_c

array([[101, 101, 101, 101, 101, 101],
       [101, 101, 101, 101, 101, 101],
       [101, 101, 101, 101, 101, 101],
       [101, 101, 101, 101, 101, 101],
       [101, 101, 101, 101, 101, 101],
       [101, 101, 101, 101, 101, 101]])

In [50]:
r

array([[ 0,  0,  2,  3,  4,  5],
       [ 0,  0,  8,  9, 10, 11],
       [ 0,  0, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

### Iterating Over Arrays

## np.random.randint(lowrange,highrange,(row,column))
### Let's create a new 4 by 3 array of random numbers 0-9.

In [58]:
s = np.random.randint(0,10,(4,3))
s

array([[7, 8, 2],
       [6, 1, 5],
       [8, 6, 2],
       [0, 9, 5]])

### Iterate by row:

In [59]:
for row in s:
    print(row)

[7 8 2]
[6 1 5]
[8 6 2]
[0 9 5]


### Iterate by index:

In [62]:
for i in range(len(s)):
    print("i:",i,s[i])

i: 0 [7 8 2]
i: 1 [6 1 5]
i: 2 [8 6 2]
i: 3 [0 9 5]


In [61]:
len(s)

4

### enumerate()
### return index and element


In [63]:
for i,n in enumerate(s):
    print(i,n)

0 [7 8 2]
1 [6 1 5]
2 [8 6 2]
3 [0 9 5]


### zip()

### Use zip to iterate over multiple iterables.

In [65]:
s

array([[7, 8, 2],
       [6, 1, 5],
       [8, 6, 2],
       [0, 9, 5]])

In [67]:
s2 = s**2
s2

array([[49, 64,  4],
       [36,  1, 25],
       [64, 36,  4],
       [ 0, 81, 25]], dtype=int32)

In [68]:
for r1, r2 in zip(s,s2):
    print(r1,"+",r2,"=",r1+r2)

[7 8 2] + [49 64  4] = [56 72  6]
[6 1 5] + [36  1 25] = [42  2 30]
[8 6 2] + [64 36  4] = [72 42  6]
[0 9 5] + [ 0 81 25] = [ 0 90 30]
