# Functions

In [1]:
x = 1
y = 2
x + y

3

In [2]:
# python interpreter is stateful: variables stay in memory
x

1

In [4]:
# define a function
def add_numbers(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total
print(add_numbers(1, 2))

3


In [6]:
# add default value, always at the end
def add_numbers(x, y, z=None):
    """Add two values, or three when the optional ``z`` is supplied.

    Parameters:
        x, y: the operands that are always added.
        z: optional third operand; when left as ``None`` it is ignored.

    Returns:
        ``x + y`` when ``z`` is ``None``, otherwise ``x + y + z``.
    """
    # Identity check: `z == None` would call z.__eq__, which misbehaves for
    # objects that override equality (e.g. NumPy arrays compare element-wise).
    if z is None:
        return x + y
    return x + y + z
print(add_numbers(1 , 2))
print(add_numbers(1 , 2, 3))

3
6


In [8]:
# we can assign a function to a variable
a = add_numbers
a(1, 2)

3

In [11]:
def do_math(a, b, kind='add'):
    """Return ``a + b`` when ``kind`` is ``'add'``; otherwise return ``a - b``."""
    if kind != 'add':
        # any value other than 'add' selects subtraction
        return a - b
    return a + b

do_math(1, 2)

3

# Types and Sequences

In [1]:
# the type function returns the type of a variable
type("string")

str

In [2]:
type(None)

NoneType

In [3]:
type(1)

int

In [4]:
type(1.0)

float

In [7]:
type(add_numbers)

function

In [8]:
# define a tuple. Values cannot be modified
x=(1, 'a', 2, 'b')
type(x)

tuple

In [18]:
# define a list. Elements can be added, removed, changed
x=[1, 'a', 2, 'b']
type(x)

list

In [19]:
x.append(3.3)
for item in x:
    print(item)

1
a
2
b
3.3


In [21]:
i = 0
while (i != len(x)):
    print(x[i])
    i = i + 1

1
a
2
b
3.3


In [25]:
# + concatenates 2 lists
# in looks up a value in a list
3 in ([1, 2] + [3, 4])

True

In [43]:
# sequences can be sliced. Note: strings are sequences of characters (immutable, unlike lists)
x = "This is a string"
print(x[0:4])
print(x[-11:-6])
print(x[-6:])

This
is a 
string


In [49]:
x = 'Dr. Christopher Brooks'
x[4:15]

'Christopher'

In [4]:
firstname='Christopher'
lastname='Brooks'
print(firstname + ' ' + lastname)
print(3* firstname) # repeat the elements
print('Chris' in firstname)
print((firstname + ' ' + lastname).split(' ')[0]) # split

Christopher Brooks
ChristopherChristopherChristopher
True
Christopher


In [22]:
# dictionary
x={"key1":"value1", "key2":"value2"}
x["key3"] = None
print(x)
for item in x:
    print(item + ' - ' + str(x[item]))
print(x.values())

{'key1': 'value1', 'key2': 'value2', 'key3': None}
key1 - value1
key2 - value2
key3 - None
dict_values(['value1', 'value2', None])


In [23]:
# iterate through both keys and values using the items() function
for k,v in x.items():
    print(k + ' - ' + str(v))

key1 - value1
key2 - value2
key3 - None


In [29]:
# unpacking
x1, x2 = [1, 2]
print(x1)

1


# More on Strings

In [1]:
sales_record = {'price': 3.24, 'num_items': 4}
s = '{} items bought at {}'
print(s.format(sales_record['num_items'], sales_record['price']))

4 items bought at 3.24


# Reading and Writing csv files

In [1]:
import csv
import os

%precision 2

# Build the path with os.path.join instead of a backslash-laden literal:
# '\I', '\w', '\d' and '\m' are invalid escape sequences, and hard-coded
# backslashes only work on Windows. A notebook has no __file__, so the data
# folder is located relative to the current working directory.
mpg_path = os.path.join(os.getcwd(), 'Introduction to Data Science in Python', 'week1', 'data', 'mpg.csv')

# newline='' lets the csv module handle line endings itself, including
# newlines embedded inside quoted fields.
with open(mpg_path, newline='') as csvfile:
    # one dictionary per data row, keyed by the header row
    mpg = list(csv.DictReader(csvfile))
    
mpg[:1] # The first dictionary in our list.

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')])]

In [17]:
# mpg is a list of dictionaries
print(len(mpg))
print(mpg[0].keys())
print(mpg[0]['year'])

234
odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])
1999


In [19]:
sum(float(d['hwy']) for d in mpg) / len(mpg)

23.44

In [34]:
# find the average cty by cylinders category
# Accumulate [running cty total, row count] per cylinder value in a single
# pass over mpg, instead of rescanning the whole list once per category.
cty_totals = {}
for d in mpg:
    totals = cty_totals.setdefault(d['cyl'], [0, 0])
    totals[0] += float(d['cty'])
    totals[1] += 1

cylinders = set(cty_totals)  # the unique cylinder values (still strings)

# one (cylinder, average cty) tuple per category
CtyMpgByCyl = [(c, total / count) for c, (total, count) in cty_totals.items()]

# Sort once, after all averages are computed: ascending by cylinder value.
# Use key=lambda x: x[1] to sort by the average instead.
CtyMpgByCyl.sort(key=lambda x: x[0])
CtyMpgByCyl

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

# Python dates and times

In [38]:
import datetime as dt
import time as tm

# get the current time in seconds since the epoch (January 1st, 1970)
print(tm.time())

dtnow = dt.datetime.fromtimestamp(tm.time())
print(dtnow)
dtnow

1500341554.3436482
2017-07-17 21:32:34.343648


datetime.datetime(2017, 7, 17, 21, 32, 34, 343648)

In [39]:
dtnow.year, dtnow.month, dtnow.day

(2017, 7, 17)

In [41]:
delta = dt.timedelta(days=100)
delta

datetime.timedelta(100)

In [42]:
today = dt.date.today()
today - delta

datetime.date(2017, 4, 8)

# Advanced Python Objects, map()

In [43]:
class Person:
    """A person whose name and location are assigned through setter methods."""

    # class-level attribute: the value every instance sees unless it is overridden
    department = 'School of Information'

    def set_name(self, new_name):
        """Store ``new_name`` on the instance as ``name``."""
        self.name = new_name

    def set_location(self, new_location):
        """Store ``new_location`` on the instance as ``location``."""
        self.location = new_location

In [48]:
c = Person()
c.set_name('Tom')
print(c.name + ', ' + c.department)

Tom, School of Information


In [56]:
# the map function map(f, i) applies a function f to each element of an iterable i
def square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared
print(map(square,[1,2]))
# lazy evaluation. Does not perform the calculation until we specifically ask for a value
# allows more efficient memory management

<map object at 0x00000255F4A00B38>


In [64]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the first and last space-separated tokens of ``person``, joined by a space."""
    tokens = person.split(' ')
    title, surname = tokens[0], tokens[-1]
    return ' '.join((title, surname))

list(map(split_title_and_name, people))

['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vydiswaran', 'Dr. Romero']

# Lambdas and list comprehensions

In [81]:
# lambdas are short lived functions that have no name
my_function = lambda a, b, c: a + b
my_function(1, 2, 4)

3

In [89]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the first and last whitespace-separated words of ``person``, joined by a space."""
    words = person.split()
    return words[0] + ' ' + words[-1]

#option 1
for person in people:
  print(split_title_and_name(person) == (lambda x:x.split()[0] + ' ' + x.split()[-1])(person))

#option 2
list(map(split_title_and_name, people)) == list(map(lambda person:person.split()[0] + ' ' + person.split()[-1], people))

True
True
True
True


True

In [93]:
# list comprehensions
my_list = [n for n in range(0,10) if n%2==0]
my_list

[0, 2, 4, 6, 8]

In [94]:
# NOTE: range(10, 10) is empty, so the result is []. With a non-empty range this
# would raise TypeError, because a single int cannot be unpacked into i, j.
[i*j for i,j in range(10,10)]

[]

In [95]:
def times_tables():
    """Return the 100 products ``i * j`` for i and j in 0..9, with i varying slowest."""
    return [row * col for row in range(10) for col in range(10)]

times_tables() == [j*i for i in range(10) for j in range(10)]

True

In [100]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'
# return all possible combinations as a list comprehension
print(len([s + t + i + j for s in lowercase for t in lowercase for i in digits for j in digits]))

67600


# The Numerical Python Library (NumPy)

In [2]:
import numpy as np

## Arrays

In [47]:
mylist = [1, 2, 3]
x = np.array(mylist)
x

array([1, 2, 3])

In [36]:
y = np.array([4, 5, 6])
y

array([4, 5, 6])

In [11]:
m = np.array([[1, 2, 3], [4, 5, 6]])  # create a matrix by passing a list of lists
m.shape

(2, 3)

In [18]:
n = np.arange(0,30,2) # start, stop, stepsize
print(n)
n.reshape(3,5)

[ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28]


array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [24]:
o = np.linspace(0, 4, 9) # start, stop (included), number of evenly spaced samples
print(o)

[ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4. ]


In [26]:
o.resize(3,3)
o

array([[ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ]])

In [25]:
o

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ])

In [30]:
print(np.ones((2, 3)))
print(np.zeros((2,3)))

[[ 1.  1.  1.]
 [ 1.  1.  1.]]
[[ 0.  0.  0.]
 [ 0.  0.  0.]]


In [32]:
np.eye(3) # identity matrix

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [37]:
np.diag(y)

array([[4, 0, 0],
       [0, 5, 0],
       [0, 0, 6]])

In [40]:
# create an array with repeated values
print(np.array([1, 2, 3] * 3))
print(np.repeat([1, 2, 3], 3))

[1 2 3 1 2 3 1 2 3]
[1 1 1 2 2 2 3 3 3]


In [42]:
# combine arrays
p = np.ones([2, 3], int)
p

array([[1, 1, 1],
       [1, 1, 1]])

In [44]:
np.vstack([p, 2*p])

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

In [45]:
np.hstack([p, 2*p])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

## Operations

In [48]:
x+y

array([5, 7, 9])

In [50]:
x*y # element-wise multiplication

array([ 4, 10, 18])

In [51]:
x**2

array([1, 4, 9])

In [53]:
x.dot(y)    # dot product

32

In [56]:
z = np.array([y, y**2]) # 2 by 3 array
z

array([[ 4,  5,  6],
       [16, 25, 36]])

In [57]:
z.T # transpose

array([[ 4, 16],
       [ 5, 25],
       [ 6, 36]])

In [58]:
z.dtype

dtype('int32')

In [64]:
z = z.astype('f')
print(z)
z.dtype

[[  4.   5.   6.]
 [ 16.  25.  36.]]


dtype('float32')

In [59]:
n = np.array([-4, -2, 1, 3, 5])

In [66]:
print(n.sum())
print(n.min())
print(n.max())
print(n.mean())
print(n.std())

3
-4
5
0.6
3.26190128606


In [71]:
# find the indices of the min and of the max
print(n.argmin())
print(n.argmax())

0
4


## Indexing and Slicing

In [74]:
s = np.arange(13)**2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [76]:
s[0],s[4],s[0:3]

(0, 16, array([0, 1, 4]))

In [77]:
s[-4:]

array([ 81, 100, 121, 144])

In [79]:
s[-5::-2] # start at the 5th element from the end and step backwards by 2

array([64, 36, 16,  4,  0])

In [82]:
# extension to multidimensional arrays
r = np.arange(36).reshape(6,6)
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [83]:
r[3,3:6] # row index 3 (the 4th row), columns 3 to 5 (the stop index 6 is excluded)

array([21, 22, 23])

In [86]:
r[:2,:-1] # get the first 2 rows, and all but the last columns

array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [87]:
r[-1,::2]

array([30, 32, 34])

In [88]:
r>30

array([[False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False],
       [False,  True,  True,  True,  True,  True]], dtype=bool)

In [89]:
r[r>30]

array([31, 32, 33, 34, 35])

In [91]:
r[r>30] = 30 #cap all values to 30
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [94]:
r2 = r[:3, :3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [96]:
r2[:] = 0 # set all the elements of the array to 0
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [97]:
# Important: note that the values in r also changed
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [99]:
# to create a copy of the array that will not change, we can use .copy()
r_copy = r.copy()
r_copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [101]:
r_copy[:]  = 10
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

## Iterate over arrays

In [104]:
test = np.random.randint(0, 10, (4,3))
test

array([[6, 1, 7],
       [1, 0, 7],
       [4, 5, 7],
       [8, 8, 5]])

In [105]:
for row in test:
    print(row)

[6 1 7]
[1 0 7]
[4 5 7]
[8 8 5]


In [106]:
for i in range(len(test)):
    print(test[i])

[6 1 7]
[1 0 7]
[4 5 7]
[8 8 5]


In [107]:
for i,row in enumerate(test):
    print('row',i,'is',row)

row 0 is [6 1 7]
row 1 is [1 0 7]
row 2 is [4 5 7]
row 3 is [8 8 5]


In [109]:
test2 = test**2
test2

array([[36,  1, 49],
       [ 1,  0, 49],
       [16, 25, 49],
       [64, 64, 25]])

In [111]:
# iterate over both arrays using zip
for i,j in zip(test,test2):
    print(i,'+',j,'=',i+j)

[6 1 7] + [36  1 49] = [42  2 56]
[1 0 7] + [ 1  0 49] = [ 2  0 56]
[4 5 7] + [16 25 49] = [20 30 56]
[8 8 5] + [64 64 25] = [72 72 30]
