# Python Fundamentals

## Types and Sequences

In [3]:
type("This is a string")

str

In [4]:
type(None)

NoneType

In [5]:
type(1)

int

In [6]:
type(1.0)

float

In [7]:
def add_numbers(x, y):
    return x + y

add_numbers(1,2)

3

In [8]:
type(add_numbers)

function

In [9]:
x = (1, "hi", 2, "hey")
type(x)

tuple

In [11]:
x = [1, "hi", 2, "hey"]
type(x)

list

In [12]:
x.append(3.3)
x

[1, 'hi', 2, 'hey', 3.3]

In [14]:
i = 0

while i < len(x):
    print(x[i])
    i += 1

1
hi
2
hey
3.3


In [15]:
[1,2] + [3,4]

[1, 2, 3, 4]

In [16]:
[1] * 3

[1, 1, 1]

In [17]:
# in operator
1 in [1,2,3]

True

In [18]:
# slicing
first = "Christopher" 
second = "Brooks"

print(first + second)
print(first * 3)
print("Chris" in first)

ChristopherBrooks
ChristopherChristopherChristopher
True


In [19]:
first = "Noah Thomsen Kawasaki".split(" ")[0]
last = "Noah Thomsen Kawasaki".split(" ")[-1]

print(first)
print(last)

Noah
Kawasaki


In [198]:
# dictionaries
dict_names = {"Noah Kawasaki":"ntkawasaki@gmail.com", 
     "Nic Kawasaki":"nickawasaki@salesforce.com"}
dict_names["Noah Kawasaki"]

'ntkawasaki@gmail.com'

In [199]:
# iterate keys
for name in dict_names.keys():
    print(name)

Noah Kawasaki
Nic Kawasaki


In [200]:
# iterate values
for email in dict_names.values():
    print(email)

ntkawasaki@gmail.com
nickawasaki@salesforce.com


In [201]:
# iterate keys and values
for name, email in dict_names.items():
    print(name + " " + email)

Noah Kawasaki ntkawasaki@gmail.com
Nic Kawasaki nickawasaki@salesforce.com


In [26]:
# unpacking a tuple
x = ("Noah", "Kawasaki", "ntkawasaki@gmail.com")
first, last, email = x  # must assign same number of values to unpack
print(x)

('Noah', 'Kawasaki', 'ntkawasaki@gmail.com')


## More on Strings

In [27]:
print("Noah" + 2)

TypeError: must be str, not int

In [202]:
print("Noah " + str(2))

Noah 2


In [32]:
# formatting language
sales_record = {"price":3.24,
                "num_items":4,
                "person":"Noah"}

sales_statement = "{} bought {} items for a total of ${}".format(sales_record["person"], 
                                                                sales_record["num_items"],
                                                                (sales_record["num_items"]*sales_record["price"]))
print(sales_statement)

Noah bought 4 items for a total of $12.96


## Reading and Writing CSV Files

In [204]:
import csv
%precision 2

abs_path = "/Users/noahkawasaki/Desktop/Programs/Python/Applied Data Science with Python Specialization/Introduction to Data Science in Python/downloads/mpg.csv"

with open (absolute_path) as csv_file:
    mpg = list(csv.DictReader(csv_file))
    
# first 3 dictionaries in list
mpg[:3]

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '3'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '2'),
              ('year', '2008'),
              ('cyl', '4'),
              ('trans', 'manual(m6)'),
              ('drv',

In [34]:
# number of rows
len(mpg)

234

In [208]:
mpg[0].keys()
mpg[0].values()
mpg[0].items()

odict_items([('', '1'), ('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'auto(l5)'), ('drv', 'f'), ('cty', '18'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')])

In [209]:
# average city mpg
# (must convert strings to float for math)
sum(float(row["cty"]) for row in mpg) / len(mpg)  # these are list comprehensions

16.86

In [210]:
# average highway mpg
sum(float(row["hwy"]) for row in mpg) / len(mpg)

23.44

In [211]:
# average city mpg, grouped by cylinders

# make set of cylinder types
# set(): Build an unordered collection of unique elements.
cylinders = set(row["cyl"] for row in mpg)
cylinders

{'4', '5', '6', '8'}

In [218]:
city_mpg_by_cyl = []

# iterate over each type of cylinder
for c in cylinders:
    sum_mpg = 0
    cylinder_count = 0
    
    # iterate over each row, if cyl equals current cylinder --> sum mpg and average over number of cyl count
    for row in mpg:
        if row["cyl"] == c:
            sum_mpg += float(row["cty"])
            cylinder_count += 1
        
    # append to list of tuples
    city_mpg_by_cyl.append((c, (sum_mpg/cylinder_count)))
    
# reorder by keys
city_mpg_by_cyl.sort(key=lambda x: x[0])
city_mpg_by_cyl
            
            

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

In [219]:
# average highway mpg by class of vehicle

vehicle_class = set(row["class"] for row in mpg)
vehicle_class

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [220]:
hwy_mpg_by_class = []

# iterate over each type of vehicle
for v in vehicle_class:
    sum_mpg = 0
    class_count = 0
    
    # iterate over each row, if class equals current class --> sum mpg and average over number of class count
    for row in mpg:
        if row["class"] == v:
            sum_mpg += float(row["hwy"])
            class_count += 1
        
    # append to list of tuples
    hwy_mpg_by_class.append((v, (sum_mpg/cylinder_count)))
    
# reorder by values
hwy_mpg_by_class.sort(key=lambda x: x[1])
hwy_mpg_by_class

[('2seater', 1.53),
 ('minivan', 3.04),
 ('pickup', 6.88),
 ('subcompact', 12.16),
 ('midsize', 13.81),
 ('suv', 13.88),
 ('compact', 16.42)]

## Dates and Times

In [47]:
import datetime as dt
import time as tm

In [48]:
# current time in seconds from epoch
tm.time()

1514148140.07

In [50]:
dt_now = dt.datetime.fromtimestamp(tm.time())
dt_now

datetime.datetime(2017, 12, 24, 10, 43, 54, 46766)

In [51]:
# attributes
dt_now.year, dt_now.month, dt_now.day, dt_now.hour, dt_now.minute, dt_now.second

(2017, 12, 24, 10, 43, 54)

In [53]:
delta = dt.timedelta(days=100)
delta

datetime.timedelta(100)

In [56]:
today = dt.date.today()
today

datetime.date(2017, 12, 24)

In [57]:
today - delta

datetime.date(2017, 9, 15)

In [58]:
today > today - delta

True

## Objects and map()

In [60]:
class Person():
    department = "School of Information"
    
    def set_name(self, name):
        self.name = name
    
    def set_location(self, location):
        self.location = location

In [63]:
person = Person()
person.set_name("Noah")
person.set_location("Hawaii")

print("{} lives in {} and works for the department: {}".format(person.name, person.location, person.department))

Noah lives in Hawaii and works for the department: School of Information


In [64]:
# map() function
# dont use () for functions

# Imagine we have two list of numbers, maybe prices from two different stores on exactly the same items. 
# And we wanted to find the minimum that we would have to pay if we bought the cheaper item between the two stores. 
# To do this, we could iterate through each list, comparing items and choosing the cheapest. 

store_1 = [10.00, 11.00, 12.34, 2.34]
store_2 = [9.00, 11.10, 12.34, 2.01]

cheapest = map(min, store_1, store_2)

In [222]:
# returns map object (lazy evaluation)
cheapest

<map at 0x1122787f0>

In [68]:
# exercise
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    title = person.split()[0]
    lastname = person.split()[-1]
    
    return '{} {}'.format(title, lastname)

# map() runs a function on every item in a list
list(map(split_title_and_name, people))

['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vydiswaran', 'Dr. Romero']

## Lambda and List Comprehensions

In [223]:
# anonymous functions that are simple/short-lived so its easier to write out the function in one line

# You declare a lambda function with the word lambda followed by a list of arguments, followed by a colon 
# and then a single expression. There's only one expression to be evaluated in a lambda. 
# The expression value is returned on execution of the lambda. 

In [226]:
my_function = lambda a, b, c : a + b

In [227]:
my_function(1, 2, 5)

3

In [90]:
# exercise: rewrite this function in a lambda expression
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    return person.split()[0] + ' ' + person.split()[-1]


# option 1
for person in people:
    print(split_title_and_name(person) == (lambda x: x.split()[0] + " " + x.split()[-1])(person))  # pass x value this way?

# option 2
list(map(split_title_and_name, people)) == list(map(lambda person: person.split()[0] + " " + person.split()[-1], people))

True
True
True
True


True

In [93]:
# list comprehensions
my_list = []

# for loop version
for num in range(0,100):
    if num % 2 == 0:
        my_list.append(num)
        
my_list

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98]

In [94]:
# list comprehension version
# formula: value, for loop, condition
my_list = [num for num in range(0,100) if num % 2 == 0]
my_list

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98]

In [229]:
# exercise: convert to list comprehension

def times_tables():
    lst = []
    for i in range(10):
        for j in range (10):
            lst.append(i*j)
    return lst

# solution
times_tables() == [i*j for i in range(10) for j in range(10)]

True

In [99]:
# exercise: write list comprehension for all combos of two letters/two numbers

lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

answer = [(l1 + l2 + d1 + d2) for l1 in lowercase for l2 in lowercase for d1 in digits for d2 in digits]
answer

['aa00',
 'aa01',
 'aa02',
 'aa03',
 'aa04',
 'aa05',
 'aa06',
 'aa07',
 'aa08',
 'aa09',
 'aa10',
 'aa11',
 'aa12',
 'aa13',
 'aa14',
 'aa15',
 'aa16',
 'aa17',
 'aa18',
 'aa19',
 'aa20',
 'aa21',
 'aa22',
 'aa23',
 'aa24',
 'aa25',
 'aa26',
 'aa27',
 'aa28',
 'aa29',
 'aa30',
 'aa31',
 'aa32',
 'aa33',
 'aa34',
 'aa35',
 'aa36',
 'aa37',
 'aa38',
 'aa39',
 'aa40',
 'aa41',
 'aa42',
 'aa43',
 'aa44',
 'aa45',
 'aa46',
 'aa47',
 'aa48',
 'aa49',
 'aa50',
 'aa51',
 'aa52',
 'aa53',
 'aa54',
 'aa55',
 'aa56',
 'aa57',
 'aa58',
 'aa59',
 'aa60',
 'aa61',
 'aa62',
 'aa63',
 'aa64',
 'aa65',
 'aa66',
 'aa67',
 'aa68',
 'aa69',
 'aa70',
 'aa71',
 'aa72',
 'aa73',
 'aa74',
 'aa75',
 'aa76',
 'aa77',
 'aa78',
 'aa79',
 'aa80',
 'aa81',
 'aa82',
 'aa83',
 'aa84',
 'aa85',
 'aa86',
 'aa87',
 'aa88',
 'aa89',
 'aa90',
 'aa91',
 'aa92',
 'aa93',
 'aa94',
 'aa95',
 'aa96',
 'aa97',
 'aa98',
 'aa99',
 'ab00',
 'ab01',
 'ab02',
 'ab03',
 'ab04',
 'ab05',
 'ab06',
 'ab07',
 'ab08',
 'ab09',
 'ab10',
 

## NumPy
Fundamental package for scientific computing

In [230]:
import numpy as np

In [231]:
# create array
my_list = [1, 2, 3]
x = np.array(my_list)
x

array([1, 2, 3])

In [103]:
# put list directly into np.array()
y = np.array([4, 5, 6])
y

array([4, 5, 6])

In [105]:
# two dimensional array
m = np.array([[7, 8, 9], [10, 11, 12]])
m

array([[ 7,  8,  9],
       [10, 11, 12]])

In [107]:
# shape of array in dimensions
m.shape

(2, 3)

In [109]:
n = np.arange(0, 30, 2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [233]:
# temporary reshaping for output
n.reshape(3, 5)

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [112]:
# over an interval
o = np.linspace(0, 4, 9)
o

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ])

In [235]:
# resize() changes the shape of the array permanently
o.resize(3, 3)
o

array([[ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ]])

In [117]:
np.ones((3, 2))

array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [118]:
np.zeros((2,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [121]:
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [236]:
y

array([4, 5, 6])

In [237]:
np.diag(y)

array([[4, 0, 0],
       [0, 5, 0],
       [0, 0, 6]])

In [238]:
np.array([1, 2, 3] * 3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [239]:
np.repeat([1, 2, 3], 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [126]:
# stacking arrays
p = np.ones([2, 3], int)
p

array([[1, 1, 1],
       [1, 1, 1]])

In [127]:
np.vstack([p, p*2])

array([[1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2]])

In [128]:
np.hstack([p, p*2])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

### Operations

In [240]:
x

array([1, 2, 3])

In [241]:
y

array([4, 5, 6])

In [129]:
x + y

array([5, 7, 9])

In [130]:
x * y

array([ 4, 10, 18])

In [131]:
x ** 2

array([1, 4, 9])

In [132]:
# summation of xi*yi
x.dot(y)

32

In [140]:
z = np.array([y, y**2])
z

array([[ 4,  5,  6],
       [16, 25, 36]])

In [141]:
z.shape

(2, 3)

In [142]:
# transpose
z.T.shape

(3, 2)

In [146]:
# datatype attribute
z.dtype

dtype('int64')

In [242]:
z = z.astype("float")
z.dtype
print(z)

[[  4.   5.   6.]
 [ 16.  25.  36.]]


In [148]:
a = np.array([-4, -2, 1, 3, 5])
a

array([-4, -2,  1,  3,  5])

In [243]:
# aggregate functions

In [155]:
a.sum()

3

In [156]:
a.max()

5

In [157]:
a.min()

-4

In [158]:
a.mean()

0.60

In [159]:
a.std()

3.26

In [160]:
# index of highest value
a.argmax()

4

In [161]:
# index of lowest value
a.argmin()

0

### Indexing and Slicing

In [163]:
s = np.arange(13) ** 2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144])

In [164]:
s[0], s[4], s[:3]

(0, 16, array([0, 1, 4]))

In [165]:
s[1:5]

array([ 1,  4,  9, 16])

In [167]:
s[-4:]

array([ 81, 100, 121, 144])

In [168]:
# reverse sliced
s[-5::-2]

array([64, 36, 16,  4,  0])

In [170]:
# slicing two dimensional arrays
r = np.arange(36)
r.resize((6,6))
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [171]:
r[2, 2]

14

In [172]:
r[3, 3:6]

array([21, 22, 23])

In [173]:
r[:2, :-1]

array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [174]:
r[-1:, ::2]

array([[30, 32, 34]])

In [175]:
# conditions
r[r > 30]

array([31, 32, 33, 34, 35])

In [177]:
# set max to 30
r[r > 30] = 30
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [179]:
r2 = r[:3, :3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [180]:
r2[:] = 0
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [181]:
# remember that arrays referencing other arrays affect the original array
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [182]:
# to make a copy
r_copy = r.copy()
r_copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 30, 30, 30, 30, 30]])

In [183]:
# now r wont be affected
r_copy[:] = 10
print(r_copy)
print()
print(r)

[[10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]]

[[ 0  0  0  3  4  5]
 [ 0  0  0  9 10 11]
 [ 0  0  0 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 30 30 30 30 30]]


In [184]:
# exercise: what will the output be?
old = np.array([[1, 1, 1],
                [1, 1, 1]])

new = old
new[0, :2] = 0

# ([0, 0, 1],
#  [1, 1, 1])

print(old)

[[0 0 1]
 [1 1 1]]


### Iterating over Arrays

In [244]:
test = np.random.randint(low=0, high=10, size=(4, 3))
test

array([[9, 6, 0],
       [5, 2, 7],
       [4, 1, 2],
       [5, 6, 3]])

In [191]:
# value
for row in test:
    print(row)

[2 6 0]
[1 8 6]
[1 4 9]
[9 5 3]


In [188]:
# indices
for i in range(len(test)):
    print(test[i])

[2 6 0]
[1 8 6]
[1 4 9]
[9 5 3]


In [189]:
# both
for i, row in enumerate(test):
    print("row", i, "is", row)

row 0 is [2 6 0]
row 1 is [1 8 6]
row 2 is [1 4 9]
row 3 is [9 5 3]


In [192]:
test_2 = test**2
test_2

array([[ 4, 36,  0],
       [ 1, 64, 36],
       [ 1, 16, 81],
       [81, 25,  9]])

In [245]:
# zip(): Make an iterator that aggregates elements from each of the iterables.
zip(test, test_2)

<zip at 0x113da3748>

In [246]:
for i, j in zip(test, test_2):
    print(i, "+", j, "=", i + j)

[9 6 0] + [ 4 36  0] = [13 42  0]
[5 2 7] + [ 1 64 36] = [ 6 66 43]
[4 1 2] + [ 1 16 81] = [ 5 17 83]
[5 6 3] + [81 25  9] = [86 31 12]
