## Notes for Python for Data Analysis 2nd Edition
## Chapter 3 Built-in Data Structure, Functions, and Files

# Tuples

In [5]:
# Tuples are immutable, don't need parenthesis, but can help
tup = 1, 2, 3
nested_tup = ((2, 3, 4),(7,8))
print(nested_tup)
print(nested_tup[1])
print(nested_tup[1][1])

((2, 3, 4), (7, 8))
(7, 8)
8


In [12]:
# Can convert sequnce into tuple with 'tuple'
stup = tuple('string')
print(stup)
# Cant modify tuple, but can modify objects within if mutable
# stup[1] = 'b' # fails

l_tup = ([1, 2, 3],[4, 5, 6])
l_tup[0][2] = 'a'
print(l_tup)
# l_tup[0] = [7, 8, 9] #fails

a_tup = stup + l_tup
print(a_tup)

('s', 't', 'r', 'i', 'n', 'g')
([1, 2, 'a'], [4, 5, 6])
('s', 't', 'r', 'i', 'n', 'g', [1, 2, 'a'], [4, 5, 6])


In [14]:
tup1 = (1, 3, 4)
tup2 = (5, 6, 7)
# Can't multiply tuple by tuple
# print(tup1 * tup2)

# But can multiply tuply by number
print(tup1 * 4)

(1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4)


In [118]:
# You can unpack tuples
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print("a = {}, b = {}, c = {}".format(a, b, c,))
    
# Can use special '*' syntax to capture remaining
values = 1, 2, 3, 5
a, b, *erthin = values
print(a, b)
print(erthin)

# Tuples have methods 'index' and 'count'
print(values.index(2)) # Returns index of query
print(values.count(2)) # Returns occurences of query

a = 1, b = 2, c = 3
a = 4, b = 5, c = 6
a = 7, b = 8, c = 9
1 2
[3, 5]
1
1


# Lists

In [47]:
# Lists can be modified
lst = ['arf', 'asdf', 'af', 'fsf']
lst.append('cat')
lst.index('cat')
lst[4][0]
# Can't modify strings in lists
# lst[4][0] = 1 #fails

# Use 'insert' to add to particular location; 'insert' computationally
# expensive so use sparingly

lst.insert(2, 'fant')
lst

# pop removes an object from an index, can be stored in new object
one = lst.pop(1)
print(one)
print(lst)

# 'remove' removes the first occurence of a particular object
lst.append('af')
print(lst)
lst.remove('af')
print(lst)

'af' in lst
'af' not in lst
# Slower to check wheter an object in a list than dict or set
# Extend cheaper than concatenating lists bc you don't have to create a 
# new list.

# sort() method does not make a new list.  Can sort by length. sorted()
# fuction makes a copy
lst.sort(key=len)
lst

asdf
['arf', 'fant', 'af', 'fsf', 'cat']
['arf', 'fant', 'af', 'fsf', 'cat', 'af']
['arf', 'fant', 'fsf', 'cat', 'af']


['af', 'arf', 'fsf', 'cat', 'fant']

In [52]:
# bisect.bisect() returns the location an element should be inserted to 
# keep order; bisect.insort insertes the item; does not check if sorted
import bisect

bisect.insort(lst, 'arfff') # sorts alphabetically
lst

['af', 'arf', 'arfff', 'fsf', 'cat', 'fant']

In [78]:
# negative slices arn't 0 indexed; slices end exclusive

hello = list('hello!')
print(hello)
print(hello[:5])
# buuut
print(hello[5])
# print(hello[6]) # Breaks
print(hello[-6])

# -0 = 0
print(hello[-0])
print(hello[-6:6])
print(hello[::-1])
print(hello[::-2])


['h', 'e', 'l', 'l', 'o', '!']
['h', 'e', 'l', 'l', 'o']
!
h
h
['h', 'e', 'l', 'l', 'o', '!']
['!', 'o', 'l', 'l', 'e', 'h']
['!', 'l', 'e']


In [100]:
# Make a dict using enumerate
dct = {}
for i, value in enumerate(hello):
    dct[i] = value
for key in dct:
    print(key, dct[key])
    
# 'sorted' returns a new list, can take 'key' parameter

print(sorted(lst, key=len))

# zip returns a list of tuples; number of elements is determined by the 
# shorted sequence

seq1 = [a, b, c, d]
seq2 = [1, 2, 3, 4]
seq3 = True, False, True

print(list(zip(seq1, seq2, seq3)), end='\n\n')

for i, (a,b) in enumerate(zip(seq1, seq2)):
    print('{}: {}, {}'.format(i, a, b))
    
# reversed creates a generator
for x in reversed(lst):
    print(x)
print()
for x in lst[::-1]:
    print(x)

0 h
1 e
2 l
3 l
4 o
5 !
['af', 'arf', 'fsf', 'cat', 'fant', 'arfff']
[(5, 1, True), (4, 2, False), (9, 3, True)]

0: 5, 1
1: 4, 2
2: 9, 3
3: 5, 4
fant
cat
fsf
arfff
arf
af

fant
cat
fsf
arfff
arf
af


# Dict

In [2]:
# Can use del or pop to remove key-value pairs, either by key

dct = {'a' : 2,
       'b' : 3,
       'c' : 4,
       'd' : 5
}

print(dct)
del dct['b']
dct
print(dct.keys())
print(dct.values())

# Can merge two dcts using 'update'; will change location of existing keys
dct.update({'cat' : 'dog', '4' : True})

print(dct)

# A dict can accept a list of two tuples 
mapping = dict(zip(range(5), range(5)[::-1]))
for k in mapping:
    print(k, mapping[k])

{'a': 2, 'b': 3, 'c': 4, 'd': 5}
dict_keys(['a', 'c', 'd'])
dict_values([2, 4, 5])
{'a': 2, 'c': 4, 'd': 5, 'cat': 'dog', '4': True}
0 4
1 3
2 2
3 1
4 0


In [14]:
# .get() method can be used to search a dict and return a value or
# a default value.

print(mapping.get(6, "not found"))
print(mapping.get(3, "not found"))

# .setdefault method allso references of dict values not yet created 
# during iteration
animals = ["cat", "caraboo", "dog", "bear"]

by_letter = {}
for animal in animals:
    letter = animal[0]
    by_letter.setdefault(letter, []).append(animal)
print(by_letter)



not found
1


{'c': ['cat', 'caraboo'], 'd': ['dog'], 'b': ['bear']}

In [15]:
# Values in a dict must be immutable; can convert lists to tuples if
# they are needed

# Sets

In [29]:
# Elements in set unique, non-mutable

s1 = {"a", "B", "b", "c", "d"}
s2 = {"a", "B", "e", "f"}

print(s1 - s2) 
print(s1 | s2) # 'OR'
print(s1 ^ s2) # 'XOR'
print(s1 & s2) # 'AND' in both
print(s1 <= s2) # is s1 in s2
print(s1 >= s2) # is s2 in s1

print({1,2,3} == {3,1,2})
print([1,2,3] == [3,1,2])

{'d', 'b', 'c'}
{'b', 'd', 'e', 'a', 'B', 'f', 'c'}
{'b', 'e', 'd', 'f', 'c'}
{'B', 'a'}
False
False
True
False


# List Comprehension

In [66]:
animals = ["dog", "cat", "hippo", "horse", "bat"]
print([x for x in range(10) if x % 2 == 0])
print({ele for ele in animals if len(ele) == 3})
print({k: v for k,v in enumerate(animals) if v[0] <= "d"})

# Nested List comprehension

tups = [(1, 2, 3), (4, 5, 4), (9, 3, 10), (1, 2, 3)]
flatted = {digit for tup in tups for digit in tup}
flatted

[0, 2, 4, 6, 8]
{'dog', 'bat', 'cat'}
{0: 'dog', 1: 'cat', 4: 'bat'}


{1, 2, 3, 4, 5, 9, 10}

# Functions

In [87]:
# If function get to end without encountering return, returns 'None'
# Keyword arguments MUST follow positional arguements
# The 'global'keyword can be used to call global variables

a = 'cat'

def f():
    global a
    a = 'dog'
    return a
print(f())
print(a)

# Multiple variables are returned as a tuple
def fun():
    a = 1
    b = 2
    c = 3
    return a, b, c
print(fun())

# but you can change this 
def fun2():
    a = 1
    b = 2
    c = 3
    d = 1
    return {a, b, c, d}
print(fun2())
    
# .title capatlized the first word in every string
print("all the Good ones".title())

dog
dog
(1, 2, 3)
{1, 2, 3}
All The Good Ones


In [107]:
# You can treat functions like objects and put them in lists
t = ["ats", "foof", "fun"]
funcs = [str.capitalize, str.isupper]
for i in t:
    for func in funcs:
        print(func(i))
# the function 'map' performs a function on each ele in a list
for x in map(str.capitalize, t):
    print(x)


Ats

False

Foof

False

Fun

False

Ats
Foof
Fun


In [137]:
# lamdba functions
b = ["ats", "foof", "fun", "adfaasd", "sdfas"]
for x in map(lambda x: x * 2, t):
    print(x)
print(sorted(b, key=lambda x: set(list(x))))

# Currying is using a function as an arguement to a second function
def add(x, y):
    return x + y

# You can define a function like this??
five = lambda y: add("cat", y)
five("5")


atsats
fooffoof
funfun
['ats', 'foof', 'fun', 'adfaasd', 'sdfas']


NameError: name 'y' is not defined

In [4]:
string = "ATCG"
print(eval(string))

NameError: name 'ATCG' is not defined