# Chapter 3: Built-in Data Structure, Functions and Files

Python's workhorse data-structures: tuples, lists, dicts and sets.

In [2]:
tup = 4, 5, 6

In [3]:
tup

(4, 5, 6)

### Lists

In [5]:
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

In [6]:
for a, b, c in seq:
    print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [8]:
a = [7, 2, 5, 1, 3]

In [11]:
a.sort()

In [12]:
a

[1, 2, 3, 5, 7]

In [14]:
import bisect

In [19]:
b = [1, 2, 2, 2, 3, 4, 7]

In [20]:
bisect.bisect(b, 2)

4

In [21]:
bisect.bisect_left(b, 2)

1

In [23]:
some_list = ['foo', 'bar', 'baz']

In [25]:
print(enumerate(some_list))

<enumerate object at 0x7f123d166740>


### Zip

In [27]:
seq1 = ['foo', 'bar', 'baz']

In [28]:
seq2 = ['one', 'two', 'three']

In [30]:
zipped = zip(seq1, seq2)

In [31]:
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [32]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print(f"{i}: {a}, {b}")

0: foo, one
1: bar, two
2: baz, three


In [2]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]

In [6]:
first_names, last_names = zip(*pitchers)

In [7]:
first_names

('Nolan', 'Roger', 'Schilling')

### Unpacking

In [9]:
def add(x, y):
    return x + y

In [16]:
t = [1, 2]

In [17]:
add(t[0], t[1])

3

In [18]:
add(*t)

3

### dict

In [23]:
words = ['apple', 'bat', 'bar', 'atom', 'book']

In [24]:
by_letter = {}

In [26]:
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)

In [28]:
by_letter

{'a': ['apple', 'atom', 'apple', 'atom'],
 'b': ['bat', 'bar', 'book', 'bat', 'bar', 'book']}

### set

In [30]:
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [42]:
states =  ['    Alabama  ', '  Georgia!', '  Georgia', ' georgia', 'FlOrIda', 'south   carolina##', 'West virginia?']

In [43]:
result = [value.strip(' $!?#') for value in states]
result

['Alabama',
 'Georgia',
 'Georgia',
 'georgia',
 'FlOrIda',
 'south   carolina',
 'West virginia']

In [45]:
import re

In [50]:
def remove_punctuation(value):
    return re.sub('[!#?]', '', value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result

In [51]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [53]:
%timeit sum(x**2 for x in range(100))

19.5 µs ± 578 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [54]:
%timeit sum([x**2 for x in range(100)])

24.8 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [59]:
def check_prime(number):    
    for divisor in range(2, int(number ** 0.5) + 1):        
        if number % divisor == 0:            
            return False    
        return True


In [None]:
class Primes:    
    def __init__(self, max):        
        self.max = max        
        self.number = 1

    def __iter__(self):        
        return self
    
     def __next__(self):        
        self.number += 1        
        if self.number >= self.max:            
            raise StopIteration        
        elif check_prime(self.number):            
            return self.number        
        else:            
            return self.__next__()
    

In [56]:
primes = (i for i in range(2, 100000000000) if check_prime(i))

In [60]:
for x in primes:
    print(x)

In [68]:
primes = (i for i in range(2, 1000) if check_prime(i))

## More on generators

In [70]:
def gen_nums():
    n = 0
    while n < 4:
        yield n
        n += 1

In [75]:
nums = gen_nums()

In [76]:
type(nums)

generator

In [77]:
for num in nums:
    print(num)

0
1
2
3


In [80]:
more_nums = gen_nums()

In [87]:
next(more_nums)

StopIteration: 

In [89]:
import math

In [99]:
math.comb(599, 2) / math.comb(600, 3)

0.005

In [94]:
math.comb(599, 3) / math.comb(600, 3)

0.995

In [95]:
0.995 * (math.comb(596, 2) / math.comb(597, 3))

0.005

In [98]:
(597/600)*(3/597)

0.005

## itertools module

In [1]:
import itertools 

In [2]:
first_letter = lambda x : x[0]

In [3]:
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

In [4]:
for letter, name in itertools.groupby(names, first_letter):
    print(letter, list(names))

A ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
W ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
A ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
S ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']


## Files and operating system

In [5]:
import sys

In [6]:
sys.getdefaultencoding()

'utf-8'