In [None]:
import collections

#constructed a simple class representing individual cards using namedtuple
#collections.namedtuple can be used to build classes of objects that are bundles of attributes with no custom methods
#like a database record
Card = collections.namedtuple('Card', 'rank suit')


class FrenchDeck:
    """A 52-card deck that can be measured with len() and indexed with [].

    Cards are stored suit by suit (spades, diamonds, clubs, hearts) and,
    within each suit, from '2' up to 'A'.
    """

    ranks = list(map(str, range(2, 11))) + ['J', 'Q', 'K', 'A']
    suits = ['spades', 'diamonds', 'clubs', 'hearts']

    def __init__(self):
        #outer loop over suits, inner loop over ranks: this fixes the deck order
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        """Return the number of cards held by the deck."""
        return len(self._cards)

    def __getitem__(self, position):
        """Return the card, or for a slice the list of cards, at position.

        The lookup is handed straight to the [] operator of the underlying
        list, which is why the deck supports slicing without extra code.
        """
        return self._cards[position]

In [None]:
deck = FrenchDeck()
from random import choice

#choice works on the deck because FrenchDeck supports len() and integer indexing
choice(deck)

In [None]:
deck[0::13] #starts at index 0 and takes every 13th card; each suit has 13 ranks, so this yields the four 2s

In [None]:
deck[0::4] #starts at the first card and takes every 4th card


# Note
By implementing the `__getitem__` special method (together with `__len__`), our deck also becomes iterable, and it can be iterated in reverse;
namely, `for card in deck:` and `for card in reversed(deck):` both work.

If a collection has no `__contains__` method, the `in` operator does a sequential scan.

# Note
Special methods are meant to be called by the Python interpreter, not by us. We don't write `my_object.__len__()`; we write `len(my_object)`, and if `my_object` is an instance of a user-defined class, then Python calls the `__len__` method we implemented.

For built-in types like `list`, `str`, and `bytearray`, the interpreter takes a shortcut: `len()` returns the value of the `ob_size` field in the `PyVarObject` C struct that represents any variable-sized built-in object in memory. Namely, the variable-sized built-in objects in Python are stored in C structs. Reading that field is faster than calling a method.

The only special method that is frequently called directly by user code is `__init__`, to invoke the initializer of the superclass in our own `__init__` implementation.

## Stripping non-alphanumeric characters


In [None]:
string = "alphanumeric@123432843248932.dsfadsf980u342;"
#generator expression way: keep only the characters for which ch.isalnum() is True
string_alnumway = ''.join(ch for ch in string if ch.isalnum())
print(string_alnumway)

In [None]:
string = "alphanumeric@123432843248932.dsfadsf980u342;"
#filter way: filter() keeps only the characters for which str.isalnum returns True
string_filter_way = ''.join(filter(str.isalnum, string))
print(string_filter_way)

In [None]:
import re

string = "alphanumeric@123432843248932.dsfadsf980u342;"
#regex way: delete every character that is not an ASCII letter or digit
#note: unlike str.isalnum, this character class is ASCII only, so non-ASCII letters and digits would be removed too
s = re.sub(r'[^a-zA-Z0-9]', '', string)
print(s)

## Exceptions

In [None]:
nums = list(range(1, 11))

#list.index raises ValueError when the value is absent, so only the lookup sits inside the try
try:
    missing_index = nums.index(43)
except ValueError:
    print("whoops not a real number in the nums list")
else:
    print("Printing index of number that is not in the list {}".format(missing_index))

# Booleans

By default, instances of user-defined classes are considered truthy unless either `__bool__` or `__len__` is implemented. Namely, `bool(x)` calls `x.__bool__()` and uses the result. If `__bool__` isn't implemented, Python tries `x.__len__()`: if that returns zero, `bool(x)` returns `False`; otherwise it returns `True`.

When Python needs to determine whether `x` is truthy or falsy, it just applies `bool(x)`.

# `__repr__`
`__repr__` is used to get the string representation of an object for inspection; it is what the `%r` conversion uses. `__str__` is called by the `str()` constructor and is implicitly used by the `print` function.

# Chapter 2: Sequences

### Types of Sequences
#### Container Sequences
    -list, tuple, and collections.deque, which can all hold items of different types
    -hold references to the objects they contain, which may be of any type
#### Flat Sequences:
    -str, bytes, bytearray, memoryview, and array.array hold items of one type
    -physically store the value of each item within its own memory space and not as distinct objects
    -Are more compact but are limited to holding primitive values like characters, bytes, and numbers
### Alternative Way of Grouping
#### Mutable Sequences:
    -list, bytearray, array.array, collections.deque, and memoryview
#### Immutable Sequences:
    -tuple, str, and bytes

In [None]:
#List comprehensions are used to generate new lists 
symbols = '%$#%$#%#@$*&%'
codes = [ord(symbol) for symbol in symbols]
print(codes)
#if a list comprehension spans more than two lines it is best to break it apart or rewrite it as a plain for loop
#in python3 list comprehensions have their own scope, so one can even reuse the outer name as the loop variable (as below)
#and get the same result without clobbering the outer symbols
codes = [ord(symbols) for symbols in symbols]
print(codes)
#list comprehensions are used to build lists from sequences or any other iterable type by filtering and transforming items

In [None]:
#creating cartesian products with list comprehensions
colors = ['black', 'white']
sizes = ['S', 'M', 'L']

tshirts = [(color, size) for color in colors for size in sizes]
print(tshirts)
tshirts = [(color, size) for color in colors
          for size in sizes] #same comprehension split over two lines for better readability
print(tshirts)
#if i want each tuple to read (size, color) we can do this. however it still iterates over color first,
#so only the order inside each tuple changes, not the order of the tuples
tshirts = [(size, color) for color in colors
          for size in sizes]
print(tshirts)

#to arrange it by size and color but also iterate over size first, swap the two for clauses
tshirts = [(size, color) for size in sizes
          for color in colors]
print(tshirts)

In [None]:
#generator expressions
#generators save memory because they yield items one by one using the iterator protocol rather than building a whole list just to feed another
#constructor 
#same syntax as listcomps but use parentheses rather than brackets
import array
symbols = '%$#@$#@$#@'
tuple(ord(symbol) for symbol in symbols) #when the genexp is the only argument, the call's own parentheses are enough
array.array('I', (ord(symbol) for symbol in symbols)) #with more than one argument the genexp needs its own parentheses

In [None]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']

for tshirt in ('%s %s' % (c, s) for c in colors for s in sizes):
    print(tshirt)
    
#this is the generator way of doing the cartesian product as opposed to the list comprehension. The advantage of this way is that
#the items are yielded one by one whereas in the list comprehension way a list with all 6 t-shirt variations is produced. if the lists get larger
#this would save a ton of memory: for a 1000 x 1000 cartesian product the list comp would build a list of 1,000,000 items

In [None]:
#tuples are not just immutable lists. they can be used as those but they can also be used as records with no field names
#each item in the tuple holds data for one field and the position gives it meaning
#when using a tuple as a collection of fields the number of items is often fixed and their order is always vital

In [None]:
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]

for passport in sorted(traveler_ids):
    print('%s/%s' % passport) #the % operator takes each item of the passport tuple as a separate field, one per %s slot
    
for country, _ in traveler_ids: #the for loop unpacks each tuple; _ is a dummy variable for the item we are not interested in
    print(country) 

In [None]:
#tuple unpacking
#parallel assignment: assigning items from an iterable to a tuple of variables
#the number of variables on the left must match the number of items in the iterable
lax_coordinates = (33.9425, -118.408056)
lat, long = lax_coordinates
print("{} {}".format(lat, long))
#can swap values without using a temp variable
lat, long = long, lat
print("{} {}".format(lat, long))

In [None]:
#can unpack a tuple in a function call by prefixing the argument with a star
print(divmod(20, 8))
t = (20, 8)
print(divmod(*t)) #same call as divmod(20, 8): the star spreads t over the two parameters

In [None]:
#os.path.split() is a good example for tuple unpacking: it takes in a path and returns a tuple
#like (path, file_name) that can be unpacked in a single assignment

import os
path, filename = os.path.split('/home/luciano/.ssh/idrsa.pub')
print(filename)
print(path)

In [None]:
#defining function params with *args to grab arbitrary excess arguments is a classic python feature
#py3 extends that concept to parallel assignment as well
#the starred name always receives a list holding whatever items are left over

i, j, *rest = range(10)
print("{} {} {}".format(i, j, rest))
i, j,  *rest = range(3)
print("{} {} {}".format(i, j, rest))

In [None]:
#unpacking nested tuples
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Dehli NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.43333, -99.13333)),
]

#a single format string lays out both the header row and the data rows
fmt = '{:15} | {:^9} | {:^9}'
print(fmt.format('', 'lat.', 'long.'))

#the nested target (latitude, longitude) unpacks the coordinates tuple stored in the last field
for name, cc, pop, (latitude, longitude) in metro_areas:
    print(fmt.format(name, latitude, longitude))

In [None]:
#collections.namedtuple function is a factory that produces subclasses of tuple enhanced with field names and a class name

from collections import namedtuple

#params necessary for named tuple are class name and a list of field names (which should be an iterable of strings or a single space
#delimited string)
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689, 139.691)) #data passed as positional arguments to the constructor
print(tokyo)
print(tokyo.population)
print(tokyo.coordinates)
print(tokyo._fields) #_fields is a tuple with the field names of the class
dehli_data = ('Delhi NCR', 'IN', 21.935, (28.1613, 77.208))
deli = City._make(dehli_data) #_make builds a City from an iterable; same effect as City(*dehli_data)
print(deli._asdict()) #_asdict returns a mapping of field names to their values

#  Tuple as Immutable Lists
### When using a tuple as an immutable variation of a list, it's important to know that a tuple supports all methods of a list that do not involve adding or removing items, except for the `__reversed__` method. However, `reversed(my_tuple)` still works without it.

# Slicing

In [None]:
# The last item in a slice is excluded, which works well with zero-based indexing.
#makes it easier to split a sequence in two parts at an index x. can do my_list[:x] and my_list[x:]
#s[a:b:c] can be used to specify a stride or step c causing the resulting slice to skip items.
#stride can also be negative, returning items in reverse

s = 'cat in the hat'
#every 3rd char, the whole string reversed, every 2nd char, every 2nd char walking backwards
print(s[::3] + ": " + s[::-1] + ": " + s[::2] + ": " + s[::-2])

In [None]:
#the notation a:b:c is only valid within [] when used as the indexing or subscript operator, and it produces a slice object

In [None]:
#the [] operator can also take multiple indexes or slices separated by commas 
#ie in numpy items of a 2 dimensional numpy.ndarray can be fetched using syntax a[i, j] and a 2d slice can be obtained with 
#a[m:n, k:l]
#__getitem__ and __setitem__ are what handle the [] operator and they simply receive the indices as a tuple.
#ie a[i, j] == a.__getitem__((i, j))
#this won't work with built in sequence types in python as they are one dimensional and they support only one index or slice, not a tuple
#of them

In [None]:
#slices are useful to extract information from sequences, but they are also used to change mutable sequences in place
l = list(range(10))
print(l)
l[2:5] = [20, 30] #replaces three items with two, so the list gets one item shorter
print("l is {} and len is {}".format(l, len(l)))
del l[5:7] #removes the items at indexes 5 and 6
print(l)
l[3::2] = [11, 22] #with a stride, the right hand side must supply exactly as many items as the slice selects
print(l)
#l[2:5] = 100 #this is an error as it is not an iterable
l[2:5] = [100]
#when the target of an assignment is a slice the right hand side must be an iterable object, even if it just has one item

In [None]:
#when you use + and * with sequences they will create a new sequence of the same type. for + both operands must be sequences
#of the same type; for * the other operand is an integer
l = [1, 2, 3]
r = [4, 5, 6]
print(l + r)
print(r * 2)
print(l * 5)

#both these operators create a new object and never change their operands

In [None]:
#the best way to initialize a list with a certain number of nested lists is to use a list comprehension
board = [['_'] * 3 for i in range(3)] #builds a nested list of 3 distinct inner lists, each holding 3 copies of the
#character _
print(board)
board[1][2] = 'X' #only the middle row changes because every row is its own list
print(board)

In [None]:
#incorrect way to initialize a list with a certain number of nested lists
weird_board = [['_'] * 3] * 3
print(weird_board)
weird_board[1][2] = '0'
print(weird_board)
#the 3rd element in every row gets changed because the outer list is made up of three
#references to the same inner list

In [None]:
#special method that makes += work is __iadd__ (in-place addition). however if __iadd__ is not implemented then python calls __add__
#a += b: if a implements __iadd__, like a mutable sequence such as list, bytearray, or array.array, a will be changed in place. ie 
#effect will be a.extend(b)
#however if a doesn't implement __iadd__ the effect is akin to a = a + b. aka expression a + b is evaluated, a new sequence is created and then
#a is bound to the new object. ie the identity of the object would change. if __iadd__ is implemented the identity
#doesn't change and the change takes place in place; otherwise the identity is different.
#in general mutable sequences have __iadd__ and += is in place 

In [None]:
#repeated concatenation of immutable sequences is inefficient because the interpreter has to copy the whole target sequence
#to create a new one with the new items concatenated rather than adding the new items in place.

In [None]:
#do not put mutable items in tuples

In [None]:
#list.sort sorts a list in place. it returns None to remind us that it just changes the target object and does not create a new list
#this is a python api convention, functions or methods that change an object in place should return None to make it clear to the
#caller that the object itself was changed and no new object was created.

#in contrast the built in function sorted() creates a new list and returns it. sorted accepts any iterable object as an argument
#including immutable sequences and generators

#both list.sort and sorted take two optional keyword arguments, reverse and key. reverse=True sorts in descending order
#key is a one argument function that will be applied to each item to produce its sorting key, ie key=str.lower to perform
#a case insensitive sort or key=len to sort the strings by character length
#Timsort is the algorithm that python uses and it is stable aka it preserves the relative ordering of items that compare equal

In [None]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits) #returns a new list sorted alphabetically

In [None]:
fruits #unchanged: sorted() builds a new list and leaves its argument alone

In [None]:
sorted(fruits, reverse=True) #new list in descending alphabetical order

In [None]:
sorted(fruits, key=len) #sorts by the length of the string; strings of equal length keep their original relative order

In [None]:
sorted(fruits, key=len, reverse=True) #new list sorted by the length of the string in descending order

In [None]:
fruits.sort() #sorts in place and returns None
fruits #now the list itself is in sorted order

In [None]:
#the bisect module of the standard python library has binary search and also the bisect.insort function which allows us to ensure that 
#sorted sequences stay sorted
#two main functions of bisect are bisect.bisect and insort. they use the binary search algorithm to quickly find and insert items
#in any sorted sequence 

#bisect(haystack, needle) does a binary search for needle in haystack. haystack must be a sorted sequence. it locates the position
#where needle can be inserted while maintaining haystack in ascending order. aka items up to that point are <= needle.
#can use the result of bisect(haystack, needle) as the index argument to haystack.insert(index, needle). however you can use 
#insort to do both steps faster

import bisect
import sys

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

#row layout: the needle, ' @ ', the insertion position, four spaces, one '   |' per position, then the needle again
#NOTE(review): each '   |' is 4 characters wide while the haystack line printed below uses 3 characters per item
#('%2d' plus a space), so the bars drift to the right of the haystack columns - confirm whether '  |' was intended
ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'


def demo(bisect_fn):
    """Print, for every needle, where bisect_fn would insert it into HAYSTACK.

    bisect_fn is called as bisect_fn(HAYSTACK, needle) and must return the
    insertion index (bisect.bisect or bisect.bisect_left, for instance).
    Needles are processed from the largest to the smallest, one output row
    per needle. Returns None.
    """
    for needle in reversed(NEEDLES):
        position = bisect_fn(HAYSTACK, needle)
        offset = position * '   |'
        print(ROW_FMT.format(needle, position, offset))


if __name__ == '__main__':
    #pass 'left' as the last command line argument to demo bisect_left instead of bisect
    if sys.argv[-1] == 'left':
        bisect_fn = bisect.bisect_left
    else:
        bisect_fn = bisect.bisect

    #everything that needs bisect_fn stays inside the guard, the only place where it is defined
    print('DEMO:', bisect_fn.__name__)
    print("haystack ->", ' '.join('%2d' %n for n in HAYSTACK))
    #actually run the demo with the chosen function
    demo(bisect_fn)

In [None]:
#insort(seq, item) inserts item into seq so as to keep seq in ascending order 
import bisect
import random

SIZE = 7
random.seed(1729) #fixed seed so that every run inserts the same sequence of items
my_list = []

for i in range(SIZE):
    new_item = random.randrange(SIZE*2)
    bisect.insort(my_list, new_item)
    print('%2d ->' % new_item, my_list) #my_list is printed after each insertion and is sorted every time

In [None]:
#the list type is flexible and easy to use but depending on specific requirements there are better options. ie array is more 
#efficient for 10 million floating point values because an array doesn't actually hold full fledged float objects but only the
#packed bytes representing their machine values 
#another time the list wouldn't be the right choice is if you are constantly adding and removing items from the ends of a list as a FIFO
#or a LIFO data structure. in that case a deque (double ended queue) works better and faster
#if my code does a lot of containment checks ie item in my_collection then consider using a set instead as they are optimized 
#for fast membership checking. however sets aren't sequences as they are unordered.

In [None]:
#an array supports all mutable sequence operations (.pop, .insert, .extend) and adds methods for fast loading and saving such
#as .frombytes and .tofile
#a python array is as lean as a c array
#when you instantiate an array you also specify the typecode of the elements within. The array will only hold that element type

#NOTE: these two imports rebind the names array and random from the modules to the class/function, so a cell that uses
#array.array or random.seed has to import the module again before it runs
from array import array
from random import random
floats = array('d', (random() for i in range(10**7))) #typecode 'd' is double precision float; the genexp feeds 10 million values
floats[-1]



In [None]:
#saving with array.tofile is about 7 times faster than writing one float per line in a text file.
#also the binary file is about 50% smaller than a text file holding the same amount of data.

In [None]:
#the array type does not have an in-place sort method. if there is a need to sort an array use the sorted function to rebuild it
#a = array.array(a.typecode, sorted(a))

In [None]:
#the memoryview class is a built in shared-memory sequence type that allows us to handle slices of arrays without
#copying bytes.
#memoryview is a generalized numpy array structure in python. it allows us to share memory between data structures without copying

In [None]:
#memoryview.cast allows us to change the way multiple bytes are read or written as units without moving bits around
#it returns another memoryview object which just shares the same memory
from array import array #imported here so the cell does not depend on which earlier cell last bound the name array

numbers = array('h', [-2, -1, 0, 1, 2]) #typecode 'h' is signed short
memv = memoryview(numbers)
print(len(memv))
print(memv[0])
print(memv.tolist())
memv_oct = memv.cast('B') #casting the elements of memv to typecode 'B' which is unsigned char
print(memv_oct.tolist())
memv_oct[5] = 4 #assigning the value 4 to byte offset 5, which (with 2-byte shorts) is one of the bytes of numbers[2]
print(memv_oct.tolist())
print(numbers) #numbers[2] is no longer 0 because memv_oct shares its memory with numbers

In [None]:
#numpy + scipy are used for advanced array and matrix operations.
#numpy implements multi dimensional homogeneous arrays and matrix types
#scipy is a library written on top of numpy that offers many scientific computing algorithms from linear algebra, numerical
#calculus and statistics. scipy leverages the widely used C and Fortran codebase from the netlib repo.


In [None]:
#.append and .pop methods make a list usable as a stack or a queue however inserting and removing from the left of a list
#(the 0 index) is costly as the entire list must be shifted.
#class collections.deque is a thread safe double ended queue designed for fast inserting and removing from both ends
from collections import deque
dq = deque(range(10), maxlen=10) #maxlen bounds the deque: once it is full, adding at one end discards items from the other end
print(dq)
dq.rotate(3) #rotating with n > 0 moves items from the right end to the left
print(dq)
dq.rotate(-4) #rotating with n < 0 moves items from the left end to the right
print(dq)
dq.appendleft(-1) #the deque is full, so the item at the right end is discarded
print(dq)
dq.extend([11, 22, 33]) #three items added on the right push three items off the left
print(dq)
dq.append(13)
print(dq)
dq.extendleft([10, 20, 30, 40]) #extendleft appends each item to the left in turn, so they end up in reverse order
print(dq)

In [None]:
#deques implement most of the list methods and add a few specific to their design ie popleft and rotate. however removing items 
#from the middle of a deque is expensive. it is optimized for appending and popping from the ends.
#append and popleft are atomic operations so we won't have to use a lock/mutex in multi threaded applications

In [None]:
#another queue datatype is queue.Queue
from queue import Queue
q = Queue(maxsize=3) #maxsize bounds the queue; put() blocks while the queue is full
print(q.empty())
print(q.qsize())
q.put(70)
print(q.qsize())
print(q.get()) #get() removes and returns the oldest item
q.put(80)
q.put(90)
print(q.get()) #FIFO order: 80 went in before 90, so 80 comes out first

In [2]:
#python sequences can be thought of as immutable vs mutable or also flat sequences vs container sequences. flat are more compact,
#faster and easier to use but limited to storing atomic data ie nums, chars, bytes. container sequences are more flexible but
#can surprise us when holding mutable objects and also incur more overhead.
#tuples in python have 2 roles: records with unnamed fields and also immutable lists