# Core Python 2

The examples in this notebook cover issues commonly encountered by someone new to Python and interested in Data Science.

## String Formatting

For an excellent discussion of string formatting see: https://pyformat.info/

In [1]:
class Data:
    """Minimal class whose str() and repr() forms are deliberately different."""

    def __repr__(self):
        # Reached through repr() and the '!r' conversion flag.
        return '__repr__ was called'

    def __str__(self):
        # Reached through str(), print() and the '!s' conversion flag.
        return '__str__  was called'
    
# create instance
d = Data()

# display instance in both str and repr forms
# (the !s / !r conversion flags apply str() / repr() to the value before formatting)
print('{0!s}'.format(d))
print('{0!r}'.format(d))
print()

# use Python 3.6+ f strings to do the same
print(f'{d!s}')
print(f'{d!r}')

__str__  was called
__repr__ was called

__str__  was called
__repr__ was called


In [2]:
# string formatting with str.format
s = 'test'
print('{:10}'.format(s))    # pad to width 10 (strings are left-aligned by default)
print('{:>10}'.format(s))   # right-align within width 10
print('{:-^10}'.format(s))  # center within width 10, using '-' as the fill character

test      
      test
---test---


In [3]:
# string formatting with f strings
# (same format specs as above; s is defined in the previous cell)
print(f'{s:10}')    # pad to width 10, left-aligned
print(f'{s:>10}')   # right-align within width 10
print(f'{s:-^10}')  # center within width 10, using '-' as the fill character

test      
      test
---test---


In [4]:
pi = 3.141592653589793
# 7.5f: fixed-point notation, 5 digits after the decimal point, minimum width 7
print('%7.5f' % pi) # old style, not recommended
print('{:7.5f}'.format(pi)) # recommended
print(f'{pi:7.5f}') # recommended for Python 3.6 and above

3.14159
3.14159
3.14159


### Recursion Example

In [5]:
import collections.abc

def list_flatten(my_iterable, a=None):
    """Recursively flatten arbitrarily nested iterables into a single flat list.

    Strings are treated as atomic items. A str is itself iterable and yields
    one-character strings, which are again iterable, so recursing into them
    would never terminate.

    Args:
        my_iterable: iterable whose items may themselves be iterables
            (lists, tuples, ...).
        a: list the flattened items are appended to. A new list is created
            when omitted.

    Returns:
        The accumulator list `a`, holding the items in depth-first order.
    """

    # idiom for a mutable default argument
    if a is None:
        a = []

    for item in my_iterable:
        is_nested = (isinstance(item, collections.abc.Iterable)
                     and not isinstance(item, str))
        if is_nested:
            # the recursive call appends into the same accumulator
            list_flatten(item, a)
        else:
            a.append(item)
    return a

# a mix of lists and tuples nested several levels deep
my_list = [1,2,(3, 4, (5, 6, (7,))), 8, [9, 10]]
my_flat_list = list_flatten(my_list)
print(f'{my_list} -> {my_flat_list}')

[1, 2, (3, 4, (5, 6, (7,))), 8, [9, 10]] -> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


## Specifying Key with Max, Sorted

In [6]:
import numpy as np
# seed NumPy's global random state so the same array is produced on every run
np.random.seed(seed=28)
# 50 random integers drawn from the half-open range [-50, 50)
a = np.random.randint(-50, 50, 50)
a

array([-49, -45, -28, -18, -47,  37, -38,  14,   1, -27, -38,  19, -42,
       -26,  44,  -7,  42,  38, -43, -49,  28,  -5,  10, -16,   1,  46,
        46, -13,  20,  18, -22, -35, -24, -46, -43, -38,  -6,  26,  47,
       -18,  17, -38,   7,  30,   8,  15, -49,  24, -33,  24])

In [7]:
# get min and max values of the array
# (the builtins iterate over the array element by element)
min(a), max(a)

(-49, 47)

In [8]:
# get min and max of the absolute values of the array
# (the key function only decides the ranking; the original element is returned)
min(a, key=abs), max(a, key=abs)

(1, -49)

In [9]:
# sort the array by its absolute value
# (any one-argument callable can be the key, so the builtin abs is passed directly)
x = sorted(a, key=abs)
x[:9]

[1, 1, -5, -6, -7, 7, 8, 10, -13]

# Python 3.6 NamedTuple

In [10]:
from typing import NamedTuple

class Point(NamedTuple):
    """Immutable 2-D point with fields x and y."""
    x: float
    y: float

In [11]:
# note that __slots__ is defined
# this makes for more efficient storage (instances carry no per-instance __dict__),
# which matters because a record type like this may be instantiated many times
'__slots__' in dir(Point)

True

In [12]:
# the float annotations are not enforced at runtime: the ints are stored unchanged
a = Point(0, 0)
print(a)

Point(x=0, y=0)


In [13]:
# a Point only has its declared fields; assigning a new attribute raises AttributeError
try:
    a.z = 10
except AttributeError as msg:
    print(msg)

'Point' object has no attribute 'z'


In [14]:
import numpy as np
# seed for reproducibility, then draw three standard-normal samples per coordinate
np.random.seed(seed=101)
x = np.random.normal(0, 1, 3)
y = np.random.normal(0, 1, 3)
# round to 2 decimals and pair the coordinates up as (x, y) tuples
z = list(zip(np.round(x,2),np.round(y,2)))
z

[(2.71, 0.5), (0.63, 0.65), (0.91, -0.32)]

In [15]:
# _make builds a Point from an iterable of field values
points = [Point._make(t) for t in z]
points

[Point(x=2.71, y=0.5), Point(x=0.63, y=0.65), Point(x=0.91, y=-0.32)]

In [16]:
# the same construction, using map instead of a list comprehension
points2 = list(map(Point._make, z))
points2

[Point(x=2.71, y=0.5), Point(x=0.63, y=0.65), Point(x=0.91, y=-0.32)]

In [17]:
# list equality checks for equality element by element
# each element is a NamedTuple, which inherits __eq__ from tuple
# so this works
points == points2

True

In [18]:
# data is immutable as with namedtuples: an existing field cannot be reassigned
try:
    points[0].x = 1.11
except AttributeError as error:
    print(error)

can't set attribute


In [19]:
# but you can "replace" a value (which creates a new instance)
print(points[0])
new_point = points[0]._replace(x=-1.1)
print(points[0])  # the original instance is unchanged
print(new_point)

Point(x=2.71, y=0.5)
Point(x=2.71, y=0.5)
Point(x=-1.1, y=0.5)


In [20]:
# **kwargs: unpack a dict whose keys match the field names into keyword arguments
my_point_dict = {'x':1.1, 'y':2.2}
my_point = Point(**my_point_dict)
my_point

Point(x=1.1, y=2.2)

In [21]:
# create a mutable point with an ordering
class MyPoint():
    """Mutable 2-D point that is compared and ordered by its (x, y) pair.

    Only __eq__ and __lt__ are implemented. Both return NotImplemented for
    operands that are not MyPoint instances, so Python can try the other
    operand's reflected method and otherwise fall back to its defaults
    (identity for ==, TypeError for <).
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __eq__(self, other):
        if not isinstance(other, MyPoint):
            return NotImplemented
        return (self.x, self.y) == (other.x, other.y)

    def __lt__(self, other):
        # Without this guard, comparing against a foreign type would raise
        # AttributeError from inside the method instead of a clean TypeError.
        if not isinstance(other, MyPoint):
            return NotImplemented
        return (self.x, self.y) < (other.x, other.y)

    def __repr__(self):
        return f'MyPoint(x={self.x},y={self.y})'

In [22]:
# note that __slots__ is not defined
'__slots__' in dir(MyPoint)

False

In [23]:
p1 = MyPoint(x=1, y=2)
p2 = MyPoint(x=1, y=2)
print(p1 is p2) # two distinct objects
print(p1 == p2) # equal by value, via __eq__
print(p1 != p2) # __ne__ delegates to not __eq__

False
True
False


In [24]:
# instances are mutable: change y in place
p1.y = 3
pts1 = [p1, p2]
pts1

[MyPoint(x=1,y=3), MyPoint(x=1,y=2)]

In [25]:
# MyPoint defines no __gt__, so Python evaluates > via the reflected call p2.__lt__(p1)
p1 > p2

True

In [26]:
sorted(pts1) # uses __lt__

[MyPoint(x=1,y=2), MyPoint(x=1,y=3)]

In [27]:
# easier way to create a mutable point class
from dataclasses import dataclass

@dataclass(order=True)
class MyPoint():
    """Mutable 2-D point; __init__, __repr__, __eq__ and the ordering methods are generated by @dataclass."""
    x: float
    y: float

In [28]:
p1 = MyPoint(x=1, y=2)
p2 = MyPoint(x=1, y=2)
print(p1 is p2) # two distinct objects
print(p1 == p2) # equal by value, via the generated __eq__
print(p1 != p2) # __ne__ delegates to not __eq__

False
True
False


In [29]:
# dataclass instances are mutable by default: change y in place
p1.y = 3
pts1 = [p1, p2]
pts1

[MyPoint(x=1, y=3), MyPoint(x=1, y=2)]

In [30]:
# order=True generates __gt__ (along with __lt__, __le__ and __ge__), so > calls it directly
p1 > p2

True

In [31]:
sorted(pts1) # uses __lt__

[MyPoint(x=1, y=2), MyPoint(x=1, y=3)]

In [32]:
# note that __slots__ is not defined
'__slots__' in dir(MyPoint)

False

In [33]:
# without __slots__, instances accept arbitrary new attributes
p1 = MyPoint(x=1, y=2)
p1.z = 10

In [34]:
# create a mutable point class with fixed fields x and y
from dataclasses import dataclass

@dataclass(order=True)
class MyPoint():
    # annotated fields picked up by the dataclass decorator
    x: float
    y: float
    
    # restrict instances to exactly these attributes
    __slots__ = ['x', 'y']

In [35]:
# with __slots__ declared, assigning an undeclared attribute raises AttributeError
p1 = MyPoint(x=1, y=2)
try:
    p1.z = 10
except AttributeError as msg:
    print(msg)

'MyPoint' object has no attribute 'z'


## Dict with integer keys

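Integer keys cannot be passed as keyword arguments to the `dict()` constructor (`dict(1=11)` is a syntax error); use a `{}` literal instead, or pass key/value pairs, e.g. `dict([(1, 11), (2, 22)])`.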

In [36]:
d = {1:11, 2:22, 3:33}
d

{1: 11, 2: 22, 3: 33}

## Dict with lists for value

In [37]:
from collections import defaultdict
# a missing key is created with an empty list on first access,
# so values can be appended without initialising the key first
d = defaultdict(list)
d["a"].append(1)
d["b"].append(2)
d["a"].append(11)
d["b"].append(22)

In [38]:
# membership test on the keys; unlike d['a'], it never creates a missing key
'a' in d

True

In [39]:
# equivalent to `'a' in d`, just more explicit
'a' in d.keys()

True

In [40]:
11 in d['a']

True

In [41]:
d = defaultdict(list)
d["a"].append(1)
d["a"].append(2)

# create a shallow copy: a new dict whose values are the very same list objects
d2 = d.copy()

# modify d["a"]
d["a"].append(101)

# d2["a"] is also modified, because both dicts reference the same list
print(d)
print(d2)

defaultdict(<class 'list'>, {'a': [1, 2, 101]})
defaultdict(<class 'list'>, {'a': [1, 2, 101]})


In [42]:
# same as above with deepcopy
from copy import deepcopy
d = defaultdict(list)
d["a"].append(1)
d["a"].append(2)

# create a deep copy: the contained lists are copied too
d2 = deepcopy(d)

# modify d["a"]
d["a"].append(101)

# d2["a"] is not modified, because it holds its own copy of the list
print(d)
print(d2)

defaultdict(<class 'list'>, {'a': [1, 2, 101]})
defaultdict(<class 'list'>, {'a': [1, 2]})


## Iterator and Iterable

In [43]:
from collections.abc import Iterable, Iterator

# a list is iterable (iter() can be called on it), but it is not an iterator
a = [1, 2, 3]
print(f'isinstance(a, Iterable): {isinstance(a, Iterable)}')
print(f'isinstance(a, Iterator): {isinstance(a, Iterator)}')

isinstance(a, Iterable): True
isinstance(a, Iterator): False


In [44]:
# a list is not an iterator, so it does not have a __next__ method
try:
    next(a)
except TypeError as msg:
    print(msg)

'list' object is not an iterator


In [45]:
# but you can get an iterator from a list
i = iter(a)
print(f'isinstance(i, Iterator): {isinstance(i, Iterator)}')
# call next() once per element of a; the loop variable itself is not needed
[next(i) for _ in a]

isinstance(i, Iterator): True


[1, 2, 3]

In [46]:
# an iterator has a __next__ method, so next() succeeds and the except branch is not taken
i = iter(range(2,4))
try:
    print(next(i))
except TypeError as msg:
    print(msg)

2


## Closure

A closure is a function that remembers values from its enclosing scope.

A closure is sometimes called a function factory, as it creates a specialized function.

In [47]:
# function factory to make addition functions
def make_adder(n):
    """Return a one-argument function that adds n to its argument."""
    def add(x):
        # n is captured from the enclosing make_adder call
        return x + n
    return add

In [48]:
# each returned function keeps the n it was created with
plus_2 = make_adder(2)
plus_9 = make_adder(9)
print(plus_2(100))
print(plus_9(100))

102
109


In [49]:
# closure example 2
def upper(f):
    """Decorator: upper-case the string returned by f.

    The inner function is returned as-is, so the decorated name reports the
    wrapper's __name__ and __doc__ rather than those of f.
    """
    def wrapped(name):
        # f is captured from the enclosing call
        result = f(name)
        return result.upper()
    return wrapped

@upper
def my_function(name):
    """My Function Doc String"""
    return name

print(my_function('hello world'))
# prints None: the name my_function is now bound to the inner wrapper, which has no docstring
print(my_function.__doc__)

HELLO WORLD
None


In [50]:
import functools

# closure example 2b
def upper(f):
    """Decorator: upper-case the string returned by f, keeping f's metadata."""
    def wrapped(name):
        return f(name).upper()
    # copies f's __name__, __doc__, etc. onto the wrapper and returns the wrapper;
    # this is what the @functools.wraps(f) decorator does under the hood
    return functools.update_wrapper(wrapped, f)

@upper
def my_function(name):
    """My Function Doc String"""
    return name

print(my_function('hello world'))
# the wrapper now carries the original function's docstring
print(my_function.__doc__)

HELLO WORLD
My Function Doc String
