# Setup and get data

In [10]:
import functools

import numpy as np
import pandas as pd
import sklearn
from sklearn import datasets

# Load the iris dataset, then rebuild it as one DataFrame: feature columns plus a 'target' column.
iris = sklearn.datasets.load_iris()
iris = pd.DataFrame(
    np.column_stack((iris.data, iris.target)),
    columns=[*iris.feature_names, 'target'],
)
# Tidy the column names: underscores instead of spaces, then drop the "_(cm)" unit suffix.
iris.columns = [name.replace(' ', '_') for name in iris.columns]
iris = iris.rename(columns={
    'sepal_length_(cm)': 'sepal_length',
    'sepal_width_(cm)': 'sepal_width',
    'petal_length_(cm)': 'petal_length',
    'petal_width_(cm)': 'petal_width',
})

## Methods and functions sometimes change the object, sometimes don't

Calling a function does not change the object:

In [32]:
letters=list('liz')
sorted(letters) # sorted() returns a NEW sorted list; the result is not saved here, so `letters` keeps its original order
print(letters)

['l', 'i', 'z']


# Write function

In [52]:
def add_three(input):
    """Return ``input + 3``."""
    # NOTE: the parameter name `input` shadows the built-in input() inside this function.
    return input + 3

In [53]:
add_three(5)

8

`input` is the **parameter**, `5` is the **argument**.

### View properties of a function:

In [48]:
dir(add_three)[0:5]

['__annotations__', '__call__', '__class__', '__closure__', '__code__']

# generic functions 
use diff methods on diff classes (_function overloading_)

`print()` uses one method for factor class:

In [None]:
# this is in R

print(f)
#> [1] a b c
#> Levels: a b c

different method for integer class (i.e., for a factor stripped of its class):
print(unclass(f))
#> [1] 1 2 3

The class-specific methods are named `generic`.`class()` (eg, `print.factor()`)

Methods have all the arguments of the generic, plus unique ones
- print.Date adds max, print.factor adds levels

generics find the right method for the class, via “method dispatch”
- performed by UseMethod(), which every generic calls

What methods avail for a generic?
`methods("print")`
What methods avail for a class?
`methods(class = "factor")`


# Argument passing

### Positional/required and default arguments

In [72]:
def print_cost(qty=5, item='eggs', price=15):
    """Print the same cost sentence twice, built in two different ways.

    Every parameter has a default, so the function can be called with no arguments.
    ``price`` is shown with two decimal places.
    """
    price_text = f'cost ${price:.2f}'
    # 1) several arguments to print(), which joins them with single spaces
    print(qty, item, price_text)
    # 2) one f-string that embeds all of the values
    print(f'{qty} {item} {price_text}')


In [73]:
print_cost()

5 eggs cost $15.00
5 eggs cost $15.00


### Variables as arguments

Can pass **by value**: for immutable objects, the function cannot modify the object in the calling environment.

In [87]:
def fxn(x):
    """Print ``x`` and its id(), rebind the local name to 10, then print both again.

    Only the local name ``x`` is rebound; nothing is returned.
    """
    incoming_id = id(x)
    print("input was", x, ", identifier", incoming_id)
    x = 10  # rebinds the local name to a different object
    rebound_id = id(x)
    print("new value is", x, ", identifier", rebound_id)
    

In [95]:
x = 5
fxn(x)
print(x)
print("Identifier is different: new object was only created inside the function, and global x is unchanged.")

input was 5 , identifier 4459145552
new value is 10 , identifier 4459145712
5
Identifier is different: new object was only created inside the function, and global x is unchanged.


The variable was bound to an immutable object: an integer.

Another example:

In [42]:
def add_three_assign(x):
    """Print ``x``, rebind the local name to ``x + 3``, and print the new value.

    Nothing is returned.
    """
    print("input was", x)
    x = x + 3  # `x + 3` is evaluated first, then the local name x is rebound to the result
    print("new value is", x)

`x` variable is printed as my_var + 3:

In [43]:
my_var = 6
add_three_assign(my_var)

input was 6
new value is 9


but `my_var` is unchanged: 

In [44]:
my_var

6

To "change" an immutable, return the new value from the function and reassign the variable in the calling environment:

In [2]:
# Same definition as the add_three cell in the "Write function" section, repeated here.
def add_three(input):
    """Return ``input + 3``; the caller decides what to do with the returned value."""
    return input + 3

In [9]:
my_var = 6
my_var = add_three(my_var)
my_var

9

**Pass by reference**: can modify

You CAN change the parameter when it's mutable:

In [85]:
def add_three_to_first(x):
    """Add 3 to the first element of ``x`` in place, printing ``x`` and its id() before and after.

    Nothing is returned; the change is made through item assignment on ``x`` itself.
    """
    id_before = id(x)
    print("input was", x, ", identifier", id_before)
    x[0] = x[0] + 3  # item assignment: changes the object that x refers to
    id_after = id(x)
    print("new value is", x, ", identifier", id_after)

In [86]:
# Use a name other than `list`: assigning to `list` would shadow the built-in type
# and break later calls such as list('liz') in the same kernel session.
numbers = [1,2,3]
add_three_to_first(numbers)
print("The identifier is the same. So `list` was modified.")

input was [1, 2, 3] , identifier 4498282888
new value is [4, 2, 3] , identifier 4498282888
The identifier is the same. So `list` was modified.


This is considered a **side effect**: the function modifies an object in its calling environment by mutating the argument in place (unlike reassigning the parameter, which only rebinds a local name).
<br>No `return` is needed.

# Docstring

In [68]:
%run -i functions.py

In functions.py:

In [39]:
def add_two(number):
    """one-line summary: add 2 to any number!

    further elaboration would go here
    this is how addition works
    """
    
    # The string literal above is the first statement of the body, which makes it the
    # docstring: it is stored in add_two.__doc__ and shown by help(add_two).
    return number+2

When viewing the properties of add_two, the `__doc__` dunder attribute is where its docstring is stored (it is `None` for a function without one):

In [69]:
dir(add_two)[5:10]

['__defaults__', '__delattr__', '__dict__', '__dir__', '__doc__']

In [70]:
print(add_two.__doc__)

one-line summary: add 2 to any number!

    further elaboration would go here
    this is how addition works
    


In [71]:
help(add_two)

Help on function add_two in module __main__:

add_two(number)
    one-line summary: add 2 to any number!
    
    further elaboration would go here
    this is how addition works



# lambda functions

- single use, single line
- named or anonymous

### **General form**

`lambda arguments: expression`

In [1]:
lambda input: input + 2

<function __main__.<lambda>(input)>

### **WithOUT lambda:** long but get a reusable get_first function

In [24]:
# NOTE: despite its name this is a list of tuples, and the name shadows the built-in `dict` type
# for as long as it stays bound in the session.
dict = [(37,1,3),(2,5,7),(10,2,14)]

In [27]:
def get_first(x):
    """Return the first element of a list or tuple."""
    return x[0]
# .sort() reorders the list in place, using get_first(item) as the sort key for each item.
dict.sort(key = get_first) # explicit, but hard to read: have to look elsewhere to know what get_first is
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### **With lambda:** short but no reusable function

basically don't need **def** or **return**

In [26]:
dict.sort(key = lambda x: x[0])
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### **Named lambda**

In [28]:
get_first = lambda x: x[0]
dict.sort(key = get_first)
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### Use inside other functions like **apply**, **map**

In [36]:
def size(sepal_length):
    """Return "big" when ``sepal_length`` is greater than 5, otherwise None."""
    return "big" if sepal_length > 5 else None
    
iris['size'] = iris.apply(lambda x: size(x['sepal_length']),axis=1)
iris[0:3]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,size
0,5.1,3.5,1.4,0.2,0.0,big
1,4.9,3.0,1.4,0.2,0.0,
2,4.7,3.2,1.3,0.2,0.0,


Multiple conditions:

In [33]:
def size(sepal_length,sepal_width):
    """Return "big" when sepal_length > 4 and sepal_width > 3, otherwise "small"."""
    is_big = sepal_length > 4 and sepal_width > 3
    return "big" if is_big else "small"
    
iris['size'] = iris.apply(lambda x: size(x['sepal_length'],x['sepal_width']),axis=1)
iris[0:3]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,size
0,5.1,3.5,1.4,0.2,0.0,big
1,4.9,3.0,1.4,0.2,0.0,small
2,4.7,3.2,1.3,0.2,0.0,big


# try

In [None]:
import logging
logger = logging.getLogger('Training')
#logger.setLevel(LEVELS['DEBUG'])


In [None]:
 try:
     logger.info(f'Initializing the configuration for {kwargs["workspace"]} feature retrieval...')
 except Exception as e:
     # Log at ERROR level with the traceback: an INFO record is dropped when the
     # logger is left at the default WARNING threshold, which would hide the failure.
     logger.exception(e)
     # Bare `raise` re-raises the active exception unchanged.
     raise

In [None]:
class Feature():
    """Resolves the ``num_months`` setting from keyword arguments or from ``params``."""

    def __init__(self, **kwargs):
        """Take ``num_months`` from ``kwargs`` or fall back to ``params``.

        - If ``num_months`` is missing or falsy in ``kwargs``, the value is read from
          ``params['range']['num_months']`` and must lie between ``MIN_MONTHS`` and
          ``MAX_MONTHS`` (inclusive); otherwise RuntimeError is raised.
        - Otherwise the supplied value is written to ``params['range']['num_months']``.

        Any exception is logged with its traceback and re-raised.
        ``params``, ``MIN_MONTHS``, ``MAX_MONTHS`` and ``logger`` are not defined in this
        class; they must already exist in the enclosing scope.
        """
        try:
            # .get() so that omitting num_months falls back to the config value
            # instead of raising KeyError.
            num_months = kwargs.get('num_months')
            if not num_months:
                num_months = params['range']['num_months']
                # `or`, not `|`: the bitwise operator binds tighter than the comparisons,
                # so `a < MIN | a > MAX` is not a range check at all.
                if num_months < MIN_MONTHS or num_months > MAX_MONTHS:
                    # Both halves are f-strings so the limits are actually interpolated.
                    raise RuntimeError(
                        f'Invalid num_months specified {num_months} in config, the value '
                        f'should be between {MIN_MONTHS} & {MAX_MONTHS}')
            else:
                params['range']['num_months'] = num_months

        except Exception as e:
            logger.exception(f'Error occurred :- {e}')
            raise

# `*args` and `**kwargs`


variable-length arguments: let functions take an arbitrary number of positional arguments (`*args`) or keyword arguments (`**kwargs`; "kwargs" means "keyword arguments")


### `*args`

In [69]:
def myFun(normal_arg, *arglist):
    """Print the first argument, then each extra positional argument on its own line."""
    print("first normal arg:", normal_arg)
    for extra in arglist:
        print("another arg through *arglist:", extra)

myFun('1', 3, 'hi')

first normal arg: 1
another arg through *arglist: 3
another arg through *arglist: hi


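In a function definition, `*` "packs" the extra positional arguments into a tuple (NOT a list); in a function call, `*` unpacks a sequence into separate arguments.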

### `**kwargs`

In [134]:
def myFun(**kwargs):
    """Print each keyword argument as a ``(name, value)`` tuple, one per line."""
    for pair in kwargs.items():
        print(pair)
 
myFun(first = 'one', second = 'two', third = 'three')   

('first', 'one')
('second', 'two')
('third', 'three')


[because ** unpacks dictionaries](https://stackoverflow.com/questions/1769403/what-is-the-purpose-and-use-of-kwargs):


In [136]:
args_dict = {'first': 'one', 'second': 'two', 'third': 'three'}
myFun(**args_dict)

('first', 'one')
('second', 'two')
('third', 'three')


# Decorators

modify the behavior of a function without explicitly modifying our function.

In [None]:
import timeit 
import datetime

### Write and call a simple wrapper function 
(without decorating)

In [133]:
def log_timer_simple(func):
    """Wrap ``func`` so that each call prints "start", runs it, and prints the elapsed time.

    The wrapper is RETURNED rather than called, so this works both as a plain call,
    ``log_timer_simple(f)()``, and as a decorator, ``@log_timer_simple``. Arguments and
    the return value of ``func`` are passed through unchanged.
    """
    @functools.wraps(func)  # keep func's name and docstring on the wrapper
    def wrapper(*args, **kwargs):
        print("start")
        start = datetime.datetime.now()
        result = func(*args, **kwargs)
        runtime = datetime.datetime.now() - start  # a datetime.timedelta
        print("end. It took", runtime, "seconds.")
        return result

    return wrapper

def main_fxn():
    print("main function running")

# Build the wrapper, then call it explicitly (no decorator syntax yet).
log_timer_simple(main_fxn)()

start
main function running
end. It took 0:00:00.000023 seconds.


### Use wrapper as decorator with `@`

In [118]:
@log_timer_simple
def main_fxn():
    print("main function running")

main_fxn()

start
main function running
end.


# Problem: `wrapper()` doesn't work with *args

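### `return wrapper` and calling `wrapper()` both work:

(With `wrapper()`, the function runs immediately when it is wrapped or decorated, and the wrapping function itself returns `None`; with `return wrapper`, you get back a function to call later.)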

In [157]:
def main_fxn():
    print("main function running")

In [159]:
def log_wrapper_returnwrapper(func):
    """Return a no-argument function that prints "start", calls ``func()``, then prints "end.".

    Nothing runs until the returned wrapper is itself called.
    """
    def wrapper(): 
        print("start")
        func()
        print("end.")
        
    return wrapper  # hand the wrapper back without calling it

log_wrapper_returnwrapper(main_fxn)()

start
main function running
end.


In [165]:
@log_wrapper_returnwrapper
def main_fxn():
    print("main function running")
main_fxn()

start
main function running
end.


In [160]:
def log_wrapper_callwrapper(func):
    """Run ``func()`` once, right away, between "start" and "end." messages.

    This function calls ``wrapper()`` itself and has no return statement, so it returns None.
    """
    def wrapper(): 
        print("start")
        func()
        print("end.")
        
    wrapper()  # executed immediately, as part of calling log_wrapper_callwrapper(func)

log_wrapper_callwrapper(main_fxn)

start
main function running
end.


In [161]:
@log_wrapper_callwrapper
def main_fxn():
    print("main function running")


start
main function running
end.


### But only the `return` version works with `*args`:

In [None]:
def log_wrapper_returnwrapper(func):
    """Return a wrapper that prints "start", calls ``func``, prints "end.", and returns func's result.

    The wrapper accepts any positional and keyword arguments and forwards them to ``func``.
    """
    @functools.wraps(func)  # keep func's name and docstring on the wrapper
    def wrapper(*args, **kwargs):
        print("start")
        result = func(*args, **kwargs)
        print("end.")
        return result  # without this, the decorated function would always return None

    return wrapper

@log_wrapper_returnwrapper
def new_fxn(statement):
    print(statement)

new_fxn(statement = "print this")

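Calling `wrapper()` inside the decorator doesn't work: it runs at decoration time with no arguments (so `statement` is missing), before `new_fxn(...)` is ever called: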

In [191]:
def log_wrapper_callwrapper(func):
    """Run ``func`` once, right away, between "start" and "end." messages.

    ``wrapper`` accepts ``*args``/``**kwargs``, but it is invoked below with none, so
    ``func`` is called without arguments. A ``func`` with a required parameter therefore
    raises TypeError right after "start" is printed. There is no return statement, so
    this function returns None.
    """
    def wrapper(*args, **kwargs): 
        print("start")
        func(*args, **kwargs)
        print("end.")
        
    wrapper()  # called with NO arguments, so func receives none either

@log_wrapper_callwrapper
def new_fxn(statement):
    print(statement)

new_fxn(statement = "print this")

start


TypeError: new_fxn() missing 1 required positional argument: 'statement'

### decorator / wrapper function with arguments

In [None]:
def log_timer(func, notation, roundto = 4):
    """Run ``func()`` once, printing start and end messages with the runtime.

    notation can be "datetime" (runtime shown as a datetime.timedelta) or "timeit" (as # seconds).
    If "timeit", can use "roundto" for # decimal places, or leave as default 4.

    Raises ValueError for any other notation, before ``func`` is run.
    ``func`` is called immediately with no arguments and nothing is returned.
    """
    # Validate once, up front, so an invalid notation fails before anything is printed or run.
    if notation not in ("datetime", "timeit"):
        raise ValueError("notation has to be `datetime` or `timeit`.")

    print("start")

    if notation == "datetime":
        start = datetime.datetime.now()
        func()
        runtime = datetime.datetime.now() - start
    else:
        start = timeit.default_timer()
        func()
        elapsed = timeit.default_timer() - start
        # `.{roundto}f` makes the printed precision follow roundto
        # (a `6f` spec always shows six decimals, whatever was rounded).
        runtime = f"{elapsed:.{roundto}f}"

    print("end. It took", runtime, "seconds.")

    
log_timer(main_fxn, notation = "timeit")

# Profiling

Run a function lots of times and get average runtime.

### Single line function with `%timeit`

In [193]:
%timeit add_three(4)

75.4 ns ± 0.459 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


### Multi line function with line_profiler

In [217]:
def add_nums(input):
    """Return ``input + 3`` after printing the integers 1 through 1999, one per line.

    The printing loop gives the line profiler something to measure.
    """
    result = input + 3
    for counter in range(1, 2000):
        print(counter)
    return result

In [194]:
import line_profiler

In [197]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [218]:
%lprun -f add_nums add_nums(4)
# time in microseconds

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [205]:
add_nums(4)

'hi {} newnum2'