# Setup and get data

In [10]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import datasets

# Keep the raw scikit-learn Bunch under its own name so that `iris` only ever
# refers to the DataFrame (the name is not reused for two different kinds of object).
iris_bunch = sklearn.datasets.load_iris()

# Clean column names: drop the " (cm)" unit suffix and replace spaces with underscores,
# e.g. 'sepal length (cm)' -> 'sepal_length'.
feature_columns = [name.replace(' (cm)', '').replace(' ', '_')
                   for name in iris_bunch.feature_names]

# Put the feature matrix and the target vector side by side in one frame.
# The combined array is float, so `target` shows up as e.g. 0.0 rather than 0.
iris = pd.DataFrame(np.column_stack((iris_bunch.data, iris_bunch.target)),
                    columns=feature_columns + ['target'])

## Methods and functions sometimes change object, sometimes don't

Calling a function does not change the object:

In [32]:
letters=list('liz')
sorted(letters) # sorted() returns a NEW sorted list; the result is discarded here, so `letters` itself is left unchanged
print(letters)

['l', 'i', 'z']


# Write function

In [52]:
def add_three(input):
    """Return the given value increased by 3.

    Note: inside this function the parameter name shadows the built-in ``input``.
    """
    total = input + 3
    return total

In [53]:
add_three(5)

8

`input` is the **parameter**, `5` is the **argument**.

### View properties of a function:

In [48]:
dir(add_three)[0:5]

['__annotations__', '__call__', '__class__', '__closure__', '__code__']

# generic functions 
use diff methods on diff classes (_function overloading_)

`print()` uses one method for factor class:

In [None]:
# this is in R

print(f)
#> [1] a b c
#> Levels: a b c

different method for integer class (i.e., for a factor stripped of its class):
print(unclass(f))
#> [1] 1 2 3

The class-specific methods are named `generic`.`class()` (eg, `print.factor()`)

Methods have all the arguments of the generic, plus unique ones
- print.Date adds max, print.factor adds levels

generics find the right method for the class, via “method dispatch”
- performed by UseMethod(), which every generic calls

What methods are available for a generic?
`methods("print")`
What methods are available for a class?
`methods(class = "factor")`


# Argument passing

### Positional/required and default arguments

In [72]:
def print_cost(qty=5, item='eggs', price=15):
    """Print the same cost line twice, built in two different ways.

    All three parameters have defaults, so the function can be called with no
    arguments. ``price`` is shown with two decimal places after a dollar sign.
    """
    formatted_price = f'${price:.2f}'
    # Way 1: separate arguments -- print() joins them with single spaces.
    print(qty, item, 'cost', formatted_price)
    # Way 2: one f-string with every value interpolated into it.
    print(f'{qty} {item} cost {formatted_price}')


In [73]:
print_cost()

5 eggs cost $15.00
5 eggs cost $15.00


### Variables as arguments

Can pass **by value**: for immutable objects, the function cannot modify the object in the calling environment.

In [87]:
def fxn(x):
    """Show what happens when a parameter is rebound inside a function.

    Prints the incoming value with its id(), rebinds the local name ``x`` to 10,
    then prints the new value with its id(). Nothing is returned.
    """
    print(f"input was {x} , identifier {id(x)}")
    x = 10  # rebinds the local name only
    print(f"new value is {x} , identifier {id(x)}")
    

In [95]:
x = 5
fxn(x)  # inside fxn the parameter is rebound to 10; that rebinding is local to the function
print(x)  # the global x is still 5
print("Identifier is different: new object was only created inside the function, and global x is unchanged.")

input was 5 , identifier 4459145552
new value is 10 , identifier 4459145712
5
Identifier is different: new object was only created inside the function, and global x is unchanged.


The variable was bound to an immutable object: an integer.

Another example:

In [42]:
def add_three_assign(x):
    """Print the argument, add 3 to the local name, and print the result.

    The sum is only bound to the local ``x``; nothing is returned.
    """
    print(f"input was {x}")
    x = x + 3
    print(f"new value is {x}")

`x` variable is printed as my_var + 3:

In [43]:
my_var = 6
add_three_assign(my_var)  # the addition is applied to the function's local x, not to my_var

input was 6
new value is 9


but `my_var` is unchanged: 

In [44]:
my_var

6

To "change" an immutable, have the function return the new value and reassign it to the variable in the calling environment:

In [2]:
def add_three(input):
    """Add 3 to ``input`` and hand the sum back to the caller."""
    increment = 3
    return input + increment

In [9]:
my_var = 6
my_var = add_three(my_var)  # rebind my_var to the value returned by the function
my_var

9

**Pass by reference**: can modify

You CAN change the parameter when it's mutable:

In [85]:
def add_three_to_first(x):
    """Add 3 to the first element of ``x`` in place.

    ``x`` must support item assignment (e.g. a list). Its value and id() are
    printed before and after the change; nothing is returned.
    """
    print(f"input was {x} , identifier {id(x)}")
    x[0] = x[0] + 3  # item assignment changes the object the caller passed in
    print(f"new value is {x} , identifier {id(x)}")

In [86]:
# Use a name that does not shadow the built-in `list` type: rebinding `list`
# would break any later call such as list('liz') in the same kernel.
numbers = [1, 2, 3]
add_three_to_first(numbers)
print("The identifier is the same. So `list` was modified.")

input was [1, 2, 3] , identifier 4498282888
new value is [4, 2, 3] , identifier 4498282888
The identifier is the same. So `list` was modified.


This is considered a **side effect**: the function modifies an object in its calling environment (here, by mutating the list that was passed in).
<br>No `return` is needed.

# `*args` and `**kwargs`


variable-length arguments: `*args` lets a function take an arbitrary number of positional arguments, and `**kwargs` an arbitrary number of keyword arguments ("kwargs" means "keyword arguments")


`*args`

In [69]:
def myFun(normal_arg, *arglist):
    """Print the first positional argument, then every extra positional argument.

    Positional arguments after ``normal_arg`` are collected into ``arglist``.
    """
    print(f"first normal arg: {normal_arg}")
    for extra in arglist:
        print(f"another arg through *arglist: {extra}")


myFun('1', 3, 'hi')

first normal arg: 1
another arg through *arglist: 3
another arg through *arglist: hi


In a function definition, `*` "packs" the extra positional arguments into a tuple (NOT a list).

`**kwargs`

In [71]:
def myFun(**kwargs):
    """Print each keyword argument as a ``(name, value)`` tuple, one per line."""
    for pair in kwargs.items():
        print(pair)


myFun(first='li', mid='re', now=5)

('first', 'li')
('mid', 're')
('now', 5)


Why?
<br>https://stackoverflow.com/questions/1769403/what-is-the-purpose-and-use-of-kwargs
<br>because ** unpacks dictionaries:


the `myFun(first ='li', mid ='re', now = 5)` above
<br> is the same as

In [74]:
# `**args` unpacks the dict into keyword arguments, so the call below is
# equivalent to myFun(first='li', mid='re', now=5).
args = {'first': 'li', 'mid': 're', 'now': 5}
myFun(**args)

('first', 'li')
('mid', 're')
('now', 5)


# Docstring

In [68]:
%run -i functions.py

In functions.py:

In [39]:
def add_two(number):
    """one-line summary: add 2 to any number!

    further elaboration would go here
    this is how addition works
    """
    
    # The docstring above is kept verbatim: it is the text that
    # `add_two.__doc__` and help(add_two) display.
    return number+2

When viewing the properties of add_two, the `__doc__` dunder attribute is where its docstring is stored:

In [69]:
dir(add_two)[5:10]

['__defaults__', '__delattr__', '__dict__', '__dir__', '__doc__']

In [70]:
print(add_two.__doc__)

one-line summary: add 2 to any number!

    further elaboration would go here
    this is how addition works
    


In [71]:
help(add_two)

Help on function add_two in module __main__:

add_two(number)
    one-line summary: add 2 to any number!
    
    further elaboration would go here
    this is how addition works



# lambda functions

- single use, single line
- named or anonymous

### **General form**

`lambda arguments: expression`

In [1]:
lambda input: input + 2

<function __main__.<lambda>(input)>

### **WithOUT lambda:** long but get a reusable get_first function

In [24]:
# NOTE(review): this name shadows the built-in `dict` type, and the value is actually a list of tuples.
dict = [(37,1,3),(2,5,7),(10,2,14)]

In [27]:
def get_first(x):
    """Return the element at index 0 of an indexable value such as a list or tuple."""
    first_item = x[0]
    return first_item
# .sort() reorders the list in place, so the list is displayed on the following line.
dict.sort(key = get_first) # explicit, but hard to read: have to look elsewhere to know what get_first is
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### **With lambda:** short but no reusable function

basically don't need **def** or **return**

In [26]:
# In-place sort with the key function written inline: each tuple is ordered by its first element.
dict.sort(key = lambda x: x[0])
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### **Named lambda**

In [28]:
# Binding a lambda to a name makes it reusable as a key function.
# (PEP 8 recommends a `def` statement over assigning a lambda to a name.)
get_first = lambda x: x[0]
dict.sort(key = get_first)
dict

[(2, 5, 7), (10, 2, 14), (37, 1, 3)]

### Use inside other functions like **apply**, **map**

In [36]:
def size(sepal_length):
    """Return "big" when ``sepal_length`` is greater than 5, otherwise ``None``."""
    return "big" if sepal_length > 5 else None
    
# axis=1 hands each row to the lambda, which picks out the column value that size() needs.
iris['size'] = iris.apply(lambda x: size(x['sepal_length']),axis=1)
iris[0:3]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,size
0,5.1,3.5,1.4,0.2,0.0,big
1,4.9,3.0,1.4,0.2,0.0,
2,4.7,3.2,1.3,0.2,0.0,


Multiple conditions:

In [33]:
def size(sepal_length,sepal_width):
    """Return "big" if sepal_length > 4 and sepal_width > 3, otherwise "small"."""
    is_big = sepal_length > 4 and sepal_width > 3
    return "big" if is_big else "small"
    
# axis=1 hands each row to the lambda, which passes both column values on to size().
iris['size'] = iris.apply(lambda x: size(x['sepal_length'],x['sepal_width']),axis=1)
iris[0:3]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,size
0,5.1,3.5,1.4,0.2,0.0,big
1,4.9,3.0,1.4,0.2,0.0,small
2,4.7,3.2,1.3,0.2,0.0,big


# try

In [None]:
import logging
logger = logging.getLogger('Training')  # named logger used by the try/except snippets below
# NOTE(review): the commented-out call references LEVELS, which is not defined in the visible part of this notebook.
#logger.setLevel(LEVELS['DEBUG'])


In [None]:
 # NOTE(review): fragment lifted from a function body -- `kwargs` must be supplied
 # by the enclosing scope. Indentation is now consistent so the cell parses.
 try:
     logger.info(f'Initializing the configuration for {kwargs["workspace"]} feature retrieval...')
 except Exception as e:
     # Log the failure together with its traceback, then re-raise the same exception.
     # A bare `raise` keeps the original traceback intact.
     logger.exception(f'Error occurred :- {e}')
     raise

In [None]:
class Feature():
    """Excerpt of a class whose constructor resolves ``num_months``.

    NOTE(review): relies on the module-level names ``params``, ``MIN_MONTHS``,
    ``MAX_MONTHS`` and ``logger``; only ``logger`` is defined in the visible notebook.
    """

    def __init__(self, **kwargs):
        """Take ``num_months`` from the keyword arguments or fall back to the config.

        Args:
            **kwargs: may contain ``num_months``. When it is missing or falsy, the
                value in ``params['range']['num_months']`` is used and range-checked;
                otherwise the given value is written to ``params['range']['num_months']``.

        Raises:
            RuntimeError: if the configured value is below ``MIN_MONTHS`` or above
                ``MAX_MONTHS``.
        """
        try:
            # .get(): a missing key falls back to the config instead of raising KeyError.
            requested_months = kwargs.get('num_months')
            if not requested_months:
                num_months = params['range']['num_months']
                # `or`, not `|`: bitwise-or binds tighter than the comparisons, which
                # turned the original test into `num_months < (MIN_MONTHS | num_months) > MAX_MONTHS`.
                if num_months < MIN_MONTHS or num_months > MAX_MONTHS:
                    # Both fragments are f-strings (with a separating space) so the
                    # limits are actually interpolated into the message.
                    raise RuntimeError(
                        f'Invalid num_months specified {num_months} in config, the value '
                        f'should be between {MIN_MONTHS} & {MAX_MONTHS}')
            else:
                # NOTE(review): num_months is not stored on the instance in this excerpt.
                num_months = requested_months
                params['range']['num_months'] = requested_months

        except Exception as e:
            # Log with traceback, then re-raise the same exception unchanged.
            logger.exception(f'Error occurred :- {e}')
            raise

# Decorators

https://towardsdatascience.com/five-advanced-python-features-169c96682350
    
Decorators allow us to modify the behavior of a function without explicitly modifying the function itself.