# Python functions are objects themselves

Functions in Python are objects: you can refer to them by name, store them in a list, and pass them as arguments to other functions.

In [None]:
# messy state names: stray whitespace, punctuation and inconsistent capitalization
states = [
    '   Alabama ',
    'Georgia!',
    'Georgia',
    'georgia',
    'FlOrIda',
    'south  carolina##',
    'West virginia?',
]

In [None]:
import re  # package for regular expressions

# here is a function that applies a series of operations to clean up the strings

def clean_strings1(strings):
    """Return a new list with every string in `strings` cleaned up.

    Each string is stripped of surrounding whitespace, has the
    characters !, # and ? removed, and is then converted to title case.
    """
    return [
        re.sub('[!#?]', '', text.strip()).title()  # strip -> drop punctuation -> title case
        for text in strings
    ]

In [None]:
clean_strings1(states)  # when we apply the function to the list, it cleans up the messy text

In [None]:
# we define a new function called remove_punctuation
def remove_punctuation(value):
    """Return `value` with every !, # and ? character removed."""
    unwanted = re.compile('[!#?]')
    return unwanted.sub('', value)

In [None]:
# this is a list of functions 
clean_ops = [str.strip, remove_punctuation, str.title]

In [None]:
# just to demonstrate what these functions do...
str.strip('    alabama    ')

In [None]:
# the function clean strings takes two arguments:
# a list of strings
# a list of functions
def clean_strings2(strings, ops):
    """Apply every function in `ops`, in order, to every string in `strings`.

    strings: an iterable of strings
    ops: an iterable of one-argument functions; each receives the result
         of the previous one

    Returns a new list holding the transformed strings.
    """
    # materialize ops once: a one-shot iterator (generator, map object, ...)
    # would otherwise be used up after the first string
    ops = tuple(ops)
    result = []
    for value in strings:            # we loop over each string
        for function in ops:         # for each string, we loop over the functions listed in ops
            value = function(value)  # we update the value each time
        result.append(value)         # we append the final value to the list of results
    return result

In [None]:
clean_strings2(states, clean_ops)

In [None]:
clean_strings2(states, [str.strip, remove_punctuation, str.upper, lambda x: re.sub('  ',' ', x)])  
# I can provide a different list of functions

In [None]:
# the python function map() takes a function as its first argument and applies it to each element of a list

map(str.strip, states)  # map returns a map object

In [None]:
# to see the contents of the map object, you can put it into a list:
# map only allows you to specify one function
list(map(str.strip, states))

# lambda functions

In one of the earlier examples, I created a lambda function.

A lambda function allows you to create and use a new short function without having to formally define it.

In [None]:
import re
# messy state names: stray whitespace, punctuation and inconsistent capitalization
states = [
    '   Alabama ',
    'Georgia!',
    'Georgia',
    'georgia',
    'FlOrIda',
    'south  carolina##',
    'West virginia?',
]

In [None]:
# I could define a function that replaces  two spaces with one space:
def replace_space(x):
    """Return `x` with each pair of consecutive spaces replaced by one space."""
    single_spaced = re.sub('  ', ' ', x)
    return single_spaced

In [None]:
# and then apply it to the strings:
list(map(replace_space, states))

In [None]:
# however, because the code for the function is so short, it might be easier to just create
# a quick function without a formal name. These 'anonymous' functions are also known as lambda functions

list(map(lambda x: re.sub('  ',' ', x), states))

In [None]:
list(map(lambda string: string.title(), states))

In [None]:
# a slightly more complex example

list(map(lambda x: re.sub('[?#!]','', x.title().strip()) , states))

lambda functions are written in the form:

`lambda argument1, argument2, etc: expression to return`

In [None]:
# lambda functions can accept multiple arguments
# if you use it with map, you'll need to provide a list for each argument
list(map(lambda x, y: x + y, [1,2,3], [100,200,300]))

lambda functions are written in the form:

`lambda argument1, argument2, etc: expression to return`

In [None]:
# lambda functions can accept multiple arguments
# if you use it with map, you'll need to provide a list for each argument
list(map(lambda x, y: x + y, [1,2,3], [100,200,300]))

# Linear Algebra with NumPy

In [None]:
import numpy as np

In [None]:
x = np.array([[1,2],[3,4]])
print(x)

In [None]:
y = np.arange(1,5).reshape(2,2)
print(y)

In [None]:
x * x  # asterisk does elementwise multiplication (similar to R)

In [None]:
x @ x # @ sign does matrix multiplication, equivalent to R's %*%

In [None]:
np.dot(x, x)  # matrix multiplication can also be done via np.dot()

In [None]:
x @ x.T

## simple linear regression example

If we want to estimate the coefficients of a linear regression fit 

$$\hat{y} = \beta_0 + \beta_1 x$$

This can be achieved via linear algebra.

We represent x as a matrix with one row for each observation: the first column is a column of 1s (to go with $\beta_0$), and the second column holds the values of x.

Y is a column matrix of values.

The coefficient estimates that minimize the sum of squares for linear regression are

$$\hat{\beta} = (x^Tx)^{-1} x^T y$$

In [None]:
# design matrix: after the transpose, the first column is all 1s (for the intercept)
# and the second column holds the x values 1, 2, 3, 4 -- one row per observation
x = np.array([[1,1,1,1],[1,2,3,4]]).T
# response values, reshaped into a column with 4 rows
y = np.array([2,6,4,8]).reshape(4,1)

In [None]:
x

In [None]:
y

In [None]:
import matplotlib.pyplot as plt

In [None]:
# scatter plot of the x values (second column of the matrix x) against y
plt.scatter(x[:,1],y)
plt.show()  # show is a function: it has to be called, a bare `plt.show` only references it

The coefficient estimates that minimize the sum of squares for linear regression are

$$\hat{\beta} = (x^Tx)^{-1} x^T y$$

In [None]:
np.linalg.inv(x.T @ x) @ x.T @ y

(matches the results from R)

## other linear algebra functions

In [None]:
xtx = x.T @ x
print(xtx)

In [None]:
np.linalg.inv(xtx)

In [None]:
xtx @ np.linalg.inv(xtx)

In [None]:
a = np.linalg.cholesky(xtx)  # Cholesky decomposition of a square matrix
# produces a lower triangular matrix that, when multiplied by its transpose, reproduces the original
print(a)

In [None]:
a @ a.T  # recreate the original matrix

In [None]:
q,r = np.linalg.qr(xtx)  # qr decomposition

In [None]:
q # q is orthogonal, shown later

In [None]:
r # r is upper triangular

In [None]:
q @ r  #q times r is the original matrix

In [None]:
q @ q.T  # q is orthogonal, so q times its transpose gives the identity matrix

In [None]:
val, vec = np.linalg.eig(xtx)  # eigen values and eigen vectors of the matrix

In [None]:
print(val)

In [None]:
print(vec)

In [None]:
xtx @ vec[:,0]  # the matrix times its eigen vector produces a vector, that is 

In [None]:
vec[:,0] * val[0]  # equivalent to the eigenvector multiplied by a scalar