# Week 3 : Python Data Science Toolbox (Part 1)

## Writing your own functions

### User-defined functions & Single parameter functions
- Built-in functions  

ex) `str()`  
    
 
- User-defined function  

ex)  

```
def func(value):  # function header; `value` is the parameter
    """Print the square of value."""
    squared = value ** 2  # function body
    print(squared)
```

```
def func(value):  # function header; `value` is the parameter
    """Return the square of value.

    Args:
        value: Number to square.

    Returns:
        value raised to the power 2.
    """
    new_value = value ** 2  # function body
    return new_value  # the caller can assign the result to a variable
```

- Docstrings : documentation for your function
    - Placed in the immediate line after the function header

In [1]:
# Strings
# `+` concatenates strings; `*` multiplies numbers but repeats strings
object1 = "data" + "analysis" + "visualization"
object2 = 1 * 3    # integer multiplication
object3 = "1" * 3  # string repetition
print(object1, object2, object3, sep="\n")

dataanalysisvisualization
3
111


In [2]:
# Built-in functions
x = 4.89
y1 = str(x)    # str() returns a new string object
y2 = print(x)  # print() writes to stdout and returns None
print(type(y1), type(y2), sep="\n")

4.89
<class 'str'>
<class 'NoneType'>


In [4]:
# Writing a simple function
def shout():
    """Print 'congratulations' followed by three exclamation marks."""
    print('congratulations' + '!!!')


shout()

congratulations!!!


In [5]:
# Single-parameter function
def shout(word):
    """Print word followed by three exclamation marks."""
    print(word + '!!!')


shout('congrats')

congrats!!!


In [7]:
# Functions that return single values
def shout(word):
    """Return word followed by three exclamation marks."""
    return word + '!!!'


yell = shout('congrats')
print(yell)

congrats!!!


### Multiple Parameters and Return Values
- Accept more than 1 parameter :  

ex)  
```
def raise_to_power(val1, val2):
    """Return val1 raised to the power val2."""
    return val1 ** val2
```

- Tuples
    - Can contain multiple values
    - Immutable
    - Parentheses
    - Unpack a tuple into several variables
    - Access tuple elements like lists

- Returning multiple values

ex)
```
def raise_to_power(val1, val2):
    """Raise val1 to the power of val2 and vice versa.

    Args:
        val1: First number.
        val2: Second number.

    Returns:
        The tuple ``(val1 ** val2, val2 ** val1)``.
    """
    new_value1 = val1 ** val2
    new_value2 = val2 ** val1
    # Pack both results into one tuple so a single return hands back both
    new_tuple = (new_value1, new_value2)
    return new_tuple
```

In [8]:
# Functions with multiple parameters
def shout(word1, word2):
    """Return both words, each followed by '!!!', joined into one string."""
    return (word1 + '!!!') + (word2 + '!!!')


yell = shout('congratulations', 'you')
print(yell)

congratulations!!!you!!!


In [9]:
# Tuples and unpacking them
nums = 3, 4, 6     # the parentheses are optional when building a tuple
n1, n2, n3 = nums  # unpack the three elements into separate names
# Tuples are immutable, so a new tuple is built rather than editing nums
even_nums = (2,) + (n2, n3)
print(even_nums)

(2, 4, 6)


In [10]:
# Functions that return multiple values
def shout_all(word1, word2):
    """Return a 2-tuple holding each word followed by '!!!'."""
    return word1 + '!!!', word2 + '!!!'


# Unpack the returned tuple straight into two names
yell1, yell2 = shout_all('congratulations', 'you')
print(yell1, yell2, sep="\n")

congratulations!!!
you!!!


### Bringing it all together
- Function for analyzing Twitter data

In [11]:
'''
# Import pandas
import pandas as pd

# Import Twitter data as DataFrame: df
df = pd.read_csv('tweets.csv')

# Initialize an empty dictionary: langs_count
langs_count = {}

# Extract column from DataFrame: col
col = df['lang']

# Iterate over lang column in DataFrame
for entry in col:

    # If the language is in langs_count, add 1 
    if entry in langs_count.keys():
        langs_count[entry] += 1
    # Else add the language to langs_count, set the value to 1
    else:
        langs_count[entry] = 1

# Print the populated dictionary
print(langs_count)
'''

"\n# Import pandas\nimport pandas as pd\n\n# Import Twitter data as DataFrame: df\ndf = pd.read_csv('tweets.csv')\n\n# Initialize an empty dictionary: langs_count\nlangs_count = {}\n\n# Extract column from DataFrame: col\ncol = df['lang']\n\n# Iterate over lang column in DataFrame\nfor entry in col:\n\n    # If the language is in langs_count, add 1 \n    if entry in langs_count.keys():\n        langs_count[entry] += 1\n    # Else add the language to langs_count, set the value to 1\n    else:\n        langs_count[entry] = 1\n\n# Print the populated dictionary\nprint(langs_count)\n"

In [12]:
'''
# Define count_entries()
def count_entries(df, col_name):
    """Return a dictionary with counts of 
    occurrences as value for each key."""

    # Initialize an empty dictionary: langs_count
    langs_count = {}
    
    # Extract column from DataFrame: col
    col = df[col_name]
    
    # Iterate over lang column in DataFrame
    for entry in col:

        # If the language is in langs_count, add 1
        if entry in langs_count.keys():
            langs_count[entry] +=1
        # Else add the language to langs_count, set the value to 1
        else:
            langs_count[entry] = 1

    # Return the langs_count dictionary
    return langs_count

# Call count_entries(): result
result = count_entries(tweets_df, 'lang')

# Print the result
print(result)
'''

'\n# Define count_entries()\ndef count_entries(df, col_name):\n    """Return a dictionary with counts of \n    occurrences as value for each key."""\n\n    # Initialize an empty dictionary: langs_count\n    langs_count = {}\n    \n    # Extract column from DataFrame: col\n    col = df[col_name]\n    \n    # Iterate over lang column in DataFrame\n    for entry in col:\n\n        # If the language is in langs_count, add 1\n        if entry in langs_count.keys():\n            langs_count[entry] +=1\n        # Else add the language to langs_count, set the value to 1\n        else:\n            langs_count[entry] = 1\n\n    # Return the langs_count dictionary\n    return langs_count\n\n# Call count_entries(): result\nresult = count_entries(tweets_df, \'lang\')\n\n# Print the result\nprint(result)\n'

## Default arguments, variable-length arguments and scope

### Scope and user-defined functions
- Scope : part of the program where an object or name may be accessible
    - `global` : defined in the main body of a script
    - `local` : defined within a function -> cannot be accessed outside that function
    - `Built-in scope` : names pre-defined in Python's `builtins` module
    - Name lookup order : local scope first, then any enclosing functions, then the global scope, and finally the built-in scope

In [14]:
# Scopes
num = 5


def func1():
    """Print a local num; the global num is left untouched."""
    num = 3  # local name that shadows the global one inside this function
    print(num)


def func2():
    """Print twice the global num, then rebind the global num to 6."""
    global num
    doubled = num * 2  # reads the global value before it is rebound below
    num = 6
    print(doubled)


func1()
func2()
num

3
10


6

In [15]:
# global
team = "teen titans"


def change_team():
    """Rebind the module-level name team to 'justice league'."""
    global team  # without this, the assignment would create a local variable
    team = "justice league"


print(team)  # value before the call
change_team()
print(team)  # value after the call

teen titans
justice league


In [16]:
# built-ins scope
import builtins
dir(builtins)

['ArithmeticError',
 'AssertionError',
 'AttributeError',
 'BaseException',
 'BlockingIOError',
 'BrokenPipeError',
 'BufferError',
 'ChildProcessError',
 'ConnectionAbortedError',
 'ConnectionError',
 'ConnectionRefusedError',
 'ConnectionResetError',
 'EOFError',
 'Ellipsis',
 'EnvironmentError',
 'Exception',
 'False',
 'FileExistsError',
 'FileNotFoundError',
 'FloatingPointError',
 'GeneratorExit',
 'IOError',
 'ImportError',
 'IndentationError',
 'IndexError',
 'InterruptedError',
 'IsADirectoryError',
 'KeyError',
 'KeyboardInterrupt',
 'LookupError',
 'MemoryError',
 'ModuleNotFoundError',
 'NameError',
 'None',
 'NotADirectoryError',
 'NotImplemented',
 'NotImplementedError',
 'OSError',
 'OverflowError',
 'PermissionError',
 'ProcessLookupError',
 'RecursionError',
 'ReferenceError',
 'RuntimeError',
 'StopAsyncIteration',
 'StopIteration',
 'SyntaxError',
 'SystemError',
 'SystemExit',
 'TabError',
 'TimeoutError',
 'True',
 'TypeError',
 'UnboundLocalError',
 'UnicodeDecode

### Nested functions
- Function inside a function
    - Inner -> Outer

ex) 

```
def mod2plus(x1, x2, x3):
    """Return a tuple with (x % 2) + 5 computed for each of the three values."""

    def inner(x):
        """Return the remainder of x divided by 2, plus 5."""
        return x % 2 + 5

    return tuple(inner(x) for x in (x1, x2, x3))
```

- Returning functions
```
def raise_val(n):
    """Return a function that raises its argument to the power n.

    Args:
        n: Exponent captured by the returned inner function.

    Returns:
        A one-argument function ``inner(x)`` that returns ``x ** n``.
    """

    def inner(x):
        """Raise x to the power of n."""
        raised = x ** n  # n comes from the enclosing raise_val() call
        return raised

    return inner
```

ex)
```
square = raise_val(2)
cube = raise_val(3)
print(square(2), cube(2))  # prints: 4 8  (2 ** 2 and 2 ** 3)
```

- `nonlocal` : for enclosing scope

In [17]:
# Nested functions 1
def three_shouts(word1, word2, word3):
    """Return a 3-tuple with '!!!' appended to each word."""

    def inner(word):
        """Return word with '!!!' appended."""
        return word + '!!!'

    return tuple(map(inner, (word1, word2, word3)))


print(three_shouts('a', 'b', 'c'))

('a!!!', 'b!!!', 'c!!!')


In [18]:
# Nested functions 2
def echo(n):
    """Return a function that repeats its argument n times."""

    def inner_echo(word1):
        """Return n copies of word1 concatenated."""
        return word1 * n

    return inner_echo


# Each call to echo() builds a separate function with its own n
twice = echo(2)
thrice = echo(3)

print(twice('hello'), thrice('hello'))

hellohello hellohellohello


In [19]:
# nonlocal
def echo_shout(word):
    """Print word doubled, then print it again with '!!!' appended.

    The '!!!' is added by a nested function that rebinds a variable of
    this enclosing function through ``nonlocal``.
    """
    echo_word = word * 2

    def shout():
        """Append '!!!' to echo_word in the enclosing scope."""
        nonlocal echo_word
        echo_word = echo_word + '!!!'

    print(echo_word)  # before the nested function runs
    shout()
    print(echo_word)  # after the nested function rebinds it


echo_shout('hello')

hellohello
hellohello!!!


### Default and flexible arguments
*Default arguments*

ex)
```
def power(number, pow=1):
    """Return number raised to the power pow (pow defaults to 1)."""
    return number ** pow
```

Uses the default (`pow=1`) if no value is passed for `pow` when the function is called

*Flexible arguments*

- `*args`

ex)
```
def add_all(*args):
    """Return the sum of all positional arguments (0 when none are given)."""
    total = 0
    for value in args:
        total += value
    return total
```

- `**kwargs`

ex)
```
def print_all(**kwargs):
    """Print out key-value pairs in **kwargs.

    Args:
        **kwargs: Arbitrary keyword arguments; each one is printed on its
            own line as ``key: value``.
    """
    for key, value in kwargs.items():
        # str() lets non-string values be printed instead of raising
        # TypeError during concatenation.
        print(key + ": " + str(value))
```

In [20]:
# Functions with one default argument
def shout_echo(word1, echo=1):
    """Return echo copies of word1 followed by '!!!' (echo defaults to 1)."""
    return word1 * echo + '!!!'


no_echo = shout_echo("Hey")             # relies on the default echo=1
with_echo = shout_echo("Hey", echo=5)   # overrides the default

print(no_echo)
print(with_echo)

Hey!!!
HeyHeyHeyHeyHey!!!


In [21]:
# Functions with multiple default arguments
def shout_echo(word1, echo=1, intense=False):
    """Return echo copies of word1 followed by '!!!'.

    Args:
        word1: String to repeat.
        echo: Number of copies of word1 to concatenate (default 1).
        intense: When truthy, the repeated word is upper-cased before
            '!!!' is appended (default False).

    Returns:
        The shouted string.
    """
    echo_word = word1 * echo

    # Test truthiness rather than `is True`, so truthy flags such as 1 also
    # switch on the upper-case form.
    if intense:
        echo_word = echo_word.upper()

    return echo_word + '!!!'


with_big_echo = shout_echo("Hey", echo=5, intense=True)
big_no_echo = shout_echo("Hey", intense=True)

print(with_big_echo)
print(big_no_echo)

HEYHEYHEYHEYHEY!!!
HEY!!!


In [23]:
# Define gibberish
def gibberish(*args):
    """Concatenate strings in *args together.

    Args:
        *args: Any number of strings.

    Returns:
        One string made of all arguments in order; '' when called with
        no arguments.
    """
    # str.join builds the result in a single pass instead of creating a new
    # intermediate string for every argument.
    return "".join(args)


# Call gibberish() with one string: one_word
one_word = gibberish("luke")

# Call gibberish() with five strings: many_words
many_words = gibberish("luke", "leia", "han", "obi", "darth")

# Print one_word and many_words
print(one_word)
print(many_words)

luke
lukeleiahanobidarth


In [24]:
# Functions with variable-length keyword arguments (**kwargs)
def report_status(**kwargs):
    """Print out the status of a movie character.

    Args:
        **kwargs: Arbitrary keyword arguments; each one is printed on its
            own line as ``key: value`` between a begin and an end banner.
    """
    print("\nBEGIN: REPORT\n")

    for key, value in kwargs.items():
        # str() lets non-string values (numbers, booleans, ...) be reported
        # instead of raising TypeError during concatenation.
        print(key + ": " + str(value))

    print("\nEND REPORT")


report_status(name='luke', affiliation='jedi', status='missing')
report_status(name='anakin', affiliation='sith lord', status='deceased')


BEGIN: REPORT

name: luke
affiliation: jedi
status: missing

END REPORT

BEGIN: REPORT

name: anakin
affiliation: sith lord
status: deceased

END REPORT


## Bringing it all together
- Count occurrences for an arbitrary column

In [26]:
'''
def count_entries(df, col_name):
    """Return a dictionary with counts of
    occurrences as value for each key."""

    cols_count = {}
    col = df[col_name]
    
    for entry in col:
        if entry in cols_count.keys():
            cols_count[entry] += 1
        else:
            cols_count[entry] = 1

    return cols_count

result1 = count_entries(tweets_df,'lang')
result2 = count_entries(tweets_df,'source')

# Print result1 and result2
print(result1)
print(result2)
'''

'\ndef count_entries(df, col_name):\n    """Return a dictionary with counts of\n    occurrences as value for each key."""\n\n    cols_count = {}\n    col = df[col_name]\n    \n    for entry in col:\n        if entry in cols_count.keys():\n            cols_count[entry] += 1\n        else:\n            cols_count[entry] = 1\n\n    return cols_count\n\nresult1 = count_entries(tweets_df,\'lang\')\nresult2 = count_entries(tweets_df,\'source\')\n\n# Print result1 and result2\nprint(result1)\nprint(result2)\n'

In [27]:
'''
# Define count_entries()
def count_entries(df, *args):
    """Return a dictionary with counts of
    occurrences as value for each key."""
    
    #Initialize an empty dictionary: cols_count
    cols_count = {}
    
    # Iterate over column names in args
    for col_name in args:
    
        # Extract column from DataFrame: col
        col = df[col_name]
    
        # Iterate over the column in DataFrame
        for entry in col:
    
            # If entry is in cols_count, add 1
            if entry in cols_count.keys():
                cols_count[entry] += 1
    
            # Else add the entry to cols_count, set the value to 1
            else:
                cols_count[entry] = 1

    # Return the cols_count dictionary
    return cols_count

# Call count_entries(): result1
result1 = count_entries(tweets_df, 'lang')

# Call count_entries(): result2
result2 = count_entries(tweets_df, 'lang', 'source')

# Print result1 and result2
print(result1)
print(result2)
'''

'\n# Define count_entries()\ndef count_entries(df, *args):\n    """Return a dictionary with counts of\n    occurrences as value for each key."""\n    \n    #Initialize an empty dictionary: cols_count\n    cols_count = {}\n    \n    # Iterate over column names in args\n    for col_name in args:\n    \n        # Extract column from DataFrame: col\n        col = df[col_name]\n    \n        # Iterate over the column in DataFrame\n        for entry in col:\n    \n            # If entry is in cols_count, add 1\n            if entry in cols_count.keys():\n                cols_count[entry] += 1\n    \n            # Else add the entry to cols_count, set the value to 1\n            else:\n                cols_count[entry] = 1\n\n    # Return the cols_count dictionary\n    return cols_count\n\n# Call count_entries(): result1\nresult1 = count_entries(tweets_df, \'lang\')\n\n# Call count_entries(): result2\nresult2 = count_entries(tweets_df, \'lang\', \'source\')\n\n# Print result1 and result2\npri

## Lambda functions and error-handling

### Lambda functions
```
# Anonymous function bound to a name: raises x to the power y
raise_to_power = lambda x, y: x ** y
```

- `map(func, seq)` : applies the function to ALL elements in the sequence
    - Here, using the lambda function may be useful
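- `filter(func, seq)` : keeps only the elements of the sequence for which the function returns a true value
- `reduce(func, seq)` : `from functools import reduce`
    - `reduce(function, iterable[, initializer])` : the aggregating function, the iterable data, and an optional initial value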

In [28]:
# lambda functions
# The expression after the colon is the lambda's return value
add_bangs = lambda a: a + '!!!'
add_bangs('hello')

'hello!!!'

In [29]:
# Writing lambda functions
# Two-parameter lambda: repeat word1 echo times
echo_word = lambda word1, echo: word1 * echo
result = echo_word(word1='hey', echo=5)
print(result)

heyheyheyheyhey


In [31]:
# map() -> returns as 'map' object
spells = [
    "protego",
    "accio",
    "expecto patronum",
    "legilimens",
]
# map() is lazy: it returns a map object, not a list
shout_spells = map(lambda spell: spell + '!!!', spells)
print(type(shout_spells))

# list() consumes the map object and materializes the results
shout_spells_list = list(shout_spells)
print(shout_spells_list)

<class 'map'>
['protego!!!', 'accio!!!', 'expecto patronum!!!', 'legilimens!!!']


In [32]:
# filter() -> returns as 'filter' object
fellowship = [
    'frodo', 'samwise', 'merry', 'pippin', 'aragorn',
    'boromir', 'legolas', 'gimli', 'gandalf',
]
# Keep only the names longer than six characters; filter() is lazy too
result = filter(lambda member: len(member) > 6, fellowship)
print(type(result))

# list() consumes the filter object and materializes the matches
result_list = list(result)
print(result_list)

<class 'filter'>
['samwise', 'aragorn', 'boromir', 'legolas', 'gandalf']


In [33]:
# reduce()
from functools import reduce

stark = ['robb', 'sansa', 'arya', 'brandon', 'rickon']
# reduce() folds the list left to right: the running result is passed as the
# first argument and the next element as the second
result = reduce(lambda joined, name: joined + name, stark)
print(result)

robbsansaaryabrandonrickon


### Introduction to error handling
- Exceptions : errors detected during execution
- Catch exceptions with a `try-except` clause
    - Run the code following `try`
    - If an exception is raised, run the code following `except`
- Raise an error
    - `raise`

In [34]:
# errors: calling len() on an int raises TypeError, because an int has no length
len(525600)

TypeError: object of type 'int' has no len()

In [35]:
# Error handling with try-except
def shout_echo(word1, echo=1):
    """Concatenate echo copies of word1 and three exclamation marks.

    Args:
        word1: String to repeat.
        echo: Number of copies of word1 to concatenate (default 1).

    Returns:
        The shouted string, or '' when the argument types do not support
        the repetition or the concatenation (a message is printed then).
    """
    shout_words = ""

    try:
        shout_words = word1 * echo + '!!!'
    except TypeError:
        # Only type mismatches match the message below; any other error is
        # left to propagate instead of being silently swallowed.
        print("word1 must be a string and echo must be an integer.")

    return shout_words


shout_echo("particle", echo="accelerator")

word1 must be a string and echo must be an integer.


''

In [40]:
# Error handling by raising an error
def shout_echo(word1, echo=1):
    """Return echo copies of word1 followed by '!!!'.

    Raises ValueError when echo is negative.
    """
    # Guard clause: reject a negative repeat count before doing any work
    if echo < 0:
        raise ValueError('echo must be greater than or equal to 0')

    return word1 * echo + '!!!'

print(shout_echo("particle", echo=5))
# A negative echo is rejected: this call raises ValueError
print(shout_echo("hello", echo=-1))

particleparticleparticleparticleparticle!!!


ValueError: echo must be greater than or equal to 0

### Bringing it all together

In [41]:
'''
result = filter(lambda x : x[0:2]=='RT', tweets_df['text'])
res_list = list(result)

print(res_list)
'''

"\nresult = filter(lambda x : x[0:2]=='RT', tweets_df['text'])\nres_list = list(result)\n\nprint(res_list)\n"

In [None]:
'''
# Define count_entries()
def count_entries(df, col_name='lang'):
    """Return a dictionary with counts of
    occurrences as value for each key."""

    # Initialize an empty dictionary: cols_count
    cols_count = {}

    # Add try block
    try:
        # Extract column from DataFrame: col
        col = df[col_name]
        
        # Iterate over the column in DataFrame
        for entry in col:
    
            # If entry is in cols_count, add 1
            if entry in cols_count.keys():
                cols_count[entry] += 1
            # Else add the entry to cols_count, set the value to 1
            else:
                cols_count[entry] = 1
    
        # Return the cols_count dictionary
        return cols_count

    # Add except block
    except:
        print('The DataFrame does not have a ' + col_name + ' column.')

# Call count_entries(): result1
result1 = count_entries(tweets_df, 'lang')

# Print result1
print(result1)
'''

In [None]:
'''
# Define count_entries()
def count_entries(df, col_name='lang'):
    """Return a dictionary with counts of
    occurrences as value for each key."""
    
    # Raise a ValueError if col_name is NOT in DataFrame
    if col_name not in df.columns:
        raise ValueError('The DataFrame does not have a ' + col_name + ' column.')

    # Initialize an empty dictionary: cols_count
    cols_count = {}
    
    # Extract column from DataFrame: col
    col = df[col_name]
    
    # Iterate over the column in DataFrame
    for entry in col:

        # If entry is in cols_count, add 1
        if entry in cols_count.keys():
            cols_count[entry] += 1
            # Else add the entry to cols_count, set the value to 1
        else:
            cols_count[entry] = 1
        
        # Return the cols_count dictionary
    return cols_count

# Call count_entries(): result1
result1 = count_entries(tweets_df)

# Print result1
print(result1)
'''