In [1]:
import pandas

In [2]:
import glob

In [None]:
glob.glob()

## making a function is a two-phase project:
1. create the function
2. call the function

In [6]:
# 1. create the function
def print_hello():
    # give your function a useful name, e.g. "print_hello"
    """This function provides me a hello"""
    # use triple quotes to describe what your function does = write documentation for your function
    print("hello")

In [5]:
# 2. call the function
print_hello()

hello


## you can also import functions from a file

In [7]:
from utils import print_goodbye
# this file needs to have a .py extension

In [9]:
print_goodbye()

goodbye


## create more powerful functions (with parameters)

In [10]:
def print_date(year, month, day):
    print(year, month, day)

In [11]:
print_date(2000, 11, 25)

2000 11 25


In [12]:
print_date(11, 1979, 25)

11 1979 25


In [15]:
print_date(month=11, year=1979, day=25)

1979 11 25


In [18]:
def print_date(year, month, day):
    assert month <= 12
    # make sure that the month number is smaller or equal to 12, this is the "pragmetic level of programming"
    assert day <= 31
    print(year, month, day)

In [17]:
print_date(month=34, year=1979, day=25)
# this way I'll create my own error, it's a way of communicating with your future self

AssertionError: 

In [19]:
def return_combined_date(year, month, day):
    assert month <= 12
    assert day <= 31
    return year + month + day
    # this won't give me what I want, see what happens when you call this function

In [21]:
return_combined_date(2000, 11, 2)


2013

In [24]:
def return_combined_date(year, month, day):
    """
    Return ISO 8601
    
    See Wikipedia: https://en.wikipedia.org/wiki/ISO_8601
    
    Parameters
    ----------
    year: int
        the year of interest
    month: int
        the month of interest (!! between 1 and 12 !!)
    day: int
        the day of interest (!! between 1 and 31 !!)
    """
    assert month <= 12
    assert day <= 31
    return str(year) + '-' + str(month) + '-' + str(day)

In [25]:
return_combined_date(2000, 11, 2)

'2000-11-2'

In [None]:
# Fill in the blanks to create a function that takes a single filename as an argument, loads the data in the file named by the argument, and returns the minimum value of column gdpPercap_1972 in that data:
    
import pandas

def min_in_data(___):
    data = ____
    return ____

In [28]:
# first try it without thinkin about a function
filename = '../data/gapminder_gdp_oceania.csv'
data = pandas.read_csv(filename, index_col = "country")
data["gdpPercap_1972"].min()

16046.03728

In [29]:
# if it works, turn it into a function
def min_in_data(filename):
    data = pandas.read_csv(filename, index_col = "country")
    return data["gdpPercap_1972"].min()

In [30]:
min_in_data('../data/gapminder_gdp_oceania.csv')

16046.03728

In [31]:
def min_in_data(filename, column_name):
    data = pandas.read_csv(filename, index_col = "country")
    return data[column_name].min()

In [32]:
min_in_data('../data/gapminder_gdp_oceania.csv', "gdpPercap_1972")

16046.03728

In [33]:
# you can provide a default value for one or more parameters
def min_in_data(filename, column_name = "gdpPercap_1972"):
    data = pandas.read_csv(filename, index_col = "country")
    return data[column_name].min()

In [34]:
# now, python will use "gdpPercap_1972" if you don't provide a column_name parameter, but you have to power to overwrite this default value
min_in_data('../data/gapminder_gdp_oceania.csv')

16046.03728

In [35]:
# also return the country to which the minimal value belongs
def min_in_data(filename, column_name = "gdpPercap_1972"):
    data = pandas.read_csv(filename, index_col = "country")
    return data[column_name].min(), data[column_name].argmin()

In [36]:
min_in_data('../data/gapminder_gdp_oceania.csv')

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  after removing the cwd from sys.path.


(16046.03728, 'New Zealand')

In [37]:
# python suggested to use idxmin instead of argmin to return the index to which the minimal value belongs
def min_in_data(filename, column_name = "gdpPercap_1972"):
    """
    Provide minimal value and the country it belons to
    
    Parameters
    ----------
    
    """
    data = pandas.read_csv(filename, index_col = "country")
    return data[column_name].min(), data[column_name].idxmin()

In [38]:
min_in_data('../data/gapminder_gdp_oceania.csv')

(16046.03728, 'New Zealand')

In [39]:
value, country = min_in_data('../data/gapminder_gdp_oceania.csv')

In [40]:
value

16046.03728

In [41]:
country

'New Zealand'