# Week 08: Local/Global Variables

## 0. Import Libraries 

In [1]:
# the "pandas" library is for manipualting datasets
import pandas as pd
import numpy as np
np.random.seed(100)

## 1. Array Continued (Stacking)

<font size='4'>

In [2]:
# 9.1.
x = np.ones((4,))
print(x)
print()
my_array = np.array([1,2,3,4])
my_resized_array=np.resize(my_array,(2,4))
my_2d_array = np.array([[1,2,3,4], [5,6,7,8]], dtype=np.int64)

# Concatentate `my_array` and `x`
print(np.concatenate([my_array, x], axis=0)) 
print()
print(np.stack([my_array, x], axis=0)) # makes a 2d array (along a new axis))
print()
print(np.stack([my_array, x], axis=1))

[1. 1. 1. 1.]

[1. 2. 3. 4. 1. 1. 1. 1.]

[[1. 2. 3. 4.]
 [1. 1. 1. 1.]]

[[1. 1.]
 [2. 1.]
 [3. 1.]
 [4. 1.]]


In [7]:
# 9.2. Stack arrays row-wise (by Python's default)
print(my_array)
print(my_2d_array)
print()
print(np.vstack([my_array, my_2d_array])) 
print()

# Stack arrays horizontally
print(np.hstack([my_resized_array, my_2d_array])) 

# With vstack and hstack, do not need to separately define the axis

[1 2 3 4]
[[1 2 3 4]
 [5 6 7 8]]

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]]

[[1 2 3 4 1 2 3 4]
 [1 2 3 4 5 6 7 8]]


In [11]:
# 9.3.
my_array = np.array([1,2,3,4])
my_resized_array=np.resize(my_array,(2,4))
my_2d_array = np.array([[1,2,3,4], [5,6,7,8]], dtype=np.int64)

my_stacked_array = np.hstack([my_resized_array, my_2d_array])
print(my_stacked_array)
print()
# if the second argument is an integer, it will split the array into an N equal sub-arrays
# Split `my_stacked_array` horizontally into two equal sub-arrays
print(np.split(my_stacked_array, 2, axis=1))
print('or')
print(np.hsplit(my_stacked_array, 2))
# Split `my_stacked_array` vertically into two equal sub-arrays
print(np.split(my_stacked_array, 2, axis = 0))
print('OR')
print(np.vsplit(my_stacked_array, 2))

[[1 2 3 4 1 2 3 4]
 [1 2 3 4 5 6 7 8]]

[array([[1, 2, 3, 4],
       [1, 2, 3, 4]]), array([[1, 2, 3, 4],
       [5, 6, 7, 8]])]
or
[array([[1, 2, 3, 4],
       [1, 2, 3, 4]]), array([[1, 2, 3, 4],
       [5, 6, 7, 8]])]
[array([[1, 2, 3, 4, 1, 2, 3, 4]]), array([[1, 2, 3, 4, 5, 6, 7, 8]])]
OR
[array([[1, 2, 3, 4, 1, 2, 3, 4]]), array([[1, 2, 3, 4, 5, 6, 7, 8]])]


In [12]:
# 9.4. Use indices in the form of a numpy array (allows you to split at a particular location)
print(my_stacked_array)
print(np.split(my_stacked_array, np.array([2,5]), axis=1)) # cuts at index 2 and index 5 (results in three pieces)

[[1 2 3 4 1 2 3 4]
 [1 2 3 4 5 6 7 8]]
[array([[1, 2],
       [1, 2]]), array([[3, 4, 1],
       [3, 4, 5]]), array([[2, 3, 4],
       [6, 7, 8]])]


In [13]:
# 9.5. Let's create a practical example and call back from the blood pressue problem.
# (2, 10, 5) refers to two sites, 10 patients each site, and 5 time points per patient.
BP_3d_arr = np.random.uniform(low=70, high=100, size=(2, 10, 5))
print(BP_3d_arr)
print(BP_3d_arr.shape)

[[[86.30214825 78.35108155 82.73552772 95.34328397 70.14156569]
  [73.64707362 90.12247254 94.77558265 74.10119769 87.25279988]
  [96.73965863 76.27606366 75.55984659 73.25130671 76.59092478]
  [99.35871354 94.35049447 75.15823038 94.48674246 78.22221241]
  [82.95112551 98.20089459 94.52948136 80.0833585  75.26231361]
  [81.18496139 70.17065522 77.5727906  93.86987525 70.45764914]
  [87.96530131 88.11413617 73.15443056 81.45830335 71.0942817 ]
  [96.7123469  99.42762571 71.79825966 96.71637834 87.30704498]
  [92.27439067 88.90551809 87.45526577 70.61317396 76.30079733]
  [86.34054635 93.07345513 77.52085687 78.57687071 95.57185264]]

 [[99.25019481 96.5455988  80.78523532 87.96576838 80.64386835]
  [80.20570646 75.34242969 77.13082626 71.34586847 85.16294289]
  [81.28757363 87.78416203 88.89825627 74.27800943 98.01523898]
  [98.39139642 88.06889973 81.63298841 80.89564012 76.13035831]
  [78.30295184 77.39607644 75.20824005 98.99829083 98.71037801]
  [87.93921053 91.93902259 80.21155669

In [17]:
# 9.5.1 axis takes one number
BP_3d_arr_mean_1 = np.mean(BP_3d_arr, axis=0) # Mean bp for matching patient IDs and time for each site 
#(taking the mean of two values with the same position in the smaller arrays)

print(BP_3d_arr_mean_1)
print(BP_3d_arr_mean_1.shape) 
print()

BP_3d_arr_mean_2 = np.mean(BP_3d_arr, axis=1) #Calculating time-specific bp measure for each site
print(BP_3d_arr_mean_2)
print(BP_3d_arr_mean_2.shape)
print()

BP_3d_arr_mean_3 = np.mean(BP_3d_arr, axis=2) #Calculating mean blood pressure for each patient across all timepoints
print(BP_3d_arr_mean_3)
print(BP_3d_arr_mean_3.shape)

[[92.77617153 87.44834018 81.76038152 91.65452617 75.39271702]
 [76.92639004 82.73245111 85.95320446 72.72353308 86.20787139]
 [89.01361613 82.03011285 82.22905143 73.76465807 87.30308188]
 [98.87505498 91.2096971  78.3956094  87.69119129 77.17628536]
 [80.62703868 87.79848551 84.86886071 89.54082467 86.98634581]
 [84.56208596 81.05483891 78.89217364 83.31577168 77.18129485]
 [86.61313405 80.38397068 79.49774363 90.61152222 76.47267983]
 [88.39012008 96.79557092 82.21436475 88.05418579 88.16407274]
 [89.24326397 83.90466531 80.3894514  74.99619145 80.00508562]
 [88.05437423 85.35059033 83.37694732 77.29028946 92.6502984 ]]
(10, 5)

[[88.34762662 87.69923971 81.02602722 83.8500491  78.82014422]
 [86.66862331 84.04250486 82.48953043 82.07848968 86.68780236]]
(2, 5)

[[82.57472144 83.97982528 79.68356007 88.31527865 86.20543472 78.65118632
  80.35729062 90.39233112 83.10982917 86.21671634]
 [89.03813313 77.83755475 86.05264807 85.0238566  85.72318744 83.3512797
  85.07432955 87.05499459 8

In [18]:
# 9.5.1 axis takes one number (using negative index- counting from the right)
BP_3d_arr_mean_1 = np.mean(BP_3d_arr, axis=-3) # Mean bp for matching patient IDs and time for each site 
#(taking the mean of two values with the same position in the smaller arrays)

print(BP_3d_arr_mean_1)
print(BP_3d_arr_mean_1.shape) 
print()

BP_3d_arr_mean_2 = np.mean(BP_3d_arr, axis=-2) #Calculating time-specific bp measure for each site
print(BP_3d_arr_mean_2)
print(BP_3d_arr_mean_2.shape)
print()

BP_3d_arr_mean_3 = np.mean(BP_3d_arr, axis=-1) #Calculating mean blood pressure for each patient across all timepoints
print(BP_3d_arr_mean_3)
print(BP_3d_arr_mean_3.shape)

[[92.77617153 87.44834018 81.76038152 91.65452617 75.39271702]
 [76.92639004 82.73245111 85.95320446 72.72353308 86.20787139]
 [89.01361613 82.03011285 82.22905143 73.76465807 87.30308188]
 [98.87505498 91.2096971  78.3956094  87.69119129 77.17628536]
 [80.62703868 87.79848551 84.86886071 89.54082467 86.98634581]
 [84.56208596 81.05483891 78.89217364 83.31577168 77.18129485]
 [86.61313405 80.38397068 79.49774363 90.61152222 76.47267983]
 [88.39012008 96.79557092 82.21436475 88.05418579 88.16407274]
 [89.24326397 83.90466531 80.3894514  74.99619145 80.00508562]
 [88.05437423 85.35059033 83.37694732 77.29028946 92.6502984 ]]
(10, 5)

[[88.34762662 87.69923971 81.02602722 83.8500491  78.82014422]
 [86.66862331 84.04250486 82.48953043 82.07848968 86.68780236]]
(2, 5)

[[82.57472144 83.97982528 79.68356007 88.31527865 86.20543472 78.65118632
  80.35729062 90.39233112 83.10982917 86.21671634]
 [89.03813313 77.83755475 86.05264807 85.0238566  85.72318744 83.3512797
  85.07432955 87.05499459 8

In [23]:
# 9.5.2 axis takes two numbers
BP_3d_array_mean_4 = np.mean(BP_3d_arr, axis=(1,2)) # Mean BP for all patients and times for each individual site
print(BP_3d_array_mean_4) 
print(BP_3d_array_mean_4.shape) 

BP_3d_array_mean_5 = np.mean(BP_3d_arr, axis=(0,2))
print(BP_3d_array_mean_5) 
print(BP_3d_array_mean_5.shape) 

BP_3d_array_mean_6 = np.mean(BP_3d_arr, axis=(0,1))
print(BP_3d_array_mean_6) 
print(BP_3d_array_mean_6.shape) 

[83.94861737 84.39339013]
(2,)
[85.80642728 80.90869002 82.86810407 86.66956763 85.96431108 81.00123301
 82.71581008 88.72366285 81.70773155 85.34449995]
(10,)
[87.50812496 85.87087229 81.75777883 82.96426939 82.75397329]
(5,)


In [29]:
# 9.5.3. axis takes three numbers or None
BP_3d_array_mean_7 = np.mean(BP_3d_arr, axis=(0,1,2))
print(BP_3d_array_mean_7)
print(BP_3d_array_mean_7.shape)
print('OR')
BP_3d_array_mean_7 = np.mean(BP_3d_arr, axis=None) # None is the default if you do not define the axis
print(BP_3d_array_mean_7)
print(BP_3d_array_mean_7.shape)

84.17100375157764
()
OR
84.17100375157764
()


## 2. Local and Global Variables

<font size="4"> 

Most of the variables we've defined so far are "global"

- Stored in working environment
- Can be referenced in other parts of the notebook



<font size = "5">
Example:

In [30]:
message_hello = "hello"
number3       = 3

In [31]:
print(message_hello + " world")
print(number3 * 2)

hello world
6


<font size = "4">

Any "global" variable can be referenced inside functions

- However, this can lead to mistakes
- Preferrably, include **all** the inputs as parameters

<font size = "4">

$f(x,y,z) = x + y + z$

In [32]:
# 2.1. Correct Example:
def fn_add_recommended(x, y, z):
    return(x,y,z)

# Can pass arguments by name or position:
print(fn_add_recommended(x = 1, y = 2, z = 5))
print(fn_add_recommended(x = 1, y = 2, z = 10))

(1, 2, 5)
(1, 2, 10)


In [38]:
# 2.2. Example that runs (but not recommended) # This did not run for me
# Python will try to fill in any missing inputs
# with variables in the working environment
def fn_add_recommended(x, y, z):
    return(x,y,z)

z = 5
print(fn_add_recommended(x = 1, y = 2))

z=10 
print(fn_add_recommended(x = 1, y = 2))

TypeError: fn_add_recommended() missing 1 required positional argument: 'z'

In [46]:
# 2.2.1.
#del z # remove variables from a global scope
print(fn_add_recommended(x = 1, y = 2))

TypeError: fn_add_recommended() missing 1 required positional argument: 'z'

<font size ="4">

Variables defined inside functions are "local"

- Stored "temporarily" while running
- Includes: Parameters + Intermediate variables


### Local variables supercede global variables

In [45]:
# 2.3.
def print_x():
    x =20
    print(x) 

print_x()
print('outside')
print(x)

20
outside


NameError: name 'x' is not defined

In [49]:
# 2.4. An exception
depart_ls = ['bios', 'epid', 'bshes', 'eh', 'gh', 'hsrhp', 'nutrition']

for id_iter, depart_iter in enumerate(depart_ls):
    print(id_iter, depart_iter) 

#They are still global variables, although we do not explicitly define them
print(id_iter, depart_iter)

0 bios
1 epid
2 bshes
3 eh
4 gh
5 hsrhp
6 nutrition
6 nutrition


In [51]:
# 2.5. This is an example where we define a quadratic function
# (x,y) are both local variables of the function
# 
# When we call the function, only the arguments matter.
# any intermediate value inside the function are "invisible"
# or "inaccessible" when exiting the function.

def fn_square(x):
    y = x**2
    return y

x = 5
y = -5 
print(fn_square(x=1.2)) # When we exit this function, the values of the local variable will be destroyed

print(x)
print(y)

1.44
5
-5


### Local variables are **not** stored in the working environment

In [56]:
# 2.6. The following code assigns a global variable x and a global variable y
x = 5
y = 4

# x and y values change inside the function only
print(fn_square(x=10)) 

# When exiting the function, the values are still associated with global variables
# Any changes to x and y are not updated
print(x)
print(y)
print()

print(fn_square(x=20))
print(x)
print(y) 
print()

x = 6
y = 2
print(x)
print(y)

100
5
4

400
5
4

6
2


### To permanently modify a variable, use the "global" command

In [64]:
# 2.7.
def modify_x():
    global x 
    x = x + 5
# SHOULD NOT USE THIS "global" COMMAND IN PRACTICE
x = 1
print('x = {} before modification'.format(x))

# Now running the function will permanently increase x by 5
modify_x()
print('x = {} after modify_x function'.format(x))

x = 1 before modification
x = 6 after modify_x function


### Try it yourself:

<font size = "4">
    
- What happens if we run `modify_x` twice?
- What happens if we add `global y` inside `fn_square`?

In [66]:
print('x = {} after running the modify_x function once'.format(x))

# Running the function again will permanently increase x by 5
modify_x()
print('x = {} after the second run of the modify_x function'.format(x))

modify_x()
print('x = {} after the third run of the modify_x function'.format(x))

x = 11 after running the modify_x function once
x = 16 after the second run of the modify_x function
x = 21 after the third run of the modify_x function


In [69]:
# 2.9.
def fn_square_2(x):
    global y
    y = x**2
    return y

# Call the function
x = 5
y = -5

print ('Before running function')
print(x,y)
print()

print(fn_square_2(x = 1.2))
print()
print('After running function')
print(x) # Still equal to 5 because we did not make it a global variable within the function
print(y)
      

Before running function
5 -5

1.44

After running function
5
1.44


### Built-in scope (Variables defined in Python)

<font size='4'>

- Functions such as `print()`, `len()`, `sum()`, `isinstance()` are all built-in functions in Python.
- They are available in any part of your code, so no need to define them.
- Python has a list of variables that are always available to prevent you from using the same names.
- Most of them are error names.

In [70]:
# 2.10

# Do not use any of these as your variable name
import builtins
print(dir(builtins))



### Enclosing scope (Variables defined in enclosing functions)

<font size='4'>
    
- They refer to variables defined in enclosing functions
- Enclosing functions are functions that contain other functions (nested or hierarachical functions)
- Enclosing scope is between local and global scopes in the LEGB rule

In [72]:
# 2.11
# Define a function that contains another function 
# Nested function

def outer():
    x = 'outer -> x' # local to outer()

    def inner():
        x = 'inner -> x' # local to inner()
        print(x) # Line which give you the 'inner -> x' output below

    inner() # runs inner() when you call the outer function, then makes the 'inner -> x' disappear
    print(x) 

# This is only really a situation that arises when you define variables by the same name in both the outer and inner functions 
    #(in this case, you have two values for x in the respective functions)

In [73]:
# Call the function
outer() # run outer() 

inner -> x
outer -> x


## 3. Work with Python scripts

<font size='4'>

Please open `week_08_main_1.py` and `week_08_main_2.py`.