# Section 1: Introduction to Python

## Printing and Getting Help

In [2]:
## Use the print() command to display variables.
print('Hello, World!')

Hello, World!


In [3]:
## With Jupyter notebooks, you can also call the variable itself.
'Hello, World!'

'Hello, World!'

In [4]:
## Getting help is easy in Notebooks: append a '?' to the end of a function/variable.
print?

## Basic Data Types in Python

### Integers, Floats, and Mathematic Operations

In [None]:
## Integers are whole numbers (nothing after the decimal point).
print(1)

In [None]:
## Floats are decimal numbers.
print(1.3)

## Adding a decimal point will convert an integer to a float.
print(1.)

In [None]:
## These are the basic operators in python:
print(4 + 2)   # Addition
print(4 * 2)   # Multiplication
print(4 / 2)   # Division
print(4 % 2)   # Remainder
print(4 ** 2)  # Exponent

In [None]:
## Floats dominate integers in operations:
print(4 + 2.)
print(4 / 2.)
print(4 ** 2.)
print(4 // 2) # Floor division of two integers stays an integer (4 // 2. would give 2.0).

In [None]:
## These are the comparison operators in python:
print(4 > 2)    # Greater than
print(4 < 2)    # Less than
print(4 == 2)   # Equal to
print(4 >= 2)   # Greater than or equal to
print(4 <= 2)   # Less than or equal to
print(4 != 2)   # Not equal to

In [None]:
## Python also makes use of the is/is not operators, which test identity rather than equality (more on these later):
print(4 is 2)
print(4 is not 2)

In [None]:
## Python also supports scientific notation and complex numbers:
print(1e3)
print(2+3j)

### Booleans

In [5]:
print(True, False) # True and False are the two Boolean values.

True False


In [6]:
print(True == 1)   # True is equivalent to 1.
print(False == 0)  # False is equivalent to 0.

True
True


### Lists and Tuples

In [8]:
## Lists are the most basic container and are denoted by brackets.
## Lists can store any pythonic type, and elements of a list do not need
## to be of the same type.

example_list = [1, 1., 1e3, 2+3j, True]
print(example_list)

[1, 1.0, 1000.0, (2+3j), True]


#### Important note: Python is a 0-indexed language.
The first element of a list is the 0th position of the list! 

In [9]:
## Use square brackets to index elements of a list.

print(example_list[0])    # First element of the list
print(example_list[2])    # Third element of the list

1
1000.0


In [None]:
## Slicing lists uses the colon operator:
print(example_list[1:3])    # Second-through-third elements (up-to-not-include)
print(example_list[1:])     # Second element onwards.
print(example_list[:2])     # Up-through-second element.

In [None]:
## Slicing also allows for the following operations:
print(example_list[-2:])    # Second-to-last element onwards
print(example_list[:-2])    # Up to second-to-last element

print(example_list[::2])    # Every other element
print(example_list[::-1])   # Elements in reverse

In [10]:
## Slicing operators can be combined:
print(example_list[1:-1:2])    # Every other element, starting from the 
                               # second through the second-to-last.

[1.0, (2+3j)]


#### Important note: Python is an object-oriented language
Every variable in Python is an **instance** (or object) of some type (e.g. integer, list, string) and thus has **attributes.** The following are some useful methods (i.e. attributes that are functions attached to the object) of lists. Note that all of these methods modify the list **in-place**.

In [None]:
## Add new elements to the end of a list using append:
example_list.append( 111 )
print(example_list)

In [None]:
## Add new elements to a specific position in a list using insert:
example_list.insert(0, 222)
print(example_list)

In [None]:
## Remove an element from a specific position using pop. 
example_list.pop(3)
print(example_list)

#### Testing for elements in a list

In [None]:
## The contents of a list can also be tested with the "in" operator.
print( 111 in example_list )
print( 999 in example_list )

#### Important note: Tuples are immutable lists
Tuples are denoted by parentheses. Tuples are virtually identical to lists except that they are **immutable.** In other words, tuples cannot be modified once they are created. 

In [None]:
## We define an example list and tuple.
example_list = [1, 2, 3, 4]
example_tuple = (1, 2, 3, 4)

In [None]:
## Change the second element of the list.
example_list[1] = 9
print(example_list)

In [None]:
## Change the second element of the tuple.
example_tuple[1] = 9
print(example_tuple)

### Text and Strings 

In [None]:
## Strings are demarcated by single or double quotation marks.
example_string1 = 'how exciting is this?'
example_string2 = "how exciting is this?"
print(example_string1)
print(example_string1 == example_string2)

In [None]:
## Strings behave much like lists of characters.
print(example_string1[4:])    # Return string from the 5th character (index 4) onwards.
print(example_string1[::-1])  # Return the reversed string.
print(example_string1[::2])   # Return every other character of the string.

In [None]:
## Strings also have their own attributes 
## These do not modify the string in-place.
print( example_string1.capitalize() )       # Capitalize first letter of string.
print( example_string1.upper() )            # Uppercase all characters.
print( example_string1.count('i') )         # Count 'i's in string.
print( example_string1.replace('i','o'))    # Replace 'i's in string with 'o's.

In [None]:
## Extended paragraphs (e.g. docstrings) can be written with triple quotes:
example_docstring = '''You can use the triple quotes to write long paragraphs
of text that will maintain all of the breaklines. This can
be very helpful when writing docstrings of functions'''

print(example_docstring)

In [None]:
## One very useful feature is string substitution, where we can
## insert variables/text into strings.

print('Hi, my name is %s!' %'Sam')              # Substitute a string.
print('Pi to the 2nd digit is %0.2f.' %3.14159) # Substitute and round a float.

In [None]:
## Strings are easily combined, through the addition operator or the join attribute.
print('This is the first half.' + ' ' + 'This is the second half.')
print(' '.join(['This is the first half.', 'This is the second half.']))

### Dictionaries

In [None]:
## Dictionaries are simple lookup tables. They are denoted by curly brackets.
example_dict = {'a':1, 'b':2, 'c':3}
print(example_dict)
print(example_dict['c'])

In [None]:
## Dictionaries can also be generated using the dict() command.
## Notice the slightly different syntax.
example_dict = dict(a=1, b=2, c=3)
print(example_dict)
print(example_dict['c'])

In [None]:
## Dictionaries are comprised of "keys" and "values".
print(example_dict.keys())
print(example_dict.values())

In [None]:
## Once initialized, new key/value pairs can be stored in a dictionary.
example_dict['d'] = 4
print(example_dict)

## Control Flow in Python

### For and While Loops

In [None]:
## For loops have a very simple syntax in python.
for x in range(5):
    print(x)

In [None]:
## Elements of a list can be directly iterated over in python.
example_list = [4, 1, 5, 2, 7, 1, 2, 3]
for x in example_list:
    print(x)

In [None]:
## For loops can be paired with the enumerate command for indexing.
for i, x in enumerate(example_list):
    print(i,x)

In [None]:
## While loops are similarly simple.
i = 0
while i < 5:
    print(i)
    i += 1  

### Conditional logic with if, elif, else

In [None]:
## In python, the three conditional statements are if, elif, and else.
## Here we will construct a simple for-loop testing parity.
example_list = [4, 7, 9.4]

for x in example_list:
    
    if x % 2 == 0: 
        print('%s is even.' %x)
        
    elif x % 2 == 1:
        print('%s is odd.' %x)
        
    else: 
        print('%s is not an integer.' %x)

### Continue and Break statements
Conditional logic statements can be paired with the "continue" and "break" 
statements for additional control flow in For and While loops. The "continue"
statement skips the rest of the current iteration of a For/While loop, whereas the
"break" statement terminates the For/While loop.

In [None]:
## An example of the continue statement.
## The for loop skips the odd numbers.
example_list = [4, 7, 9.4]

for x in example_list:
    
    if x % 2 == 0: 
        print('%s is even.' %x)
        
    elif x % 2 == 1:
        continue
        print('%s is odd.' %x)    # Never reached: continue has already moved on to the next element.
        
    else: 
        print('%s is not an integer.' %x)

In [None]:
## An example of the break statement.
## The for loop terminates at the first odd number.
example_list = [4, 7, 9.4]

for x in example_list:
    
    if x % 2 == 0: 
        print('%s is even.' %x)
        
    elif x % 2 == 1:
        break
        print('%s is odd.' %x)    # Never reached: break has already terminated the loop.
        
    else: 
        print('%s is not an integer.' %x)

### List comprehensions
Python also allows for embedding For loops within lists as a nifty way of constructing/modifying lists. List comprehensions are very powerful (though sometimes memory intensive) and can be constructed with a few different syntaxes.

In [None]:
## Inclusive/exclusive list comprehension: here we exclude variables from
## the list if they do not meet a certain criterion.
[x for x in range(10) if x > 5]

In [14]:
## Conditional list comprehension: here we transform variables from the 
## list based on whether they meet a certain criterion.
['odd' if x % 2 == 1 else 'even' for x in range(5)]

['even', 'odd', 'even', 'odd', 'even']

In [15]:
## Note that else statements can be chained in list comprehensions.
example_list = [4, 7, 9.4]

['odd' if x % 2 == 1 
 else 'even' if x % 2 == 0 
 else 'non-integer' 
 for x in example_list]

['even', 'odd', 'non-integer']

### Error handling with try/except logic.
Python allows for intelligent error handling with the "try" and "except" logics. Code nested under a "try" command will be evaluated. If an error arises under a try block, the code in the except block will be evaluated instead. This is useful for handling exceptions and preventing scripts from breaking. Use with caution though if you cannot predict error corner cases!

Here we will test try/except logic with a division-by-zero error. Note that I am specifying the error class, i.e. ZeroDivisionError. Python has a number of built-in error types, and it is better to specify the exact type of error you expect to ensure that only those types of errors are passed to the except block. Multiple excepts are permissible in try/except workflows.

In [None]:
## An example divide by zero error.
example_list = [2, 10, 0]

for x in example_list:
    print(20 / x)

In [None]:
## An example try/except handling divide by zero errors.
example_list = [2, 10, 0]

for x in example_list:
    try:
        print(20 / x)
    # Catch only ZeroDivisionError so that any other, unexpected error still surfaces.
    except ZeroDivisionError:
        print('You cannot divide by 0, silly!')

## Defining Functions
Creating custom functions in python is very easy using the def/return commands.

In [None]:
## Here we will create a custom function for handling strings.
def angry_string(string):
    '''Return an upper-cased ("shouted") copy of the given string.'''
    shouted = string.upper()
    return shouted

example_string = 'what are you doing?'
print(example_string)
print(angry_string(example_string))

In [None]:
## We can also define default argument values for a function.
def angry_string(string, add_exclamations=False):
    '''Return the upper-cased string, optionally followed by exclamation marks.

    add_exclamations is used both as an on/off switch and as the repeat
    count: any falsy value (False, 0, None) adds nothing, while a truthy
    integer n appends '!' repeated n times (True counts as 1).
    '''
    shouted = string.upper()
    if not add_exclamations:
        return shouted
    return shouted + '!' * add_exclamations

print( angry_string('hi there', 8) )

In [None]:
## The lambda operator can also be used to define short functions.
angry_string = lambda x: x.upper()
angry_string('hi there')

## Basic Commands in Python
Don't worry too much about this section. Most of these commands will be replaced or supplemented by NumPy shortly.

In [None]:
## The most important command is the range command. 
## This is similar to "seq" in R, except that the stop value is excluded.
var = range(0,5)    # Equivalent to [0,1,2,3,4]. Remember python is 0-indexed!
var = range(5)      # Same as above.
var = range(0,5,2)  # Every other integer up-to-5.
print(var)

In [None]:
## There are commands to test and convert between object classes.
print(list(var))    # Range as list.
print(tuple(var))   # Range as tuple.
print(str(var))     # Range as string (converts to string literally).

In [None]:
## There are commands to modify lists.
example_list = [4, 1, 5, 2, 7, 1, 2, 3]

print(sorted(example_list))    # Sort list.
print(set(example_list))       # Get unique elements of list. Returns set.

In [None]:
## There are also commands to summarize lists.
print(len(example_list))    # Count elements in list.
print(sum(example_list))    # Sum across list.

In [None]:
## The any/all commands are very useful for testing if any 
## conditionals are met in a list.
example_list = [True if x > 2 else False for x in range(5)]
print(example_list)
print(any(example_list))
print(all(example_list))

# Section 2: Introduction to Numpy

## Importing Modules
If vanilla python seems rather lackluster, that's because it is. Fortunately, the scientific stack adds a broad and powerful array of python packages that fill in the gaps. Once installed, packages in python are easily loaded for use.

In [None]:
import numpy
print(numpy.__version__)

In [11]:
## Commands from packages are like attributes of objects. 
## For convenience, we will import packages using shorthand.
import numpy as np
print(np.__version__)

1.13.0


## NumPy Arrays
### Why arrays improve on lists
Arrays are the most basic type of the NumPy package and are vectors, similar to pythonic lists. In contrast to lists, however, arrays have many more attributes and can be modified in substantially more ways.

In [None]:
## Comparing lists and arrays: 
example_list = list(range(5))
example_array = np.arange(5)

print(example_list)
print(example_array)

In [None]:
## Pythonic lists cannot be directly modified elementwise: multiplying by an integer repeats the list instead.
print(example_list * 5)

In [None]:
## Multiplying two lists together is not defined at all and raises a TypeError.
print(example_list * example_list)

In [None]:
## NumPy arrays, however, can be directly modified.
print(example_array * 5)
print(example_array * example_array)

In [None]:
## Every array has an implicitly stored object type.
## These can be modified quickly and easily.
print(example_array, example_array.dtype)
example_array = example_array.astype(float)
print(example_array, example_array.dtype)

In [None]:
## Importantly, arrays store other important metadata.
print(example_array.shape)    # Print shape of array.
print(example_array.nbytes)   # Print bytes of array.

In [None]:
## Arrays now have a number of other built-in attributes 
## not available for lists.
print(example_array.min())     # Get min of array.
print(example_array.max())     # Get max of array.
print(example_array.mean())    # Get mean of array.
print(example_array.sum())     # Get sum of array.

### Generating NumPy Arrays
There are a multitude of methods and functions to generate NumPy arrays.

In [None]:
## Making an array from a list using the array command.
example_list = [4, 7, 9.4]
example_array = np.array(example_list)

print(example_list)
print(example_array)    # Note that the float-type is applied across the array.

In [None]:
## NumPy has recreated all of the standard R/Matlab commands for 
## generating arrays.
print(np.zeros(5))            # Length-5 array of zeros.
print(np.ones(5))             # Length-5 array of ones.
print(np.arange(5))           # Length-5 array of sequential integers
print(np.linspace(0,10,5))    # Length-5 evenly-spaced array from 0 to 10.

## NumPy Matrices
### Why matrices improve on lists
It is possible to represent matrices in pythonic lists, though it is inefficient. Similar to the benefits of arrays, NumPy matrices dramatically improve upon the numerical capabilities of core python.

In [None]:
## The following is a basic matrix represented in core python. 
## It is a list of lists.
nested_lists = [[1,2,3,4],
                [4,5,6,7]]
print(nested_lists)
print(nested_lists[1][2])   # To extract the 2nd row, 3rd column, two brackets are necessary.

In [None]:
## NumPy matrices make this much easier!
example_matrix = np.array(nested_lists)
print(example_matrix)
print(example_matrix[1,2])    # Much more efficient indexing!

In [None]:
## Indexing of NumPy matrices (and arrays for that matter)
## obey all of the slicing conventions of lists. Commas are
## used to demarcate which axis a slice operation is targeting.
print(example_matrix[1,2])    # Second row, third column.
print(example_matrix[0,:])    # All the first row.
print(example_matrix[:,-1])   # All of the final column.

In [None]:
## NumPy matrices have all the same attributes of NumPy arrays,
## but now functions can be applied to specific rows or columns
## in addition to the entire matrix.
print( example_matrix.sum() )          # Sum across matrix.
print( example_matrix.sum(axis=0) )    # Sum down each column (one total per column).
print( example_matrix.sum(axis=1) )    # Sum along each row (one total per row).

In [None]:
## Note that arrays and matrices, despite different sizes,
## are actually the same NumPy class under the hood.
print(type(example_array))
print(type(example_matrix))

### Generating NumPy Matrices

In [None]:
## As demonstrated above, a nested list-of-lists can be made
## into a NumPy matrix.
example_list = [[0, 1, 1],[2, 3, 5], [8, 13, 21]]
example_matrix = np.array(example_list)

print(example_list)
print(example_matrix)

In [None]:
## Matrices can also be formed by joining arrays using hstack, vstack, or concatenate.
## hstack joins column vectors, vstack joins row vectors, and concatenate
## allows the user to specify which axis to merge across.

example_array = np.arange(4).reshape(4,1)
print( np.hstack( [example_array,example_array] ) )    # Stack two arrays horizontally.

example_array = example_array.reshape(1,4)
print( np.vstack( [example_array,example_array] ) )    # Stack two arrays vertically.

print( np.concatenate( [example_array, example_array], axis=0 ))

In [None]:
## NumPy has recreated all of the standard R/Matlab commands for 
## generating matrices.
print( np.zeros( [3,3] ) )                 # 3x3 matrix of zeros.
print( np.ones( [3,3] ) )                  # 3x3 matrix of ones.
print( np.arange(9).reshape(3,3) )         # 3x3 matrix of sequential integers.
print( np.linspace(0,8,9).reshape(3,3) )   # 3x3 matrix evenly-spaced array from 0 to 8. 
print( np.identity(3) )                    # 3x3 identity matrix.

In [None]:
## Importantly, reshape can be used to change the shape of 
## NumPy arrays. The order flag can also change how they are
## organized (row-ordered vs. column-ordered).
example_array = np.arange(8)
print(example_array)
print(example_array.reshape(2,4,order='C'))    # Row-major (C-style) order, the default: rows are filled first.
print(example_array.reshape(2,4,order='F'))    # Column-major (Fortran-style) order: columns are filled first.

In [None]:
## The flatten command can be used to flatten matrices to arrays.
example_matrix = np.identity(3)
print(example_matrix)
print(example_matrix.flatten())

## Core NumPy Functions
NumPy also introduces a number of useful functions designed to operate efficiently over NumPy arrays. The following is a non-exhaustive overview of some important NumPy functions.

### Rounding Functions

In [None]:
## NumPy has the same rounding functions as R.
example_array = np.linspace(0,1,4)
print(example_array)
print( np.round(example_array, 2) )    # Round to the 2nd decimal.
print( np.floor(example_array) )       # Round down.
print( np.ceil(example_array) )        # Round up.

### Mathematical functions

In [12]:
## NumPy includes a variety of mathematical functions.
## All of these can be applied across an entire matrix 
## or across arrays.

np.sum;       # Sum of an array or matrix.
np.cumsum;    # Cumulative sum over an array.
np.prod;      # Product of elements of an array.
np.divide;    # Element-wise division of two arrays.
np.diff;      # Pairwise difference of elements of an array.
np.exp;       # Exponential transform.
np.log;       # Natural logarithm.
np.log10;     # Base-10 logarithm.

### Summary Functions

In [None]:
## NumPy includes many functions to summarize an array.
## With the exception of corrcoef, all of these can be
## applied across an entire matrix or across arrays.

np.min;           # Return the smallest element.
np.max;           # Return the largest element.
np.argmin;        # Return the index of the smallest element.
np.argmax;        # Return the index of the largest element.
np.mean;          # Compute the mean of an array.
np.std;           # Compute the standard deviation of an array.
np.var;           # Compute the variance (sd^2) of an array.
np.percentile;    # Compute the xth percentile of an array.
np.corrcoef;      # Compute the row-/col-wise correlation of a matrix.

### Set Functions

In [None]:
## NumPy includes functions for identifying unique elements
## within or between arrays.
arr1 = np.array([41, 16, 34, 0, 2, 20, 19, 14, 22, 15, 18, 9, 35, 41])
arr2 = np.array([42, 22, 40, 7, 33, 0, 12, 19, 44, 10, 31, 11, 11, 49])

In [None]:
# Sort elements (ascending order).
np.sort(arr1)

In [None]:
# Return unique elements.
np.unique(arr1)

In [None]:
# Return unique elements, count number of appearances.
np.unique(arr1, return_counts=True)

In [None]:
# Test whether each element of array-1 is also present in array-2 (returns a boolean mask).
np.in1d(arr1, arr2)

In [None]:
# Return all unique elements of arrays 1 & 2.
np.union1d(arr1, arr2)

In [None]:
# Return all elements belonging to both arrays 1 & 2.
np.intersect1d(arr1, arr2)

### Shortcut Functions

In [None]:
## NumPy includes a number of very helpful functions that act to replace list 
## comprehensions (np.where) and for loops (np.apply_along_axis, np.apply_over_axes). 
## These are often more efficient than writing out a full For loop.
## We will emphasize these functions with a simple example of standard-scoring (z-scoring)
## a matrix.

## Define the standard score (z-score) function.
def zscore(arr): 
    '''Standard score: subtract the mean of arr, then divide by its standard deviation.

    Relies on arr providing .mean() and .std(), as NumPy arrays do.
    '''
    centered = arr - arr.mean()
    spread = arr.std()
    return centered / spread

## Define a simple matrix.
mat = np.arange(12).reshape(2,6)
print(mat)

## Use np.apply_along_axis to apply our function across each row.
zmat = np.apply_along_axis(zscore, axis=1, arr=mat)
print(zmat.round(2))

## Use the np.where command to set all negative numbers to 0, else 1.
## With three arguments, np.where behaves like ifelse() in R; with only a condition it behaves like which().
amat = np.where(zmat < 0, 0, 1)
print(amat)

## If no transforms are specified, np.where returns the indices of the
## array where the conditional is met.
print( np.where(zmat < 0 ) )

### Linear Algebra Functions

In [None]:
## NumPy includes an entire submodule dedicated to efficient linear 
## algebra functions (though it should be noted that SciPy has
## reimplemented them for maximal efficiency). See np.linalg for 
## a full list of commands.
mat = np.arange(16).reshape(4,4)

print(mat)
print(mat.T)           # Transpose the matrix
print(np.diag(mat))    # Return diagonal of matrix
print(np.triu(mat))    # Return upper triangular matrix
print(mat.dot(mat))    # Matrix multiply itself.

In [None]:
## Linear algebra operations include:
np.linalg.norm;        # Vector or matrix norm
np.linalg.inv;         # Inverse of a square matrix
np.linalg.det;         # Determinant of a square matrix
np.linalg.eig;         # Eigenvalues and vectors of a square matrix
np.linalg.cholesky;    # Cholesky decomposition of a matrix
np.linalg.svd;         # Singular value decomposition of a matrix
np.linalg.lstsq;       # Solve linear least-squares problem

### Generating Random Data

In [None]:
## NumPy also includes many functions for generating random data. 
## For a full list of functions, tab-complete on np.random.

print( np.random.randint(0,10,10) )    # Generate ten random integers between 0-9.
print( np.random.normal(0,1,5) )       # Generate five random samples of a normal distribution with mu=0,sd=1.
print( np.random.binomial(1,0.5,10))   # Generate 10 random coin flips.

print( np.random.choice(np.arange(10), 5, replace=False) ) # Choose five numbers from 0-9 without replacement.