## Before continuing, please select menu option:  **Cell => All output => clear**

# For Loops
* Allows you to perform an action a set number of times
* Usually used on iterable types: lists, tuples, or generators

In [None]:
# You can use for loops to cycle through a list
grocery_list = ['Juice', 'Tomatoes', 'Potatoes', 'Bananas']
 
for i in grocery_list:
    print(i)

In [None]:
# You can also define a list of numbers to cycle through
for x in [2, 4, 6, 8, 10]:
    print(x)

In [None]:
# Or any object
def myfunc():
    pass

for i in [2, 4.3, 'hello', (8,10), myfunc]:
    print(i)

In [None]:
# You can double up for loops to cycle through lists
num_list =[[1, 2, 3],[10, 20, 30],[100, 200, 300]];
 
for x in range(0, 3):
    for y in range(0, 3):
        print(num_list[x][y])

### Exercise
1. Using two for loops similar to above.
2. Iterate over the counters below and print out each item:

In [None]:
counters = [[1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11, 12]]

## Generators

In [None]:
# At first glance, range() appears to return a list:
for x in range(0, 10):
    print(x , ' ', end="")
print('\n')

In [None]:
# However, in reality a range is a lazy sequence object that produces its values on demand instead of storing them all.
# In Python 2, range() returned a full list, so xrange() was provided as the lazy alternative.
print('Python3 range =', range(5,10))  # Python2 generator is xrange
print('Python2 would use more memory and return a list =', list(range(5,10)))

In [None]:
# Testing range creation with a generator:
%timeit range(10000)

In [None]:
# As Python2 would work to generate a list:
%timeit list(range(10000))

In [None]:
# enumerate is a very useful feature;
list(enumerate(grocery_list, 5))

In [None]:
for count, item in enumerate(grocery_list):
    print('#', count, '=', item)

### Exercise
1. ???

# List comprehensions (& generator & dictionary) 
* Are a pythonic way of expressing a ‘for-loop’ that appends to a list in a single line of code.

A list comprehension typically has 3 components:

* The output (which can be string, number, list or any object you want to put in the list.)
* For Statements
* Conditional filtering (optional)

Below is a typical format of a list comprehension;

[(output expression) for (i in iterable) if (filter condition)]


In [None]:
result = []
for i in range(10):
    if i%2 == 0:
        result.append(i)
result

In [None]:
result = [i for i in range(10) if i % 2 == 0]
result

In [None]:
[i ** 2 if i%2==0 else i ** 3 for i in [1, 2, 3, 4, 5]]

**How can we improve above?**

In [None]:
# Flatten a list of lists
mat = [[1,2,3,4], [5,6,7,8], [9,10,11,12], [13,14,15,16]]
[i for row in mat for i in row if i % 2==0]

In [None]:
# For each number in list_b, get the number and its position in mylist as a list of tuples
mylist = [9, 3, 6, 1, 5, 0, 8, 2, 4, 7]
list_b = [6, 4, 6, 1, 2, 2]
[(i, mylist.index(i)) for i in list_b]

**Generator comprehension**

In [None]:
# List comprehension versus Generator comprehension
listcomp = [ f'Count{i}' for i in range(3) ]
iterator = ( f'Count{i}' for i in range(3) )

In [None]:
print(listcomp)
print(iterator)

In [None]:
for x in iterator:
    print(x)

**Try running the above a second time.  What happens?**

In [None]:
iterator = ( f'Count{i}' for i in range(3) )
next(iterator)

In [None]:
next(iterator)

In [None]:
next(iterator)

In [None]:
next(iterator)

In [None]:
iterator = ( f'Count{i}' for i in range(3) )
list(iterator)

In [None]:
# Generator comprehensions generally use a lot less memory:
import sys

# The list comprehension computes and stores all 100000 squares up front.
# Note that sys.getsizeof is shallow: it measures the list object itself,
# not the integer objects the list refers to.
nums_squared_lc = [i ** 2 for i in range(100000)]
print(sys.getsizeof(nums_squared_lc))  # hundreds of kilobytes; exact value varies by Python build

# The generator expression only keeps its current state and produces each
# square on demand, so its size does not grow with the length of the range.
nums_squared_gc = (i ** 2 for i in range(100000))
print(sys.getsizeof(nums_squared_gc))  # small compared with the list above

In [None]:
# If the list is smaller than available memory then list comprehensions can be faster to evaluate:
import cProfile
cProfile.run('sum([i * 2 for i in range(100000)])')

In [None]:
import cProfile
cProfile.run('sum((i * 2 for i in range(100000)))')

**Dictionary comprehension**

In [None]:
{i: mylist.index(i) for i in list_b}

In [None]:
words = '''
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
'''.split()

In [None]:
{i:words.count(i) for i in words}

In [None]:
%%timeit
# it's not always faster than other methods
{i:words.count(i) for i in words}

In [None]:
%%timeit
d = {}
for w in words: d[w] = d.get(w, 0) + 1

In [None]:
# Thinking outside the box
numbers = '1 7 2 0 -11 14 -3 -7'
[x(numbers.split(), key=int) for x in (max, min)]

## Exercise
1. Explain the comprehension above?
2. What rule does it break?

# While Loops
* While loops are used when you don't know ahead of time how many times you'll have to loop

In [None]:
import random

random_num = random.randrange(0,100)
while (random_num != 15):
    print(random_num)
    random_num = random.randrange(0,100)

In [None]:
# The loop counter for a while loop is defined before the loop
i = 0;
while (i <= 20):
    if(i%2 == 0):
        print(i)
    elif(i == 9):
        break  # Forces the loop to end all together
    else:
        i += 1  # Shorthand for i = i + 1
        continue  # Skips to the next iteration of the loop
 
    i += 1

# Functions
* Functions allow you to reuse and write readable code
* Type def (define), function name and parameters it receives
* return is used to return something to the caller of the function

In [None]:
def addNumbers(fNum, sNum):
    """Return the result of adding sNum to fNum."""
    return fNum + sNum

In [None]:
print(addNumbers)

In [None]:
print(addNumbers(1, 4))

In [None]:
print(fNum)
# Can't get the value of fNum because it was created in a function
# It is said to be out of scope

In [None]:
# A name bound outside of any function lives in the module (global) scope
aNum = 5

def subNumbers(fNum, sNum):
    """Return fNum minus sNum, plus the module-level aNum."""
    difference = fNum - sNum
    return difference + aNum

In [None]:
print(subNumbers(1, 4), aNum)

In [None]:
# Parameters can be given default values
def addNumbers(fNum, sNum=10):
    """Return fNum + sNum; sNum falls back to 10 when it is omitted."""
    return fNum + sNum

print(addNumbers(7, 1))
print(addNumbers(5))

**Usage of default arguments is very common in the standard library**<br>
For example:<br>
```python
open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
```

In [None]:
# GOTCHA - mutable default arguments...
# The default list is created once, when the def statement is executed, so
# every call that omits `to` appends to that same list object.
def append_to(element, to=[]):
    to.append(element)  # mutates the shared default list when `to` is not supplied
    return to

In [None]:
my_list = append_to(12)
print(my_list)

my_other_list = append_to(42)
print(my_other_list)
# You might expect the following output...
# [12]
# [42]
# but...

In [None]:
# Fixed version
def append_to(element, to=None):
    """Append element to `to` and return that list.

    A brand new list is created on each call in which `to` is None.
    """
    target = [] if to is None else to
    target.append(element)
    return target

In [None]:
my_list = append_to(12)
print(my_list)

my_other_list = append_to(42)
print(my_other_list)

In [None]:
def ends(l):
    """Return a (first, last) tuple holding the items at both ends of sequence l."""
    first = l[0]
    last = l[-1]
    return first, last

In [None]:
# Tuple unpacking
left, right = ends([3, 5, 8, 3, 9])
print(left)
print(right)

In [None]:
# An easier way with the (*) unpacking operator:
left, *middle, right = (3, 5, 8, 3, 9)
print(left, middle, right)

In [None]:
myrange = (3, 6)
for i in range(*myrange): print(i)

**Generator functions**

In [None]:
def yieldtest():
    """Generator that hands back two fixed strings, one per next() call."""
    yield 'This is the first string'
    yield 'This is the second string'

In [None]:
genobj = yieldtest()
print(next(genobj))

In [None]:
print(next(genobj))

In [None]:
print(next(genobj))

**Once generators are exhausted they raise a `StopIteration` exception**

In [None]:
# Create your own generator
def counter(n):
    """Yield 0, 1, 2, ... for as long as the value is below n."""
    current = 0
    while True:
        if not current < n:
            return  # finishing the function ends the iteration
        yield current
        current += 1

In [None]:
for x in counter(3):
    print(x)

**Try stepping through the above in Thonny**

**Example reading a large file:**
```python
csv_gen = csv_reader("some_csv.txt")
row_count = 0
for row in csv_gen:
    row_count += 1
print(f"Row count is {row_count}")
```
Looking at this example, you might expect csv_gen to be a list. To populate this list, csv_reader() opens a file and loads its contents into csv_gen. Then, the program iterates over the list and increments row_count for each row.

This is a reasonable explanation, but would this design still work if the file is very large? What if the file is larger than the memory you have available? To answer this question, let’s assume that `csv_reader()` just opens the file and reads it into an array:

```python
def csv_reader(file_name):
    file = open(file_name)
    result = file.read().split("\n")
    return result
```
This function opens a given file and uses `file.read()` along with `.split()` to add each line as a separate element to a list. If you were to use this version of csv_reader() in the row counting code block you saw further up, then you’d get the following output:

```
Traceback (most recent call last):
  File "ex1_naive.py", line 22, in <module>
    main()
  File "ex1_naive.py", line 13, in main
    csv_gen = csv_reader("file.txt")
  File "ex1_naive.py", line 6, in csv_reader
    result = file.read().split("\n")
MemoryError
```
In this case, open() returns a file object that you can lazily iterate through line by line. However, ```file.read().split()``` loads everything into memory at once, causing the `MemoryError`.

Before that happens, you’ll probably notice your computer slow to a crawl. You might even need to kill the program with a KeyboardInterrupt. So, how can you handle these huge data files? Take a look at a new definition of `csv_reader()`:
```python
def csv_reader(file_name):
    for row in open(file_name, "r"):
        yield row
```
In this version, you open the file, iterate through it, and yield a row. This code should produce the following output, with no memory errors:
```
Row count is 64186394
```



## Exercise:
1. Without using `range` write a function that takes a parameter 'n' and returns a list of odd numbers from 0 up to 'n'.
2. Change the function to allow iteration when 'n' is a large number, but in a memory efficient way (do not test with very high numbers as it might hang your system).
3. Write another function which takes output of the first function to filter and print only primes.

*Prime number = A number that is divisible only by itself and 1*

**Advanced generators**

In [None]:
# Checks whether a number reads the same reversed (the details do not matter for this demonstration)
def is_palindrome(num):
    """Return True when the decimal digits of num read the same in reverse.

    Inputs for which ``num // 10 == 0`` (single digits) are never reported
    as palindromes.
    """
    # Skip single-digit inputs
    if num // 10 == 0:
        return False

    # Peel digits off the right-hand end and rebuild the number back to front
    remaining = num
    mirrored = 0
    while remaining != 0:
        remaining, digit = divmod(remaining, 10)
        mirrored = mirrored * 10 + digit

    return num == mirrored
    

# Generates a never ending supply of palindromes
def infinite_palindromes():
    # Candidate number; it is advanced by one on every pass through the loop
    num = 0
    while True:
        if is_palindrome(num):
            # yield is an expression, not a statement: it evaluates to the value
            # passed to send(), or to None when the generator is resumed by next()
            i = (yield num)
            if i is not None:
                num = i  # restart the search just after the value that was sent in
        num += 1

In [None]:
pal_gen = infinite_palindromes()
for i in pal_gen:
    print(i)
    digits = len(str(i))
    if digits > 5:
        # throw() raises the exception inside the generator at its paused yield;
        # nothing catches it there, so it propagates back out of this cell.
        # Try changing throw(...) to close() - note that close() takes no arguments.
        pal_gen.throw(ValueError("We don't like large palindromes"))
    # send() passes data back to the coroutine and also resumes it up to its
    # next yield; the palindrome returned by send() is discarded here
    pal_gen.send(10 ** digits)

**args & kwargs**

In [None]:
# Normal function limited to two arguments:
def mysum(a, b):
    """Return the sum of exactly two values."""
    total = a + b
    return total

mysum(3, 1)

In [None]:
# Improved to accept any iterable so you can sum multiple values
def mysum(iterator):
    """Add up every item produced by iterating over `iterator`, starting from 0."""
    running = 0
    for value in iterator:
        running = running + value
    return running

x = [1, 2, 3]
mysum(x)

In [None]:
# Better: let * collect any number of positional arguments into a tuple
def mysum(*args):  # could be any name but *args is standard form
    """Return the sum of all positional arguments (0 when none are given)."""
    total = 0
    for number in args:
        total += number
    return total

mysum(1, 2, 3, 4)

In [None]:
# **kwargs works similar to *args but collects named arguments:
def concatenate(**kwargs):  # Again **kwargs is standard convention
    """Join the values of all keyword arguments into a single string."""
    # kwargs is a dictionary; its values come back in the order the arguments were passed
    return ''.join(kwargs.values())

concatenate(a='Leap', b='Before', c='You', d='Look')

**Usage of \*args & \*\*kwargs is very common for 3rd party libraries**<br>
For example:<br>
```python
DataFrame.plot(self, *args, **kwargs)
```

### Lambda
* AKA Anonymous functions
```python
lambda parameters: expression
```
Behaves like;
```python
def <lambda>(parameters):
    return expression
```

In [None]:
add = lambda x, y: x + y
print(add(3, 5))

In [None]:
l = [ (3,2), (3,1), (1,4), (2,0) ]
l.sort()
l

In [None]:
# Assign an anonymous function to return the second item in each tuple
l = [ (3,2), (3,1), (1,4), (2,0) ]
mysort = lambda x: x[1]
l.sort(key=mysort)
l

In [None]:
# Usually just pass the anonymous function directly to sort
l = [ (3,2), (3,1), (1,4), (2,0) ]
l.sort(key=lambda x: x[1])
l

In [None]:
# This used to work in Python2 but tuple parameter unpacking in function parameters was removed in ver 3
# Use the form above where tuples are passed as one parameter
# https://www.python.org/dev/peps/pep-3113/
l = [ (3,2), (3,1), (1,4), (2,0) ]
l.sort(key=lambda x,y : y)
l

## Exercise:
1. Write a function which adds multiple numbers and optional argument to multiply the sum.
2. Add a fourth keyword argument to supply a flag to toggle print out or just return the calculated value.
3. There's more than one way to capture the arguments, can you provide an alternative to your first solution?

# File I/O

<br>Modes are:
- 'r'	open for reading (default)
- 'w'	open for writing, truncating the file first
- 'x'	open for exclusive creation, failing if the file already exists
- 'a'	open for writing, appending to the end of the file if it exists
- 'b'	binary mode
- 't'	text mode (default)
- '+'	open a disk file for updating (reading and writing)
- 'U'	universal newlines mode (deprecated)

Difference between binary/text:
- Files opened in binary mode return contents as bytes objects without unicode decoding. 
- In text mode the contents of the file are returned as str, after the bytes have been decoded using a platform-dependent or specified encoding.

In [None]:
myfn = 'mytest.txt'
fd = open(myfn, 'wb')

In [None]:
# Get the file mode used
print(fd.mode)

In [None]:
# Get the file's name
print(fd.name)

In [None]:
# Write text to a file with a newline
fd.write(bytes("Write me to the file\n", 'UTF-8'))

In [None]:
# Close the file
fd.close()

In [None]:
# Opens a file for reading and writing
fd = open(myfn, "rb+")
# Read text from the file
text = fd.read()
print(type(text))
print(text)

In [None]:
# Implicitly closed before re-opening:
fd = open(myfn, "r+")
# Read text from the file
text = fd.read()
print(type(text))
print(text)

In [None]:
# Close the file
fd.close()

In [None]:
# using the with statement context manager:
with open(myfn, "r+") as fd:
    # Read text from the file
    text = fd.read()
    print(type(text))
    print(text)
# The file is automatically closed when the with block exits, so using it
# afterwards raises "ValueError: I/O operation on closed file"
fd.seek(0)

In [None]:
import os
# Delete the file
os.remove(myfn)

In [None]:
# Looking to see if the demo files are available with a Jupyter special execute prefix (!):
!dir SSHOW_SYS.txt AllConf.csv

In [None]:
filename = 'SSHOW_SYS.txt'
filename = 'AllConf.csv'
if os.path.exists(filename): print('Yes') 

In [None]:
if os.path.exists(filename):
    with open(filename) as fd:
        for i, line in enumerate(fd):
            line=line.strip()
            print(f'[{i}] {line}')
            if i > 10: break            

## Exercise
- Write some code to process either the `SSHOW_SYS.txt` or `AllConf.csv` and print out your favorite section.
    - `SSHOW_SYS.txt` example: text between `???/switchshow` to blank line.  
    - `AllConf.csv` example: text between `<<System Option Information>>` to next `<<?>>` section.

## Generators are great for processing files & pipelines

Imagine a large dataset:

> permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round<br>
> digg,Digg,60,web,San Francisco,CA,1-Dec-06,8500000,USD,b<br>
> digg,Digg,60,web,San Francisco,CA,1-Oct-05,2800000,USD,a<br>
> facebook,Facebook,450,web,Palo Alto,CA,1-Sep-04,500000,USD,angel<br>
> facebook,Facebook,450,web,Palo Alto,CA,1-May-05,12700000,USD,a<br>
> photobucket,Photobucket,60,web,Palo Alto,CA,1-Mar-05,3000000,USD,a<br>
> ...

Strategy:

1. Read every line of the file.
2. Split each line into a list of values.
3. Extract the column names.
4. Use the column names and lists to create a dictionary.
5. Filter out the rounds you aren’t interested in.
6. Calculate the total and average values for the rounds you are interested in.

In [None]:
# Is the sample available:
!dir techcrunch.csv

In [None]:
# Read in the file:
file_name = "techcrunch.csv"
lines = (line for line in open(file_name))

In [None]:
# Split each line into values:
list_line = (s.rstrip().split(",") for s in lines)

In [None]:
# Get just the header row:
cols = next(list_line)

In [None]:
# Convert data into a dictionary:
company_dicts = (dict(zip(cols, data)) for data in list_line)

In [None]:
# Filter the rounds you are not interested in:
funding = (
    int(company_dict["raisedAmt"])
    for company_dict in company_dicts
    if company_dict["round"].upper() == "A"
)

In [None]:
# Calculate the total:
total_series_a = sum(funding)
print(f"Total series A fundraising: ${total_series_a}")

## Exercise
 1. When does the code to read the data lines from the file get executed above?
 2. Modify above to calculate the average of the filtered rounds.

# CLASSES AND OBJECTS
* The concept of OOP allows us to model real world things using code
* Every object has attributes (e.g. color, height, weight) which are object variables
* Every object has abilities (walk, talk, eat) which are object functions (or methods)

In [None]:
class Animal:
    """A simple animal with a name, height, weight and sound.

    All per-instance data is kept in private (double-underscore) attributes
    and is read or changed through the get_*/set_* methods.
    """

    # You can make a variable private by starting it with __
    # (Python mangles the name to _Animal__count, so it cannot be reached as
    # Animal.__count from outside the class)
    # This is a class attribute, shared by every instance:
    __count = 0

    # The constructor is called to set up or initialize an object
    # self allows an object to refer to itself inside of the class
    def __init__(self, name, height, weight, sound):
        # Stored as __name so that get_name(), set_name() and toString()
        # all work on the same attribute
        self.__name = name
        self.__height = height
        self.__weight = weight
        self.__sound = sound
        Animal.__count += 1

    @staticmethod
    def count():
        """Return how many times Animal.__init__ has run.

        Declared static so it can be called on the class (Animal.count())
        as well as on an instance.
        """
        return Animal.__count

    def set_name(self, name):
        """Replace the animal's name."""
        self.__name = name

    def set_height(self, height):
        """Replace the animal's height."""
        self.__height = height

    def set_weight(self, weight):
        """Replace the animal's weight."""
        self.__weight = weight

    def set_sound(self, sound):
        """Replace the sound the animal makes."""
        self.__sound = sound

    def get_name(self):
        """Return the animal's name."""
        return self.__name

    def get_height(self):
        """Return the height converted to a string."""
        return str(self.__height)

    def get_weight(self):
        """Return the weight converted to a string."""
        return str(self.__weight)

    def get_sound(self):
        """Return the sound the animal makes."""
        return self.__sound

    def get_type(self):
        """Print the type name of this class ("Animal")."""
        print("Animal")

    def toString(self):
        """Return a one-sentence description of the animal."""
        return "{} is {} cm tall and {} kilograms and says {}".format(self.__name, self.__height, self.__weight, self.__sound)

In [None]:
print(Animal)

In [None]:
Animal.__count

In [None]:
# How to create an Animal object
cat = Animal('Whiskers', 33, 10, 'Meow')

In [None]:
Animal.count()

In [None]:
cat.__name

In [None]:
print(cat.toString())

In [None]:
# You can't access this value directly because it is private
print(cat.__name)

In [None]:
# INHERITANCE -------------
# You can inherit all of the variables and methods from another class

class Dog(Animal):
    """An Animal that additionally has an owner."""

    # Class-level default; every instance sets its own value in __init__
    __owner = None

    def __init__(self, name, height, weight, sound, owner):
        self.__owner = owner
        self.__animal_type = None

        # How to call the super class constructor
        super().__init__(name, height, weight, sound)

    def set_owner(self, owner):
        """Replace the dog's owner."""
        self.__owner = owner

    def get_owner(self):
        """Return the dog's owner."""
        return self.__owner

    def get_type(self):
        """Print the type name of this class ("Dog")."""
        print ("Dog")

    # We can overwrite functions in the super class
    def toString(self):
        """Return a one-sentence description of the dog, including its owner."""
        return "{} is {} cm tall and {} kilograms and says {}. His owner is {}".format(self.get_name(), self.get_height(), self.get_weight(), self.get_sound(), self.__owner)

    # You don't have to require attributes to be sent
    # A default argument lets one method cover both call styles
    # (Python's substitute for method overloading)
    def multiple_sounds(self, how_many=None):
        """Print the dog's sound once, or repeated how_many times when given."""
        if how_many is None:
            # Call get_sound(); without the parentheses the bound method
            # object itself would be printed instead of the sound
            print(self.get_sound())
        else:
            print(self.get_sound() * how_many)
 

In [None]:
spot = Dog("Spot", 53, 27, "Ruff", "Derek")
print(spot.toString())

In [None]:
# Polymorphism allows us to refer to objects as their super class
# and the correct functions are called automatically

class AnimalTesting:
    # Calls get_type() on whatever object is passed in; the object's own class
    # decides which get_type implementation runs.
    def get_type(self, animal):
        animal.get_type()

In [None]:
test_animals = AnimalTesting()
 
test_animals.get_type(cat)
test_animals.get_type(spot)

In [None]:
spot.multiple_sounds(4)

## Namespaces

In [None]:
# Functions local scopes:
# `base` and `result` below are local names of the function that defines them.
def square(base):
    result = base ** 2  # local to square
    print(f'The square of {base} is: {result}')

# The same local names are reused in the new function without any clash
def cube(base):
    result = base ** 3  # local to cube, unrelated to square's result
    print(f'The cube of {base} is: {result}')

In [34]:
# you cannot access names within the functions above
result

NameError: name 'result' is not defined

In [35]:
base

NameError: name 'base' is not defined

In [None]:
# inspecting the __code__ attribute which holds information about the function
print('varnames:', square.__code__.co_varnames)
print('arg count:', square.__code__.co_argcount)
print('constants:', square.__code__.co_consts)

In [8]:
# Scopes within namespaces:
# Each nested function below binds its own x, so the four print calls
# show four different values.
x = "2"  # x is now defined within the module namespace
def example():
    x = "3" # x is now defined as 3 within the local namespace of example
    def method():
        # global x  # try running a second time with this uncommented
        # nonlocal x   # also try with nonlocal instead of global
        x = "4" # x is now defined as 4 within the local namespace of method
        def function():
            x = "5" # x is now defined as 5 within the local namespace of function
            print("Function Scope: " + x)
        function()
        print("Method Scope: " + x)
    method()
    print("Example Scope: " + x)
example()
print("Module Scope: " + x)

Function Scope: 5
Method Scope: 4
Example Scope: 3
Module Scope: 2


In [9]:
x = "2"  # x is now defined within the module namespace
def example():
    nonlocal x  # x will now be defined as being within the example scope
    x = "3" # x is now defined as 3 within the local namespace of example

SyntaxError: no binding for nonlocal 'x' found (<ipython-input-9-3dc9e3b80f42>, line 3)

**Note**: Global names can be updated or modified from any place in your global Python scope. Beyond that, the global statement can be used to modify global names from almost any place in your code, as you’ll see in The global Statement.

Modifying global names is generally considered bad programming practice because it can lead to code that is:

- **Difficult to debug**: Almost any statement in the program can change the value of a global name.
- **Hard to understand**: You need to be aware of all the statements that access and modify global names.
- **Impossible to reuse**: The code is dependent on global names that are specific to a concrete program.

Good programming practice recommends using local names rather than global names. Here are some tips:

- **Write** self-contained functions that rely on local names rather than global ones.
- **Try** to use unique objects names, no matter what scope you’re in.
- **Avoid** global name modifications throughout your programs.
- **Avoid** cross-module name modifications.
- **Use** global names as constants that don’t change during your program’s execution.

Python scopes are implemented as dictionaries that map names to objects. These dictionaries are commonly called namespaces. These are the concrete mechanisms that Python uses to store names. They’re stored in a special attribute called `.__dict__`.

In [16]:
import sys
sys.__dict__.keys()

dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', 'breakpointhook', 'callstats', '_clear_type_cache', '_current_frames', 'displayhook', 'exc_info', 'excepthook', 'exit', 'getdefaultencoding', 'getallocatedblocks', 'getfilesystemencoding', 'getfilesystemencodeerrors', 'getrefcount', 'getrecursionlimit', 'getsizeof', '_getframe', 'getwindowsversion', '_enablelegacywindowsfsencoding', 'intern', 'is_finalizing', 'setcheckinterval', 'getcheckinterval', 'setswitchinterval', 'getswitchinterval', 'setprofile', 'getprofile', 'setrecursionlimit', 'settrace', 'gettrace', 'call_tracing', '_debugmallocstats', 'set_coroutine_origin_tracking_depth', 'get_coroutine_origin_tracking_depth', 'set_coroutine_wrapper', 'get_coroutine_wrapper', 'set_asyncgen_hooks', 'get_asyncgen_hooks', '__displayhook__', '__excepthook__', '__breakpointhook__', 'version', 'hexversion', '_git', '_framework', 'api_version', 'copyright', 'platform', 'maxsize', 'float_info', 'int_info', 'hash_info', 'ma

In [11]:
# Access namespace names via dot notation:
sys.ps1

'In : '

In [13]:
# Or using the dunder dictionary:
sys.__dict__['ps1']

'In : '