# Setup and get data

In [12]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import datasets

# Keep the raw sklearn Bunch under its own name so `iris` only ever means the DataFrame.
iris_bunch = sklearn.datasets.load_iris()
# convert to pandas df: the four feature columns plus the class label as 'target'
iris = (
    pd.DataFrame(
        np.concatenate((iris_bunch.data, np.array([iris_bunch.target]).T), axis=1),
        columns=list(iris_bunch.feature_names) + ['target'],
    )
    # clean col names: spaces -> underscores, then drop the '_(cm)' unit suffix
    .rename(columns=lambda name: name.replace(' ', '_'))
    .rename(columns={'sepal_length_(cm)': 'sepal_length',
                     'sepal_width_(cm)': 'sepal_width',
                     'petal_length_(cm)': 'petal_length',
                     'petal_width_(cm)': 'petal_width'})
)

### Iterables: 
any object with an `__iter__()` method, i.e. anything you can pass to `iter()`. 
<br>lists, strings, range objects, dicts. 

Applying `iter()` to an `iterable` creates an `iterator`:

In [41]:
# A list is an iterable; iter() hands back an iterator over its items.
iterable = ["a","b","c"]
iterator = iter(iterable)
# Displaying the iterator shows the object itself, not its items.
iterator

<list_iterator at 0x160d18e48>

### Iterator:
keeps state and returns next values. 
<br>any object with a `__next__()` method, which the built-in `next()` calls

In [30]:
# Each next() call returns the following item of the list.
print(next(iterator))
print(next(iterator))
print(next(iterator))
# A fourth call has nothing left to return and raises StopIteration.
# Catch and display it so the notebook still runs top to bottom.
try:
    next(iterator)
except StopIteration as exc:
    print("StopIteration:", exc)

a
b
c


StopIteration: 

All elements of an iterator:

In [42]:
# Unpack a fresh iterator: the one consumed by the next() calls above is
# exhausted and would unpack to nothing.
print(*iter(iterable))

a b c


In [40]:
# The star operator unpacks the list's items into separate print() arguments.
print(*iterable)

a b c


# Loops

In [73]:
# Iterate directly over the list's elements.
for number in [1,2,3]:
    print(number)

1
2
3


With assignment:

In [97]:
numbers = [1,2,3]
# Empty list that the following loop fills.
new = []

In [98]:
# Start from an empty list inside this cell so that re-running it does not
# keep appending to the previous result.
new = []
for number in numbers:
    new.append(number + 1)
new

[2, 3, 4]

Not pythonic:

In [72]:
numbers = [1,2,3]
# Here `number` is really a position (0, 1, 2) that is only used to index back
# into the list; looping over `numbers` directly avoids the detour.
for number in range(len(numbers)):
    print(numbers[number])

1
2
3


### Accumulator/counter

Counter: a variable that is incremented every time a given event occurs

In [34]:
# Bump the counter once per pass through the loop.
count = 0
for num in range(4):
    count += 1
print(count)

4


## [Enumerate](https://realpython.com/python-enumerate/): count without a counter variable


In [69]:
# Sample list used by the plain-loop and enumerate examples.
colors=['red', 'green', 'blue']

As for loop:

In [50]:
# Plain loop: yields each item, with no position information.
for color in colors:
    print(color)

red
green
blue


As enumerate:

In [66]:
# enumerate() yields (position, item) pairs, unpacked here into two names.
for index, color in enumerate(colors):
    print(index, color)

0 red
1 green
2 blue


An **enumerate object** has each element + their index

In [54]:
# Displaying the enumerate object shows the object itself, not its pairs.
enumerate(colors)

<enumerate object at 0x160d190d8>


## `zip`: use any # of iterables 

In [7]:
# Three equal-length lists to be combined position by position.
colors=['red', 'green', 'blue']
animals=['cat', 'cow', 'dog']
verbs=['runs','sleeps', 'flies']

zip(colors, animals, verbs) is an iterator of tuples:

In [6]:
# Displaying the zip object shows the object itself, not its tuples.
zip(colors, animals, verbs)

<zip at 0x107acb508>

one tuple:

In [85]:
# Named first_tuple rather than `tuple` so the built-in tuple type is not shadowed.
first_tuple = next(zip(colors, animals, verbs))
first_tuple

('red', 'cat', 'runs')

`zip` stops as soon as the shortest iterable is exhausted. `itertools.zip_longest` (`izip_longest` in Python 2) keeps going until the longest iterable is exhausted, filling the gaps with `None` by default. ([SO](https://stackoverflow.com/questions/1663807/how-to-iterate-through-two-lists-in-parallel))


### Get all the tuples:

List the zip object:

In [3]:
# list() consumes the zip object; pairing range(len(colors)) with colors
# gives the same (position, item) pairs as enumerate(colors).
list(zip(range(len(colors)), colors))  

[(0, 'red'), (1, 'green'), (2, 'blue')]

Use a for loop:

In [88]:
# Each tuple from zip is unpacked into three loop variables.
for color, animal, verb in zip(colors, animals, verbs):
    print(color, animal, verb) 

red cat runs
green cow sleeps
blue dog flies


Or star/splat operator:

In [89]:
# The star operator unpacks every tuple from zip into a separate print() argument.
print(*zip(colors, animals, verbs))

('red', 'cat', 'runs') ('green', 'cow', 'sleeps') ('blue', 'dog', 'flies')


### Enumerate on zipped iterables to get indexed list of tuples

Enumerate on zipped iterables:

In [14]:
# Pair each (color, animal) tuple with its position and keep the result as a list.
enumerated_lists = list(enumerate(zip(colors, animals)) )

[(0, ('red', 'cat')), (1, ('green', 'cow')), (2, ('blue', 'dog'))]

To get indexed list of tuples:

In [22]:
# Display the stored list of (index, (color, animal)) pairs.
enumerated_lists

[(0, ('red', 'cat')), (1, ('green', 'cow')), (2, ('blue', 'dog'))]

Access tuples and indices:

In [15]:
# First element: the (index, tuple) pair.
enumerated_lists[0]

(0, ('red', 'cat'))

In [19]:
# First element of that pair: the index itself.
enumerated_lists[0][0]

0

In [21]:
# Running total of the letters seen so far (an accumulator).
letter_counter = 0
colors = ['red', 'green', 'blue']
# enumerate already yields (index, item) pairs, so no zip is needed here.
for index, color in enumerate(colors):
    # Add the length of the current color, not of an unrelated variable.
    letter_counter = letter_counter + len(color)
    print("On iteration", index, ", we added '", color, "' and now have", letter_counter, " letters")


On iteration 0 , we added ' red ' and now have 3  letters
On iteration 1 , we added ' green ' and now have 8  letters
On iteration 2 , we added ' blue ' and now have 12  letters


- Indexing with `colors[i]` (as in the `range(len(...))` pattern) only works if `colors` allows access to its items using integer indices. 
- **Sequences** (lists, tuples, strings, ranges) are the iterables that allow this. 
- Other iterables, such as **generators**, iterators and sets, do not.
- If you use numeric indexing (e.g. `[2]`) on one of those, you get a TypeError.

# Map

functional programming way to transform an iterable without a loop
<br>[List comprehensions are preferred over map](https://stackoverflow.com/questions/1247486/list-comprehension-vs-map)

<br> `map(function, iterable)`


As list comprehension:

In [3]:
# Build a new list with 1 added to every element.
[n+1 for n in [1,2,3]]

[2, 3, 4]

As map:

In [8]:
# map() alone returns a map object; nothing is shown until it is consumed.
map(lambda x:x+1, [1,2,3])

<map at 0x102ed7470>

In [9]:
# list() consumes the map object and collects the transformed values.
list(map(lambda x:x+1, [1,2,3]))

[2, 3, 4]

# List comprehensions:
Compact, SQL-like loop replacements
<br>The standard iteration in Python

Regular loop doesn't preserve list format:

In [136]:
# Prints each result on its own line; no list is produced.
for number in [1,2,3]:
    print(number+1)

2
3
4


To have as list, must make empty list and append:

In [137]:
# Collect values in a list by starting empty and appending inside the loop.
# Note: this appends each number unchanged (no +1 applied).
newlist = []
for number in [1,2,3]:
    newlist.append(number)
newlist

[1, 2, 3]

In [109]:
# Sample values for the filtering examples.
data = [500, 200, 1400]

In [124]:
# Loop with a filter: only values above 300 are printed, each increased by 100.
for datum in data:
    if datum > 300:
        print(datum + 100)

600
1500


**List comprehension:**

[expression for item in iterable if condition == True]

Does preserve list format:

In [101]:
# `numbers` is the list [1, 2, 3] defined in the Loops section.
[n+1 for n in numbers]

[2, 3, 4]

In [127]:
# Keep only the values above 300, unchanged.
[datum for datum in data if datum > 300]

[500, 1400]

In [102]:
# Filter first (n > 2), then transform the survivors (n + 1).
[n+1 for n in numbers if n > 2]

[4]

**If long, format like SQL:**
<br>    SELECT
<br>    FROM
<br>    WHERE

In [None]:
# Same comprehension split over three lines: expression / source / condition.
[n+1 
 for n in numbers 
 if n > 2]


# Apply

In [39]:
# axis=1 applies the function to each row, axis=0 to each column.
row_totals = iris.apply(sum, axis=1)
col_totals = iris.apply(sum, axis=0)

# Print the headings and the first three row totals, one item per line.
print("across rows:", "", row_totals[0:3], "", "across cols:", sep="\n")
# The column totals are the cell's displayed value.
col_totals

across rows:

0    10.2
1     9.5
2     9.4
dtype: float64

across cols:


sepal_length    876.5
sepal_width     458.6
petal_length    563.7
petal_width     179.9
target          150.0
dtype: float64

### with a user defined function

In [42]:
def my_calc(x):
    """Return the sum of ``x`` plus 15; ``x`` must provide a ``.sum()`` method."""
    total = x.sum()
    return total + 15

# Apply my_calc to every row and show the first three results.
row_results = iris.apply(my_calc, axis=1)
row_results.iloc[0:3]

0    25.2
1    24.5
2    24.4
dtype: float64

### with a lambda/anonymous function:

In [44]:
# Column-wise (axis=0): each column's total plus 15, via an anonymous function.
col_totals_plus_15 = iris.apply(lambda col: col.sum() + 15, axis=0)
print(col_totals_plus_15)

sepal_length    891.5
sepal_width     473.6
petal_length    578.7
petal_width     194.9
target          165.0
dtype: float64
