# Loop

## Looping Data Structures
### Loop over dictionary

```python
for key, value in world.items() :
    code here
```

In [1]:
# Definition of dictionary
europe = {'spain':'madrid', 'france':'paris', 'germany':'bonn', 
          'norway':'oslo', 'italy':'rome', 'poland':'warsaw', 'australia':'vienna' }
          
# Iterate over europe
for country, capital in europe.items():
    print("{a}'s capital is {b}".format(a=country, b=capital))

australia's capital is vienna
italy's capital is rome
spain's capital is madrid
germany's capital is bonn
poland's capital is warsaw
norway's capital is oslo
france's capital is paris


### Loop over Numpy array

If you're dealing with a 1D Numpy array, looping over all elements can be as simple as:
```python
for x in my_array :
    ...
```

If you're dealing with a 2D Numpy array, it's more complicated. A 2D array is built up of multiple 1D arrays. To explicitly iterate over all separate elements of a multi-dimensional array, you'll need this syntax:
```python
for x in np.nditer(my_array) :
    ...
```

In [2]:
# Import numpy as np
import numpy as np

# For loop over rand_1d
rand_1d = np.random.rand(10)
for num in rand_1d:
    print(num)

0.0277066719767
0.565342129993
0.246867839245
0.693330323204
0.173823244411
0.216526572351
0.924296008524
0.243447846157
0.212433779243
0.585218499416


In [3]:
rand_2d = np.random.rand(10).reshape(5,2)
# For loop over rand_2d
for num in (rand_2d):
    print(num)

[ 0.01295309  0.41630104]
[ 0.39832819  0.08128262]
[ 0.5987957   0.41718892]
[ 0.59157439  0.62928409]
[ 0.72003707  0.45421513]


In [4]:
# For loop over rand_2d elements
for num in np.nditer(rand_2d):
    print(num)

0.012953094449703717
0.41630104397729006
0.3983281927604809
0.08128261542699311
0.5987956955838
0.4171889176497322
0.5915743882644279
0.6292840925963898
0.7200370706059674
0.4542151289043952


### Loop over Pandas Dataframes

In [5]:
import pandas as pd
brics = pd.read_csv("data/brics.csv", index_col=0)
brics

Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Delhi,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


In [6]:
# below for loop won't work well with the pandas framework
# will only print the column names
for row in brics:
    print(row)

country
capital
area
population


In [7]:
# use .iterrows() method to print out each label and row
for label, row in brics.iterrows():
    print("{a}: \n{b}".format(a=label, b = row))

BR: 
country         Brazil
capital       Brasilia
area             8.516
population       200.4
Name: BR, dtype: object
RU: 
country       Russia
capital       Moscow
area            17.1
population     143.5
Name: RU, dtype: object
IN: 
country           India
capital       New Delhi
area              3.286
population         1252
Name: IN, dtype: object
CH: 
country         China
capital       Beijing
area            9.597
population       1357
Name: CH, dtype: object
SA: 
country       South Africa
capital           Pretoria
area                 1.221
population           52.98
Name: SA, dtype: object


In [8]:
# Only print the label and capital
for label, row in brics.iterrows():
    print("{a}'s capital is {b}".format(a=row["country"], b=row["capital"]))

Brazil's capital is Brasilia
Russia's capital is Moscow
India's capital is New Delhi
China's capital is Beijing
South Africa's capital is Pretoria


In [9]:
# Add a column, country name length
for label, row in brics.iterrows():
    brics.loc[label, "Country Name Length"] = len(row["country"])
brics

Unnamed: 0,country,capital,area,population,Country Name Length
BR,Brazil,Brasilia,8.516,200.4,6.0
RU,Russia,Moscow,17.1,143.5,6.0
IN,India,New Delhi,3.286,1252.0,5.0
CH,China,Beijing,9.597,1357.0,5.0
SA,South Africa,Pretoria,1.221,52.98,12.0


In [10]:
for label, row in brics.iterrows():
    brics.loc[label, "Capital Name Length"] = len(row["capital"])
brics
# Not efficient though

Unnamed: 0,country,capital,area,population,Country Name Length,Capital Name Length
BR,Brazil,Brasilia,8.516,200.4,6.0,8.0
RU,Russia,Moscow,17.1,143.5,6.0,6.0
IN,India,New Delhi,3.286,1252.0,5.0,9.0
CH,China,Beijing,9.597,1357.0,5.0,7.0
SA,South Africa,Pretoria,1.221,52.98,12.0,8.0


In [11]:
# Alternatively, use apply to perform elementwise calculation
brics["capital_name_length"] = brics["capital"].apply(len)
brics

Unnamed: 0,country,capital,area,population,Country Name Length,Capital Name Length,capital_name_length
BR,Brazil,Brasilia,8.516,200.4,6.0,8.0,8
RU,Russia,Moscow,17.1,143.5,6.0,6.0,6
IN,India,New Delhi,3.286,1252.0,5.0,9.0,9
CH,China,Beijing,9.597,1357.0,5.0,7.0,7
SA,South Africa,Pretoria,1.221,52.98,12.0,8.0,8


In [12]:
# Import cars data
cars = pd.read_csv('data/cars.csv', index_col = 0)

# Iterate over rows of cars
for label, row in cars.iterrows():
    print(label)
    print(row)

US
cars_per_cap              809
country         United States
drives_right             True
Name: US, dtype: object
AUS
cars_per_cap          731
country         Australia
drives_right        False
Name: AUS, dtype: object
JAP
cars_per_cap      588
country         Japan
drives_right    False
Name: JAP, dtype: object
IN
cars_per_cap       18
country         India
drives_right    False
Name: IN, dtype: object
RU
cars_per_cap       200
country         Russia
drives_right      True
Name: RU, dtype: object
MOR
cars_per_cap         70
country         Morocco
drives_right       True
Name: MOR, dtype: object
EG
cars_per_cap       45
country         Egypt
drives_right     True
Name: EG, dtype: object


In [13]:
# Adapt for loop
for lab, row in cars.iterrows():
    print(lab+": "+str(row["cars_per_cap"]))

US: 809
AUS: 731
JAP: 588
IN: 18
RU: 200
MOR: 70
EG: 45


In [14]:
# Code for loop that adds COUNTRY column
for lab, row in cars.iterrows():
    cars.loc[lab, "COUNTRY"] = row["country"].upper()
cars

Unnamed: 0,cars_per_cap,country,drives_right,COUNTRY
US,809,United States,True,UNITED STATES
AUS,731,Australia,False,AUSTRALIA
JAP,588,Japan,False,JAPAN
IN,18,India,False,INDIA
RU,200,Russia,True,RUSSIA
MOR,70,Morocco,True,MOROCCO
EG,45,Egypt,True,EGYPT


In [15]:
# Using iterrows() to iterate over every observation of a Pandas DataFrame is easy to understand, 
# but not very efficient. On every iteration, you're creating a new Pandas Series.
# Can use apply
cars["Drive Direction"] = cars["drives_right"].apply(lambda x: "Right" if x else "Left")
cars

Unnamed: 0,cars_per_cap,country,drives_right,COUNTRY,Drive Direction
US,809,United States,True,UNITED STATES,Right
AUS,731,Australia,False,AUSTRALIA,Left
JAP,588,Japan,False,JAPAN,Left
IN,18,India,False,INDIA,Left
RU,200,Russia,True,RUSSIA,Right
MOR,70,Morocco,True,MOROCCO,Right
EG,45,Egypt,True,EGYPT,Right


In [16]:
cars["country_min"] = cars["country"].apply(str.lower)
cars

Unnamed: 0,cars_per_cap,country,drives_right,COUNTRY,Drive Direction,country_min
US,809,United States,True,UNITED STATES,Right,united states
AUS,731,Australia,False,AUSTRALIA,Left,australia
JAP,588,Japan,False,JAPAN,Left,japan
IN,18,India,False,INDIA,Left,india
RU,200,Russia,True,RUSSIA,Right,russia
MOR,70,Morocco,True,MOROCCO,Right,morocco
EG,45,Egypt,True,EGYPT,Right,egypt
