### https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas

### While iterrows() is a good option, sometimes itertuples() can be much faster:

In [3]:
import numpy as np
import pandas as pd

# Sample frame for the benchmark: 1000 rows with two random float columns
# ('a', 'b'), one random integer column 'N' drawn from [100, 1000), and a
# constant string column 'x'.
df = pd.DataFrame({'a': np.random.randn(1000), 'b': np.random.randn(1000),'N': np.random.randint(100, 1000, (1000)), 'x': 'x'})

# iterrows() yields (index, Series) pairs; column 'a' is read by attribute.
%timeit [row.a * 2 for idx, row in df.iterrows()]

# itertuples() yields namedtuples whose element 0 is the index, so row[1] is
# the first column ('a').
%timeit [row[1] * 2 for row in df.itertuples()]

# name=None makes itertuples() yield plain tuples instead of namedtuples.
%timeit [row[1] * 2 for row in df.itertuples(name=None)] # regular tuples

10 loops, best of 3: 68 ms per loop
1000 loops, best of 3: 1.5 ms per loop
1000 loops, best of 3: 661 µs per loop


#### You can also write your own iterator that yields namedtuples:

In [1]:
from collections import namedtuple

def myiter(d, cols=None):
    """Yield each row of DataFrame ``d`` as a ``MyTuple`` namedtuple.

    Parameters
    ----------
    d : pandas.DataFrame
        Frame whose rows are iterated. The index is not included in the
        yielded tuples.
    cols : iterable of column labels, optional
        Columns to include, in the given order. When ``None`` (default),
        every column of ``d`` is used.

    Yields
    ------
    MyTuple
        One namedtuple per row, with one field per selected column.

    Notes
    -----
    Column labels that are not valid Python identifiers (for example the
    default integer labels ``0, 1, ...``), that are keywords, or that are
    duplicated are replaced by positional names (``_0``, ``_1``, ...), the
    same convention ``DataFrame.itertuples`` follows. Labels that are
    already valid field names are kept unchanged.

    Because this is a generator, nothing runs (and no lookup error such as
    a ``KeyError`` for an unknown column is raised) until the first row is
    requested.
    """
    if cols is None:
        v = d.values.tolist()
        cols = d.columns.values.tolist()
    else:
        # Materialise once: ``cols`` is consumed twice below (position lookup
        # and field names), which would silently exhaust a one-shot iterable.
        cols = list(cols)
        j = [d.columns.get_loc(c) for c in cols]
        v = d.values[:, j].tolist()

    # rename=True keeps namedtuple from raising on labels that cannot be used
    # as attribute names; valid labels pass through untouched.
    n = namedtuple('MyTuple', cols, rename=True)

    for line in v:
        yield n(*line)

In [2]:
import numpy as np
import pandas as pd
import timeit

%matplotlib notebook  ### required for the plots

def iterfullA(d):
    """Collect every row of ``d`` produced by ``myiter`` into a list."""
    rows = myiter(d)
    return list(rows)

def iterfullB(d):
    """Collect every row of ``d`` as a plain tuple (index excluded) into a list."""
    rows = d.itertuples(name=None, index=False)
    return list(rows)

def itersubA(d):
    """Collect rows of ``d`` restricted to col3..col7, via ``myiter``, into a list."""
    wanted = ['col3', 'col4', 'col5', 'col6', 'col7']
    rows = myiter(d, wanted)
    return list(rows)

def itersubB(d):
    """Collect rows of ``d`` restricted to col3..col7 as plain tuples into a list."""
    wanted = ['col3', 'col4', 'col5', 'col6', 'col7']
    subset = d[wanted]
    return list(subset.itertuples(name=None, index=False))

# Timing table: one row per DataFrame length, one column per benchmarked
# function; cells are filled in by the loop below.
res = pd.DataFrame(
    index=[10, 30, 100, 300, 1000, 3000, 10000, 30000],
    columns='iterfullA iterfullB itersubA itersubB'.split(),
    dtype=float
)

for i in res.index:
    # Fresh frame of i rows and 10 columns (col0..col9) holding random
    # integers in [0, 10).
    d = pd.DataFrame(np.random.randint(10, size=(i, 10))).add_prefix('col')
    for j in res.columns:
        # timeit executes the statement in its own namespace, so both the
        # frame and the function under test are imported from __main__.
        stmt = '{}(d)'.format(j)
        setp = 'from __main__ import d, {}'.format(j)
        # Store the total time in seconds for 100 calls.
        res.at[i, j] = timeit.timeit(stmt, setp, number=100)

# Slicing each column name with [4:-1] drops the 'iter' prefix and the
# trailing A/B, leaving the group keys 'full' and 'sub'; one log-log plot is
# drawn per group.
# NOTE(review): groupby(axis=1) is deprecated in recent pandas releases —
# confirm against the installed version before re-running.
res.groupby(res.columns.str[4:-1], axis=1).plot(loglog=True)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

full    AxesSubplot(0.125,0.11;0.775x0.77)
sub     AxesSubplot(0.125,0.11;0.775x0.77)
dtype: object

#### To loop over all rows of a DataFrame and work with each row's values conveniently, the namedtuples can be converted to ndarrays. For example:

In [26]:
# Two-row demo frame: integer column 'col1', float column 'col2', string
# index labels 'a' and 'b'.
df = pd.DataFrame({'col1': [1, 2], 'col2': [0.1, 0.2]}, index=['a', 'b'])

In [28]:
# Print each row (index excluded) as a NumPy array built from its namedtuple.
for record in df.itertuples(name='Pandas', index=False):
    as_array = np.asarray(record)
    print(as_array)

[ 1.   0.1]
[ 2.   0.2]


In [4]:
res

Unnamed: 0,iterfullA,iterfullB,itersubA,itersubB
10,0.053953,0.067851,0.050095,0.085451
30,0.057466,0.068811,0.048321,0.083163
100,0.063855,0.072034,0.056185,0.084087
300,0.079737,0.081065,0.067167,0.096969
1000,0.133248,0.112837,0.109802,0.109827
3000,0.301279,0.202004,0.248742,0.161647
10000,0.887512,0.616699,0.755088,0.476335
30000,2.809888,1.80605,2.158443,0.994526
