## 按行遍历

1. 遍历空的DataFrame，不会有输出
2. 遍历非空的DataFrame，输出每行对应的单元格

In [1]:
import pandas as pd

# Read a CSV that is expected to hold only a header row (per its file name),
# then walk it row by row; with no data rows the loop body never executes.
df = pd.read_csv("ex_empty_csv_with_header.csv")
for row_label, record in df.iterrows():
    name, score = record['name'], record['score']
    print(name, score)

In [2]:
import pandas as pd

# Read the CSV and print the 'name' and 'score' cells of every data row.
# iterrows() yields (row label, row-as-Series) pairs.
df = pd.read_csv("ex_nonempty_csv_with_header.csv")
for row_label, record in df.iterrows():
    fields = (record['name'], record['score'])
    print(*fields)

app 1780


## 选取行

1. 使用`df.loc[row_label]`按照行标签名称来选择单行和多行，不能用“数字索引”。
2. 使用切片的时候不需要使用`.loc`，而是直接通过`df[start_row_label : end_row_label]`的形式。可以使用“名称索引”和“数字索引”；注意按名称切片时包含结束标签，按数字切片时不包含结束位置。
3. 使用`df[col_label]`来选择单列，使用`df[[col_label1, col_label2]]`来选择多列。

*注：`df.index[0:3]`的输出为`['a', 'b', 'c']`。*

要点：

- 离散行的选择要使用`.loc`。
- 非切片场景下使用数字索引时，需要先通过`df.index[...]`把数字位置转换为行标签，再传给`.loc`（或者直接使用`.iloc`）。

In [11]:
import numpy as np
import pandas as pd

# Start from a single integer column whose rows are labelled 'a' through 'd'.
row_labels = ['a', 'b', 'c', 'd']
df = pd.DataFrame({'numbers': [10, 20, 30, 40]}, index=row_labels)

# Tuples are assigned positionally, one value per existing row.
df['floats'] = (1.5, 2.5, 3.5, 4.5)
df['names1'] = ('Yves', 'Guido', 'Felix', 'Francesc')

# A DataFrame assigned as a column is matched on its index labels rather than
# on position, so 'Yv' (labelled 'd') ends up in the last row.
short_names = pd.DataFrame(['Yv', 'Gu', 'Fe', 'Fr'],
                           index=['d', 'a', 'b', 'c'])
df['names2'] = short_names
print(df)

   numbers  floats    names1 names2
a       10     1.5      Yves     Gu
b       20     2.5     Guido     Fe
c       30     3.5     Felix     Fr
d       40     4.5  Francesc     Yv


In [6]:
# One label passed to .loc returns that row as a Series.
print("按行名选择列：", df.loc['b'], sep='\n')

# A list of labels passed to .loc returns the matching rows as a DataFrame.
wanted_labels = ['a', 'b']
print("按行名选择多行：", df.loc[wanted_labels], sep='\n')

# Positions -> labels: slice df.index by position, then hand the labels to .loc.
first_three_labels = df.index[0:3]
print("按行数字索引选择多行：", df.loc[first_three_labels], sep='\n')


# Label slice applied directly to the frame; the end label 'c' is included.
print("按行名切片选择多行：", df['a':'c'], sep='\n')

# Integer slice applied directly to the frame selects rows by position
# (end position excluded).
print("按行数字索引选择多行：", df[0:1], sep='\n')


按行名选择列：
numbers       20
floats       2.5
names1     Guido
names2        Fe
Name: b, dtype: object
按行名选择多行：
   numbers  floats names1 names2
a       10     1.5   Yves     Gu
b       20     2.5  Guido     Fe
按行数字索引选择多行：
   numbers  floats names1 names2
a       10     1.5   Yves     Gu
b       20     2.5  Guido     Fe
c       30     3.5  Felix     Fr
按行名切片选择多行：
   numbers  floats names1 names2
a       10     1.5   Yves     Gu
b       20     2.5  Guido     Fe
c       30     3.5  Felix     Fr
按行数字索引选择多行：
   numbers  floats names1 names2
a       10     1.5   Yves     Gu


## 选择列

- 选择列是直接使用`df[col_label]`来选择单列，使用`df[[col1, col2...]]`来选择多列。
- 如果要使用`.loc`来选择列，那么就需要在行的位置使用切片`:`，如`df.loc[:, col1]`。

In [17]:
import numpy as np
import pandas as pd

# Build the demo frame: one integer column, rows labelled 'a' through 'd'.
df = pd.DataFrame({'numbers': [10, 20, 30, 40]},
                  index=['a', 'b', 'c', 'd'])

df['floats'] = (1.5, 2.5, 3.5, 4.5)
df['names1'] = ('Yves', 'Guido', 'Felix', 'Francesc')

# Assigning a DataFrame as a column matches rows by index label, not position.
short_names = pd.DataFrame(['Yv', 'Gu', 'Fe', 'Fr'],
                           index=['d', 'a', 'b', 'c'])
df['names2'] = short_names
print(df)

# A single column label yields a Series.
float_column = df['floats']
print("column - floats")
print(float_column)

# A list of column labels yields a DataFrame with just those columns.
float_and_name = df[['floats', 'names1']]
print("column - floats&names1")
print(float_and_name)

# Same kind of selection through .loc: ':' keeps every row.
float_and_short_name = df.loc[:, ['floats', 'names2']]
print("column - floats&names2")
print(float_and_short_name)

   numbers  floats    names1 names2
a       10     1.5      Yves     Gu
b       20     2.5     Guido     Fe
c       30     3.5     Felix     Fr
d       40     4.5  Francesc     Yv
column - floats
a    1.5
b    2.5
c    3.5
d    4.5
Name: floats, dtype: float64
column - floats&names1
   floats    names1
a     1.5      Yves
b     2.5     Guido
c     3.5     Felix
d     4.5  Francesc
column - floats&names2
   floats names2
a     1.5     Gu
b     2.5     Fe
c     3.5     Fr
d     4.5     Yv


## 通过行列来索引

- 如果知道标签名称，那么可以使用`df.loc['row']['col']`或者`df.loc['row','col']`。
- 如果都是数字索引，那么使用`.iloc`，如`df.iloc[[3, 4], [1, 2]]`。
- 如果混用，使用`df.iloc[0]['col']`或者`df['col'].iloc[0]`。

In [26]:
import numpy as np
import pandas as pd

# Build the demo frame: one integer column, rows labelled 'a' through 'd'.
df = pd.DataFrame([10, 20, 30, 40],
                  columns=['numbers'],
                  index=['a', 'b', 'c', 'd'])

df['floats'] = (1.5, 2.5, 3.5, 4.5)
df['names1'] = ('Yves', 'Guido', 'Felix', 'Francesc')
# Assigning a DataFrame as a column matches rows by index label, not position.
df['names2'] = pd.DataFrame(['Yv', 'Gu', 'Fe', 'Fr'],
                            index=['d', 'a', 'b', 'c'])
print(df)

# Mixed addressing: column by label, row by position (in either order).
print('----------------------------------')
print("select cell - by name&index lable:")
print(df['floats'].iloc[0])
print(df.iloc[0]['floats'])

# Pure label addressing: three equivalent spellings of the same cell.
print('----------------------------------')
print("select cell - by name lable:")
print(df['floats']['a'])
print(df.loc['a']['floats'])
print(df.loc['a', 'floats'])

print('----------------------------------')
print("select cell - by index lable:")
print(df.iloc[0]['floats'])
# Pure positional addressing goes through .iloc[row, col]. The previous
# spelling df.iloc[0][0] indexed a string-labelled Series with a bare integer,
# which pandas has deprecated as a positional lookup.
print(df.iloc[0, 0])


   numbers  floats    names1 names2
a       10     1.5      Yves     Gu
b       20     2.5     Guido     Fe
c       30     3.5     Felix     Fr
d       40     4.5  Francesc     Yv
----------------------------------
select cell - by name&index lable:
1.5
1.5
----------------------------------
select cell - by name lable:
1.5
1.5
1.5
----------------------------------
select cell - by index lable:
1.5
10


In [None]:


# Example 1: filter inline with a boolean expression -- keep the rows whose
# 'floats' value is greater than 3.0.
print("按条件选择行例1：")
print(df[df['floats'] > 3.0])

# Example 2: the same filter driven by a plain Python list of booleans, then
# by a boolean Series that carries the frame's own index.
print("按条件选择行例2：")
conditions = [f > 3.0 for f in df['floats']]
print(df[conditions])
match_condition = pd.Series(conditions, index=df.index)
print(df[match_condition])

# Example 3: store the boolean Series under a name first, then index with it.
print("按条件选择行例3：")
condition = df['floats'] > 3.0
print(df[condition])