## pandas中数据选取操作

In [1]:
import pandas as pd

In [2]:
# Sample roster used throughout the notebook: one list per column, aligned by position.
data = dict(
    name=['张三', '李四', '王五', '赵六'],
    age=[18, 19, 17, 20],
    height=[1.68, 1.73, 1.62, 1.55],
)
# The column order is spelled out explicitly rather than taken from the dict.
df = pd.DataFrame(data=data, columns=['name', 'age', 'height'])
df

Unnamed: 0,name,age,height
0,张三,18,1.68
1,李四,19,1.73
2,王五,17,1.62
3,赵六,20,1.55


In [3]:
# Bracket access with a single label returns that column as a Series.
df['name']

0    张三
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [4]:
# Attribute access is an alternative way to get a column as a Series.
df.age

0    18
1    19
2    17
3    20
Name: age, dtype: int64

In [5]:
# A list of labels (even a single one) returns a DataFrame rather than a Series.
df[['name']]

Unnamed: 0,name
0,张三
1,李四
2,王五
3,赵六


In [6]:
# Select several columns at once by passing a list of labels.
df[['name','age']]

Unnamed: 0,name,age
0,张三,18
1,李四,19
2,王五,17
3,赵六,20


In [7]:
# Keep a reference to the 'name' column; no explicit copy is requested here.
names = df['name']
names

0    张三
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [8]:
# Assigning through `names` triggers pandas' "set on a copy of a slice" warning.
# In the recorded run the change also shows up in df (row 0 becomes '田七'),
# which is the view-versus-copy pitfall this cell demonstrates.
names[0]='田七'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [9]:
# Confirm that the first entry of `names` was replaced.
names

0    田七
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [10]:
# The parent frame changed as well: row 0 of 'name' reads '田七' in the recorded output.
df

Unnamed: 0,name,age,height
0,田七,18,1.68
1,李四,19,1.73
2,王五,17,1.62
3,赵六,20,1.55


In [11]:
# Take an explicit copy first so the edit stays local to `names`.
names = df.name.copy()
names[0] = '周八'
names

0    周八
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [12]:
# df still shows '田七' in row 0: editing the copy did not touch the frame.
df

Unnamed: 0,name,age,height
0,田七,18,1.68
1,李四,19,1.73
2,王五,17,1.62
3,赵六,20,1.55


In [13]:
# The column labels are exposed as an Index object.
df.columns

Index(['name', 'age', 'height'], dtype='object')

In [14]:
# An Index can be sliced by position; this keeps the labels at positions 1 and 2.
df.columns[1:3]

Index(['age', 'height'], dtype='object')

In [15]:
# A sliced Index can be passed straight to [] to select the corresponding columns.
df[df.columns[1:3]]

Unnamed: 0,age,height
0,18,1.68
1,19,1.73
2,17,1.62
3,20,1.55


In [16]:
import datetime

In [18]:
# Derive each person's birth year from their age and store it in a new 'year' column.
# The reference year is pinned to 2018 -- the year implied by the recorded output
# (age 18 -> 2000) -- instead of datetime.datetime.now().year, so that re-running the
# notebook reproduces the same values regardless of when it is executed.
REFERENCE_YEAR = 2018
df['year'] = REFERENCE_YEAR - df.age

In [19]:
# Show the frame with the new 'year' column appended.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [20]:
# drop(..., axis=1) removes a column and returns a new frame; the result is not assigned here.
df.drop('year',axis=1)

Unnamed: 0,name,age,height
0,田七,18,1.68
1,李四,19,1.73
2,王五,17,1.62
3,赵六,20,1.55


In [21]:
# df still contains 'year': the preceding drop did not modify it in place.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [22]:
# Several columns can be dropped at once by passing a list of labels.
df.drop(['height','year'],axis=1)

Unnamed: 0,name,age
0,田七,18
1,李四,19
2,王五,17
3,赵六,20


In [23]:
# Again, df itself is unchanged.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [24]:
# Drop every second column starting at position 1 (here 'age' and 'year').
df.drop(df.columns[1::2],axis=1)

Unnamed: 0,name,height
0,田七,1.68
1,李四,1.73
2,王五,1.62
3,赵六,1.55


In [25]:
# df keeps all four columns; none of the drop calls were assigned back.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [26]:
# .loc with a single row label returns that row as a Series.
df.loc[1]

name        李四
age         19
height    1.73
year      1999
Name: 1, dtype: object

In [27]:
# Confirm the type of a single-row selection.
type(df.loc[1])

pandas.core.series.Series

In [28]:
# Wrapping the label in a list keeps the result as a one-row DataFrame.
df.loc[[1]]

Unnamed: 0,name,age,height,year
1,李四,19,1.73,1999


In [29]:
# Select several rows by passing a list of labels.
df.loc[[1,3]]

Unnamed: 0,name,age,height,year
1,李四,19,1.73,1999
3,赵六,20,1.55,1998


In [30]:
# The row labels; here the default RangeIndex.
df.index

RangeIndex(start=0, stop=4, step=1)

In [31]:
# Slicing the index yields the labels of the last two rows.
df.index[-2:]

RangeIndex(start=2, stop=4, step=1)

In [33]:
# Feed those labels to .loc to select the last two rows.
df.loc[df.index[-2:]]

Unnamed: 0,name,age,height,year
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [34]:
# .loc also accepts a column selector: last two rows, only 'name' and 'age'.
df.loc[df.index[-2:],['name','age']]

Unnamed: 0,name,age
2,王五,17
3,赵六,20


In [35]:
# Current state of the frame, for reference.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [36]:
# A slice inside [] selects rows by position (end excluded): rows 1 and 2.
df[1:3]

Unnamed: 0,name,age,height,year
1,李四,19,1.73,1999
2,王五,17,1.62,2001


In [38]:
# df[2] is an error: a bare integer inside [] is looked up as a column label, not a row position.

In [39]:
# A single label inside [] selects a column, whereas a slice inside [] selects rows.
df['name']

0    田七
1    李四
2    王五
3    赵六
Name: name, dtype: object

In [40]:
# Current state of the frame before a row is appended.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998


In [41]:
# (number of rows, number of columns) of the frame.
df.shape

(4, 4)

In [43]:
# Append a row by assigning to a label that does not exist yet; df.shape[0] (the
# current row count) is used as the new label.
# NOTE(review): the recorded output contains this row twice (labels 4 and 5), which
# suggests the cell was executed twice -- it adds another row on every run, and the
# later df2.drop(5) cell relies on that duplicate being present.
df.loc[df.shape[0]]={'age':21,'name':'吴九','height':1.66,'year':0}
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [44]:
# drop with a row label returns a new frame without row 2; the remaining labels keep their values.
df2 = df.drop(2)
df2

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
3,赵六,20,1.55,1998
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [45]:
# Renumber the rows 0..n-1 so the gap left by the dropped row disappears.
df2 = df2.reset_index(drop=True)
df2

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,赵六,20,1.55,1998
3,吴九,21,1.66,0
4,吴九,21,1.66,0


In [51]:
# Start again from df: drop row 2 and keep the original labels this time.
df2 = df.drop(2)
df2

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
3,赵六,20,1.55,1998
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [52]:
# Also remove the row labelled 5 (the duplicated last row in the recorded output).
df2 = df2.drop(5)

In [47]:
# .iloc selects by position: in the recorded output position 2 holds the row labelled 3.
df2.iloc[2] #integer location

name        赵六
age         20
height    1.55
year      1998
Name: 3, dtype: object

In [53]:
# Replace the row labels with letters, one per remaining row (4 rows -> A..D).
df2.index = list("ABCD")
df2

Unnamed: 0,name,age,height,year
A,田七,18,1.68,2000
B,李四,19,1.73,1999
C,赵六,20,1.55,1998
D,吴九,21,1.66,0


In [54]:
# .iloc keeps working by position whatever the labels are: positions 1 and 2 (end excluded).
df2.iloc[1:3]

Unnamed: 0,name,age,height,year
B,李四,19,1.73,1999
C,赵六,20,1.55,1998


In [55]:
# .iat reads a single cell addressed by (row position, column position).
df2.iat[1,1]

19

In [56]:
# .iat can also assign to a single cell.
df2.iat[1,1] = 66

In [57]:
# Row B's age now reads 66.
df2

Unnamed: 0,name,age,height,year
A,田七,18,1.68,2000
B,李四,66,1.73,1999
C,赵六,20,1.55,1998
D,吴九,21,1.66,0


In [58]:
# df is unaffected by the edit made to df2 (row 1 still has age 19).
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [59]:
# A comparison on a column yields a boolean Series with one flag per row.
df['height'] >= 1.65

0     True
1     True
2    False
3    False
4     True
5     True
Name: height, dtype: bool

In [62]:
# Combine boolean masks with & (each comparison in its own parentheses);
# passing the combined mask to [] keeps the rows where it is True.
df[(df['height']>=1.65) & (df['age']<=20)]

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999


In [63]:
# The same filter written as a query string, using column names directly.
df.query('height>=1.65 and age <= 20')

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999


In [66]:
# Query strings support and/or with parentheses; string values are quoted inside the expression.
df.query('(height>=1.65 and age <= 20) or name=="吴九"')

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [68]:
# A Python variable can be referenced inside a query string with the @ prefix:
# here the bare `age` is the column and `@age` is the variable defined on the next line.
age=20
df.query('age==@age')

Unnamed: 0,name,age,height,year
3,赵六,20,1.55,1998


In [69]:
# isin returns a boolean Series marking the rows whose value appears in the given list.
df['age'].isin([18,19])

0     True
1     True
2    False
3    False
4    False
5    False
Name: age, dtype: bool

In [70]:
# Use that mask to keep only the matching rows.
df[df['age'].isin([18,19])]

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999


In [71]:
# Current state of the frame, shown before transposing it.
df

Unnamed: 0,name,age,height,year
0,田七,18,1.68,2000
1,李四,19,1.73,1999
2,王五,17,1.62,2001
3,赵六,20,1.55,1998
4,吴九,21,1.66,0
5,吴九,21,1.66,0


In [72]:
# .T transposes the frame: row labels become column labels and vice versa.
df.T

Unnamed: 0,0,1,2,3,4,5
name,田七,李四,王五,赵六,吴九,吴九
age,18,19,17,20,21,21
height,1.68,1.73,1.62,1.55,1.66,1.66
year,2000,1999,2001,1998,0,0
