## Pandas多重索引

In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [2]:
# Implicit MultiIndex: a list of two parallel label lists becomes a
# two-level index (outer level first, inner level second).
s = Series(
    data=[1, 2, 3, 4],
    index=[
        ['a', 'a', 'b', 'b'],
        ['middle', 'final', 'middle', 'final'],
    ],
)

In [3]:
# Show the Series: the two label lists appear as an outer and an inner index level.
s

a  middle    1
   final     2
b  middle    3
   final     4
dtype: int64

In [5]:
# Implicit MultiIndex on a DataFrame: two parallel label lists passed as
# `index` are combined by pandas into (student, exam) row labels.
df = DataFrame(
    data=np.random.randint(90, 150, size=(6, 3)),
    index=[
        ['Michael', 'Michael', 'Lisa', 'Lisa', 'Po', 'Po'],
        ['Mid', 'End', 'Mid', 'End', 'Mid', 'End'],
    ],
    columns=['Chinese', 'Mathe', 'Python'],
)
df

Unnamed: 0,Unnamed: 1,Chinese,Mathe,Python
Michael,Mid,122,136,122
Michael,End,145,136,108
Lisa,Mid,94,98,134
Lisa,End,106,130,94
Po,Mid,146,139,137
Po,End,141,129,133


In [6]:
# Implicit vs. explicit construction: here the index is built explicitly
# with MultiIndex.from_arrays, which takes one label array per level.
df1 = DataFrame(
    data=np.random.randint(90, 150, size=(6, 3)),
    index=pd.MultiIndex.from_arrays([
        ['Tim', 'Tom', 'Tony', 'Toby', 'Tin', 'Taobao'],
        ['Mid', 'End', 'Mid', 'End', 'Mid', 'End'],
    ]),
    columns=['Java', 'HTML5', 'Python'],
)
df1

Unnamed: 0,Unnamed: 1,Java,HTML5,Python
Tim,Mid,138,101,117
Tom,End,121,109,110
Tony,Mid,114,148,112
Toby,End,137,91,98
Tin,Mid,140,97,91
Taobao,End,103,93,141


In [7]:
# Explicit construction with MultiIndex.from_tuples: each tuple is the
# complete (name, exam) label of one row.
df2 = DataFrame(
    data=np.random.randint(90, 150, size=(6, 3)),
    index=pd.MultiIndex.from_tuples([
        ('Tim', 'mid'),
        ('Tom', 'end'),
        ('Tony', 'mid'),
        ('Toby', 'end'),
        ('Tin', 'mid'),
        ('Taobao', 'end'),
    ]),
    columns=['Java', 'HTML5', 'Python'],
)
df2

Unnamed: 0,Unnamed: 1,Java,HTML5,Python
Tim,mid,146,105,141
Tom,end,147,111,101
Tony,mid,146,104,133
Toby,end,146,134,111
Tin,mid,134,145,92
Taobao,end,139,117,139


In [10]:
# Explicit construction with MultiIndex.from_product: the row labels are the
# Cartesian product of the names and the exams (3 names x 2 exams = 6 rows).
df3 = DataFrame(
    data=np.random.randint(90, 150, size=(6, 3)),
    index=pd.MultiIndex.from_product([
        ['Tim', 'Tom', 'Tony'],
        ['mid', 'end'],
    ]),
    columns=['Java', 'HTML5', 'Python'],
)
df3

Unnamed: 0,Unnamed: 1,Java,HTML5,Python
Tim,mid,116,147,149
Tim,end,90,149,103
Tom,mid,148,129,145
Tom,end,95,138,122
Tony,mid,92,143,93
Tony,end,126,101,141


## Pandas拼接操作



In [14]:
# 3x3 array of random integers drawn from 0..9 (the upper bound 10 is exclusive).
nd = np.random.randint(low=0, high=10, size=(3, 3))
nd

array([[0, 3, 4],
       [8, 6, 9],
       [4, 0, 2]])

In [15]:
# Stack the array on top of itself: axis=0 joins along the rows.
np.concatenate([nd, nd], axis=0)

array([[0, 3, 4],
       [8, 6, 9],
       [4, 0, 2],
       [0, 3, 4],
       [8, 6, 9],
       [4, 0, 2]])

In [19]:
def make_df(cols,inds):
    """Build a small demo DataFrame whose cells spell out their own position.

    Each cell holds the column label followed by the row label, e.g. column
    'A' and row 1 give 'A1', which makes the result of a concat or merge easy
    to read at a glance.

    Parameters
    ----------
    cols : sequence
        Column labels. Each label is converted with str() before the row
        label is appended, so non-string labels (e.g. integers) work too.
    inds : sequence
        Row labels; used as the index and as the suffix of every cell.
        It is iterated once per column, so it must be re-iterable.

    Returns
    -------
    DataFrame
        One row per entry of `inds` and one column per entry of `cols`.
    """
    # str(col) rather than bare `col +` so that a non-string label does not
    # raise TypeError on concatenation.
    data = {col: [str(col) + str(row) for row in inds] for col in cols}
    return DataFrame(data, index=inds, columns=cols)

In [20]:
# Quick check of the helper: every cell is "<column label><row label>".
make_df(cols=['A', 'B'], inds=[1, 2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [23]:
# Two frames with the same columns and disjoint row labels; concat stacks
# them vertically (its default axis) into a single four-row frame.
df1 = make_df(['A', 'B'], [0, 1])
df2 = make_df(['A', 'B'], [2, 3])
pd.concat([df1, df2])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [32]:
# The frames share no column labels, so the stacked result has all four
# columns with NaN where a frame has no data. `keys` adds an outer index
# level that records which input frame each row came from.
x = make_df(['X', 'Y'], ['a', 'b'])
y = make_df(['x', 'y'], ['A', 'B'])
pd.concat([x, y], keys=['x', 'y'])

Unnamed: 0,Unnamed: 1,X,Y,x,y
x,a,Xa,Ya,,
x,b,Xb,Yb,,
y,A,,,xA,yA
y,B,,,xB,yB


In [31]:
# Same row-wise stacking without `keys`: the original row labels are kept as a flat index.
pd.concat([x, y], axis='index')

Unnamed: 0,X,Y,x,y
a,Xa,Ya,,
b,Xb,Yb,,
A,,,xA,yA
B,,,xB,yB


In [34]:
# Two frames with identical columns: rows 0-4 in the first, rows 5-9 in the second.
df1 = make_df(['Auto', 'Fox'], list(range(0, 5)))
df2 = make_df(['Auto', 'Fox'], list(range(5, 10)))


In [35]:
# Append the rows of df2 below df1.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the append is written with pd.concat, which produces the same frame.
# concat is a top-level pandas function; append was a DataFrame method.
pd.concat([df1, df2])

Unnamed: 0,Auto,Fox
0,Auto0,Fox0
1,Auto1,Fox1
2,Auto2,Fox2
3,Auto3,Fox3
4,Auto4,Fox4
5,Auto5,Fox5
6,Auto6,Fox6
7,Auto7,Fox7
8,Auto8,Fox8
9,Auto9,Fox9


In [36]:
# With no `on` given, merge joins on every column the frames share (Auto and
# Fox). No row of df1 equals a row of df2, so the inner join comes back empty.
pd.merge(left=df1, right=df2, how='inner')

Unnamed: 0,Auto,Fox


## Pandas数据处理

In [42]:
# Scores are drawn from {90, 91} only (upper bound 92 is exclusive), so that
# fully identical rows are likely to occur for the duplicate-handling demo.
# NOTE(review): the label 'Chinese' appears twice in `columns`; reproduced
# as in the original - confirm whether the third column should have its own name.
df = DataFrame(
    data=np.random.randint(90, 92, size=(6, 3)),
    index=['Tim', 'Hellon', 'Bush', 'Tin', 'Katty', 'Zon'],
    columns=['Chinese', 'Mathe', 'Chinese'],
)
df

Unnamed: 0,Chinese,Mathe,Chinese.1
Tim,91,91,91
Hellon,90,91,91
Bush,90,90,90
Tin,91,90,91
Katty,91,91,91
Zon,90,91,90


In [43]:
# Boolean mask over rows: True for a row that repeats an earlier row in every
# column; the first occurrence itself stays False.
df.duplicated(keep='first')

Tim       False
Hellon    False
Bush      False
Tin       False
Katty      True
Zon       False
dtype: bool

In [44]:
# Return a copy without the repeated rows (first occurrence kept); df itself is not modified.
df.drop_duplicates(keep='first')

Unnamed: 0,Chinese,Mathe,Chinese.1
Tim,91,91,91
Hellon,90,91,91
Bush,90,90,90
Tin,91,90,91
Zon,90,91,90


In [45]:
# map() can rewrite the current column based on a condition, and can also derive a new column of data.
# It accepts a lambda expression or a named function, including one you implement yourself.
# Aggregating functions such as sum, and for loops, cannot be used with it.
# transform() modifies values in bulk according to some rule or algorithm.


In [48]:
# Single-column frame of random scores in [90, 150) for rows 'a' through 'e'.
dt = DataFrame(
    data=np.random.randint(90, 150, size=(5, 1)),
    index=['a', 'b', 'c', 'd', 'e'],
    columns=['Python'],
)
dt

Unnamed: 0,Python
a,139
b,102
c,120
d,118
e,102


In [53]:
def state(item):
    """Translate a numeric score into a coarse rating label.

    Scores below 100 are 'normal', scores above 135 are 'Excellent', and
    everything else (100 through 135 inclusive) is 'good'.
    """
    if item < 100:
        return 'normal'
    if item > 135:
        return "Excellent"
    # Neither bound was crossed: the score lies in the middle band.
    return 'good'

# Rate every score by running state() over the 'Python' column element-wise;
# the result is stored as a new 'state' column on dt itself (in-place change).
dt['state'] = dt['Python'].map(state)
dt

Unnamed: 0,Python,state
a,139,Excellent
b,102,good
c,120,good
d,118,good
e,102,good


In [54]:
# Relabel rows 'a' and 'b' only. rename() returns a relabelled copy, and row
# labels that are missing from the mapping are left as they are.
index = dict(a='A', b='B')
dt.rename(index=index)

Unnamed: 0,Python,state
A,139,Excellent
B,102,good
c,120,good
d,118,good
e,102,good


In [61]:
# Fresh score table (random integers in [90, 150)) for the outlier-filtering example.
df = DataFrame(
    data=np.random.randint(90, 150, size=(6, 3)),
    index=['Tim', 'Hellon', 'Bush', 'Tin', 'Katty', 'Zon'],
    columns=['Chinese', 'Mathe', 'History'],
)
df

Unnamed: 0,Chinese,Mathe,History
Tim,124,143,128
Hellon,125,111,132
Bush,143,106,90
Tin,118,115,143
Katty,137,107,105
Zon,121,104,115


In [72]:
# Per-cell mask: True where the score's magnitude is below that column's
# mean plus one standard deviation.
# NOTE(review): the bound is one-sided, so only unusually high scores are
# excluded - confirm this is the intended outlier rule.
df2 = np.abs(df).lt(df.mean() + df.std())
# Per-row mask: a student is kept only if every subject is inside the bound.
df3 = df2.all(axis='columns')

In [73]:
# Keep only the rows whose mask entry is True.
df.loc[df3]

Unnamed: 0,Chinese,Mathe,History
Hellon,125,111,132
Katty,137,107,105
Zon,121,104,115


### 排序
使用 `.take()` 按给定的位置索引重新排列行（例如配合 `np.random.permutation` 实现随机排序）。

## 数据聚合
数据聚合通常分为三步：分组（split）、对每组用函数处理（apply）、合并结果（combine）。
对应的 pandas 方法是 `.groupby()`。