In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Series 创建

In [4]:
# A Series built without an explicit index gets the default integer labels 0..n-1.
pd.Series(np.arange(10))

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [5]:
# index= attaches custom labels (here 1..5) to the values, position by position.
pd.Series([6.3,5.7,3.3,2.2,7.4], index=[1,2,3,4,5])

1    6.3
2    5.7
3    3.3
4    2.2
5    7.4
dtype: float64

In [6]:
# A dict maps straight onto a Series: keys become the index labels, values the data.
color_count = pd.Series(
    {
        'red': 100,
        'blue': 200,
        'green': 500,
        'yellow': 1000,
    }
)
color_count

blue       200
green      500
red        100
yellow    1000
dtype: int64

In [7]:
# .index exposes the row labels of the Series as a pandas Index object.
color_count.index

Index(['blue', 'green', 'red', 'yellow'], dtype='object')

In [8]:
# .values exposes the underlying data as a NumPy array, without the labels.
color_count.values

array([ 200,  500,  100, 1000], dtype=int64)

# DataFrame 的创建

In [10]:
# Wrap a 2x3 array of standard-normal samples in a DataFrame; with no labels given,
# rows and columns both get default integer labels.
# NOTE(review): no seed is set in the visible cells, so the values presumably change on every run.
pd.DataFrame(np.random.randn(2,3))

Unnamed: 0,0,1,2
0,0.612816,-0.786033,-1.303674
1,-0.866496,0.110397,-1.197897


In [14]:
# Fix the global NumPy seed so that Restart & Run All regenerates the same score
# matrix; every later cell (stu_data and its views) is derived from it.
np.random.seed(0)
# 10 students x 5 subjects, integer scores drawn from [40, 100) (upper bound exclusive).
score = np.random.randint(40, 100, (10, 5))
score

array([[52, 58, 85, 87, 40],
       [95, 41, 42, 60, 97],
       [84, 49, 45, 43, 67],
       [55, 90, 44, 68, 74],
       [70, 87, 78, 44, 77],
       [49, 46, 96, 82, 99],
       [41, 64, 79, 89, 80],
       [44, 80, 65, 75, 68],
       [96, 78, 67, 84, 75],
       [66, 60, 78, 53, 64]])

In [15]:
# Without index=/columns=, the score matrix gets default integer row and column labels.
pd.DataFrame(score)

Unnamed: 0,0,1,2,3,4
0,52,58,85,87,40
1,95,41,42,60,97
2,84,49,45,43,67
3,55,90,44,68,74
4,70,87,78,44,77
5,49,46,96,82,99
6,41,64,79,89,80
7,44,80,65,75,68
8,96,78,67,84,75
9,66,60,78,53,64


In [17]:
# Subject names, used later as the column labels of the score table.
subjects = [
    "语文",
    "数学",
    "英语",
    "政治",
    "体育",
]
# One student label per row of `score`: '同学0', '同学1', ...
stu = ['同学' + str(row_no) for row_no in range(len(score))]

In [20]:
# Label the raw score matrix: student names on the rows, subject names on the columns.
stu_data = pd.DataFrame(data=score, columns=subjects, index=stu)

In [21]:
# (rows, columns) of the frame: one row per student label, one column per subject.
stu_data.shape

(10, 5)

In [22]:
# Row labels of the frame (the student names passed as index=).
stu_data.index

Index(['同学0', '同学1', '同学2', '同学3', '同学4', '同学5', '同学6', '同学7', '同学8', '同学9'], dtype='object')

In [23]:
# Cell values as a 2-D NumPy array, with the row/column labels stripped.
stu_data.values

array([[52, 58, 85, 87, 40],
       [95, 41, 42, 60, 97],
       [84, 49, 45, 43, 67],
       [55, 90, 44, 68, 74],
       [70, 87, 78, 44, 77],
       [49, 46, 96, 82, 99],
       [41, 64, 79, 89, 80],
       [44, 80, 65, 75, 68],
       [96, 78, 67, 84, 75],
       [66, 60, 78, 53, 64]])

In [24]:
# Column labels of the frame (the subject names passed as columns=).
stu_data.columns

Index(['语文', '数学', '英语', '政治', '体育'], dtype='object')

In [25]:
# Transpose: subjects become the rows and students become the columns.
stu_data.T

Unnamed: 0,同学0,同学1,同学2,同学3,同学4,同学5,同学6,同学7,同学8,同学9
语文,52,95,84,55,70,49,41,44,96,66
数学,58,41,49,90,87,46,64,80,78,60
英语,85,42,45,44,78,96,79,65,67,78
政治,87,60,43,68,44,82,89,75,84,53
体育,40,97,67,74,77,99,80,68,75,64


In [26]:
# First rows of the frame; head() returns 5 rows when no count is given.
stu_data.head()

Unnamed: 0,语文,数学,英语,政治,体育
同学0,52,58,85,87,40
同学1,95,41,42,60,97
同学2,84,49,45,43,67
同学3,55,90,44,68,74
同学4,70,87,78,44,77


In [27]:
# Last rows of the frame; tail() returns 5 rows when no count is given.
stu_data.tail()

Unnamed: 0,语文,数学,英语,政治,体育
同学5,49,46,96,82,99
同学6,41,64,79,89,80
同学7,44,80,65,75,68
同学8,96,78,67,84,75
同学9,66,60,78,53,64


In [29]:
# Replace the student-name index with the default 0..n-1 integers; drop=True discards
# the old labels instead of keeping them as a column. The result is only displayed,
# not assigned, so stu_data itself is unchanged.
stu_data.reset_index(drop=True)

Unnamed: 0,语文,数学,英语,政治,体育
0,52,58,85,87,40
1,95,41,42,60,97
2,84,49,45,43,67
3,55,90,44,68,74
4,70,87,78,44,77
5,49,46,96,82,99
6,41,64,79,89,80
7,44,80,65,75,68
8,96,78,67,84,75
9,66,60,78,53,64


In [30]:
# Build a small sales table from a dict of equal-length lists:
# each key becomes a column name, each list that column's values.
df = pd.DataFrame(
    {
        'month': [1, 4, 7, 10],
        'year': [2012, 2014, 2013, 2014],
        'sale': [55, 40, 84, 31],
    }
)

In [31]:
# Display the sales frame; rows carry the default integer index.
df

Unnamed: 0,month,sale,year
0,1,55,2012
1,4,40,2014
2,7,84,2013
3,10,31,2014


In [34]:
# df already has the default integer index, so the displayed result looks the same as df.
# The result is not assigned, so df itself is untouched.
df.reset_index(drop=True)

Unnamed: 0,month,sale,year
0,1,55,2012
1,4,40,2014
2,7,84,2013
3,10,31,2014


In [35]:
# Use the 'month' column as the row index. The result is only displayed, not assigned,
# so df keeps 'month' as an ordinary column.
df.set_index('month')

Unnamed: 0_level_0,sale,year
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55,2012
4,40,2014
7,84,2013
10,31,2014


# MultiIndex

In [37]:
# Move 'year' and 'month' from columns into a two-level row index (MultiIndex).
# The guard keeps this cell idempotent: once df has been re-indexed those columns
# no longer exist, so an unguarded second run would raise KeyError.
if list(df.index.names) != ['year', 'month']:
    df = df.set_index(['year', 'month'])

In [38]:
# With two key columns set as the index, df.index is a MultiIndex.
df.index

MultiIndex(levels=[[2012, 2013, 2014], [1, 4, 7, 10]],
           labels=[[0, 2, 1, 2], [0, 1, 2, 3]],
           names=['year', 'month'])

In [39]:
# Names of the index levels, one per column that was moved into the index.
df.index.names

FrozenList(['year', 'month'])