## 引入包

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 创建对象

In [2]:
# A Series built from a plain list; the np.nan entry stands in for a missing
# value and the resulting dtype is float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 9])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    9.0
dtype: float64

In [3]:
# Six consecutive daily timestamps beginning on 2018-03-13.
dates = pd.date_range(start='20180313', periods=6)
dates

DatetimeIndex(['2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16',
               '2018-03-17', '2018-03-18'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# A dedicated, seeded RandomState makes the values identical on every
# Restart & Run All (the bare np.random.randn call produced new numbers on
# each run) without touching NumPy's global random state.
# NOTE: outputs saved before this change were drawn unseeded; re-run the
# notebook to refresh them.
rng = np.random.RandomState(20180313)
df = pd.DataFrame(rng.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
2018-03-14,0.969644,0.951196,-0.394295,-0.012396
2018-03-15,-2.533224,0.268784,-0.418247,0.76005
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-17,0.58428,-0.349605,-0.124854,0.235189
2018-03-18,1.785686,0.309275,-1.001553,-1.055367


In [5]:
# Build a frame from a dict of columns, one dtype per column. The scalar
# entries (A, B, F) are repeated on every row of the four-row frame.
df2 = pd.DataFrame(
    {
        'A': 1,
        'B': pd.Timestamp('20180313'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(['test', 'test2', 'test3', 'test4']),
        'F': 'fool',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2018-03-13,1.0,3,test,fool
1,1,2018-03-13,1.0,3,test2,fool
2,1,2018-03-13,1.0,3,test3,fool
3,1,2018-03-13,1.0,3,test4,fool


In [6]:
# Per-column dtypes: each column of df2 keeps its own type.
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## 查看数据

In [7]:
# Show the whole frame (only six rows, so the full display stays readable).
df

Unnamed: 0,A,B,C,D
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
2018-03-14,0.969644,0.951196,-0.394295,-0.012396
2018-03-15,-2.533224,0.268784,-0.418247,0.76005
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-17,0.58428,-0.349605,-0.124854,0.235189
2018-03-18,1.785686,0.309275,-1.001553,-1.055367


In [8]:
# head() with no argument returns the first five rows.
df.head()

Unnamed: 0,A,B,C,D
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
2018-03-14,0.969644,0.951196,-0.394295,-0.012396
2018-03-15,-2.533224,0.268784,-0.418247,0.76005
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-17,0.58428,-0.349605,-0.124854,0.235189


In [9]:
# Pass a count to limit the preview: first two rows only.
df.head(2)

Unnamed: 0,A,B,C,D
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
2018-03-14,0.969644,0.951196,-0.394295,-0.012396


In [10]:
# tail(n) is the counterpart of head(n): the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-17,0.58428,-0.349605,-0.124854,0.235189
2018-03-18,1.785686,0.309275,-1.001553,-1.055367


In [11]:
# Row labels of df: the daily DatetimeIndex it was constructed with.
df.index

DatetimeIndex(['2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16',
               '2018-03-17', '2018-03-18'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Row labels of df2: plain integer labels 0-3.
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [13]:
# Column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [14]:
# The frame's data as a 2-D NumPy array (index and column labels are dropped).
df.values

array([[ 0.04941045,  1.87831022, -0.14789044, -1.58826543],
       [ 0.96964361,  0.9511957 , -0.39429511, -0.01239569],
       [-2.53322417,  0.2687836 , -0.41824712,  0.76004998],
       [ 1.15223636, -0.16805985,  0.65525934, -0.41669137],
       [ 0.58428023, -0.34960545, -0.1248537 ,  0.23518853],
       [ 1.78568575,  0.30927525, -1.00155318, -1.05536747]])

In [15]:
# df2 mixes column types, so the resulting array has dtype=object.
df2.values

array([[1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test', 'fool'],
       [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test2', 'fool'],
       [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test3', 'fool'],
       [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test4', 'fool']],
      dtype=object)

## 数据统计和操作

In [16]:
# Summary statistics for each column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.334672,0.48165,-0.238597,-0.346247
std,1.519575,0.819667,0.540141,0.863
min,-2.533224,-0.349605,-1.001553,-1.588265
25%,0.183128,-0.058849,-0.412259,-0.895698
50%,0.776962,0.289029,-0.271093,-0.214544
75%,1.106588,0.790716,-0.130613,0.173292
max,1.785686,1.87831,0.655259,0.76005


In [17]:
# The summary returned by describe() is itself a DataFrame.
type(df.describe())

pandas.core.frame.DataFrame

In [18]:
# Show the frame again as a reference point for the cells that follow.
df

Unnamed: 0,A,B,C,D
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
2018-03-14,0.969644,0.951196,-0.394295,-0.012396
2018-03-15,-2.533224,0.268784,-0.418247,0.76005
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-17,0.58428,-0.349605,-0.124854,0.235189
2018-03-18,1.785686,0.309275,-1.001553,-1.055367


In [19]:
df.T # transpose: rows and columns swap places

Unnamed: 0,2018-03-13 00:00:00,2018-03-14 00:00:00,2018-03-15 00:00:00,2018-03-16 00:00:00,2018-03-17 00:00:00,2018-03-18 00:00:00
A,0.04941,0.969644,-2.533224,1.152236,0.58428,1.785686
B,1.87831,0.951196,0.268784,-0.16806,-0.349605,0.309275
C,-0.14789,-0.394295,-0.418247,0.655259,-0.124854,-1.001553
D,-1.588265,-0.012396,0.76005,-0.416691,0.235189,-1.055367


In [20]:
# Sort along the column axis: reorder the column labels in descending
# order (D, C, B, A); the rows are left as they are.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2018-03-13,-1.588265,-0.14789,1.87831,0.04941
2018-03-14,-0.012396,-0.394295,0.951196,0.969644
2018-03-15,0.76005,-0.418247,0.268784,-2.533224
2018-03-16,-0.416691,0.655259,-0.16806,1.152236
2018-03-17,0.235189,-0.124854,-0.349605,0.58428
2018-03-18,-1.055367,-1.001553,0.309275,1.785686


In [21]:
# Reorder the rows by the values in column B (ascending is the default).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2018-03-17,0.58428,-0.349605,-0.124854,0.235189
2018-03-16,1.152236,-0.16806,0.655259,-0.416691
2018-03-15,-2.533224,0.268784,-0.418247,0.76005
2018-03-18,1.785686,0.309275,-1.001553,-1.055367
2018-03-14,0.969644,0.951196,-0.394295,-0.012396
2018-03-13,0.04941,1.87831,-0.14789,-1.588265
