In [1]:
import numpy as np
import pandas as pd

# From dict of Series or dicts （从 `Series的字典` 或者 `字典的字典`）

In [2]:
# 字典的key是不可变对象，所以只有从value上嵌套字典.
# Series的字典
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),
}

# 字典的字典 
d1 = {
     66: {'a': 1.0, 'b': 2.0, 'c': 3.0},
     88: {'a': 1.0, 'b': 2.0, 'c': 3.0, 'd': 4.0}
}

df = pd.DataFrame(d)
print(df, '\n')

df1 = pd.DataFrame(d1)
print(df1)

   one  two
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0 

    66   88
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0


In [3]:
# index 、columns 属性
print(df.index, df.columns, sep='\n')

Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['one', 'two'], dtype='object')


In [4]:
pd.DataFrame(d, index=["d", "b", "a"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [5]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["one", "two", "three"])

Unnamed: 0,one,two,three
d,,4.0,
b,2.0,2.0,
a,1.0,1.0,


In [6]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


# From dict of ndarrays / lists （从 `多维数组的字典` 或者 `列表的字典`）

In [7]:
# 从多维数组的字典
d1 = {22: np.linspace(2, 8, 4), '44': np.linspace(6, 9, 4)}
print(d1, d1.keys(), d1.values(), sep='\n')

df1 = pd.DataFrame(d, index=['you', 'and', 'me', 'together'])
df1

{22: array([2., 4., 6., 8.]), '44': array([6., 7., 8., 9.])}
dict_keys([22, '44'])
dict_values([array([2., 4., 6., 8.]), array([6., 7., 8., 9.])])


Unnamed: 0,one,two
you,,
and,,
me,,
together,,


In [8]:
# 从列表的字典
d2 = {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}
print(d2, d2.keys(), d2.values(), sep='\n')

df2 = pd.DataFrame(d, index=['you', 'and', 'me', 'two'])
df2

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}
dict_keys(['one', 'two'])
dict_values([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]])


Unnamed: 0,one,two
you,,
and,,
me,,
two,,


# From a list of dicts （从字典列表）

In [9]:
data1= [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]

pd.DataFrame(data1)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [10]:
df = pd.DataFrame(data1, index=[66, "second"])
df

Unnamed: 0,a,b,c
66,1,2,
second,5,10,20.0


In [11]:
pd.DataFrame(data1, columns=["a", "b"])

Unnamed: 0,a,b
0,1,2
1,5,10


# Column selection, addition, deletion （列的选择，添加，删除）

In [12]:
df['a']

66        1
second    5
Name: a, dtype: int64

In [13]:
df[88] = df['a'] * df['c']
df

Unnamed: 0,a,b,c,88
66,1,2,,
second,5,10,20.0,100.0


In [14]:
df['bool'] = df['a'] >3
df

Unnamed: 0,a,b,c,88,bool
66,1,2,,,False
second,5,10,20.0,100.0,True


In [15]:
# 删除列
del df['bool']
print(df)
print('-'*40)

three = df.pop(88)
print(three)
df

        a   b     c     88
66      1   2   NaN    NaN
second  5  10  20.0  100.0
----------------------------------------
66          NaN
second    100.0
Name: 88, dtype: float64


Unnamed: 0,a,b,c
66,1,2,
second,5,10,20.0


In [16]:
# When inserting a scalar value, it will naturally be propagated to fill the column.
df["foo"] = "bar"
df

Unnamed: 0,a,b,c,foo
66,1,2,,bar
second,5,10,20.0,bar


In [17]:
df

Unnamed: 0,a,b,c,foo
66,1,2,,bar
second,5,10,20.0,bar


In [18]:
# 通过标签选择行，返回Series
df.loc[66]

a        1
b        2
c      NaN
foo    bar
Name: 66, dtype: object

In [19]:
df.loc['second']

a         5
b        10
c      20.0
foo     bar
Name: second, dtype: object

In [20]:
# 通过整数定位选择行，返回Series
df.iloc[1]

a         5
b        10
c      20.0
foo     bar
Name: second, dtype: object

In [21]:
df_iloc = df.iloc[1:3]  # 当选取多行时，返回的是DataFrame 。与 df[1:3] 效果一样！

print(type(df_iloc[1:3]))

print('-'*60)

df_iloc

<class 'pandas.core.frame.DataFrame'>
------------------------------------------------------------


Unnamed: 0,a,b,c,foo
second,5,10,20.0,bar


In [22]:
df[1:3]

Unnamed: 0,a,b,c,foo
second,5,10,20.0,bar


# Data alignment and arithmetic （数据对齐和算术）

In [23]:
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])

df2 = pd.DataFrame(np.random.randn(7, 3), columns=["A", "B", "C"])

df + df2

Unnamed: 0,A,B,C,D
0,-1.076164,1.326758,-0.206207,
1,-0.905246,-1.269395,-0.188823,
2,0.381256,-0.451505,2.491535,
3,0.673061,2.468005,0.498992,
4,-4.179537,1.33781,-0.076889,
5,1.025921,-0.63337,-0.75024,
6,-3.570738,1.462248,0.589299,
7,,,,
8,,,,
9,,,,


In [24]:
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,1.854603,-1.117134,0.224698,0.973074
2,0.104407,-2.147646,1.835215,1.185176
3,1.076172,1.386182,0.607242,1.655046
4,-1.55726,0.225722,0.159763,0.020145
5,-0.277686,0.530044,-1.083007,-0.883537
6,-3.049532,-1.542366,-1.418932,0.411913
7,1.252228,-1.206069,-1.349663,0.572026
8,0.478281,1.15794,-0.848044,0.873534
9,-0.353385,-1.611711,1.20239,1.435487


In [25]:
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
print(df)
df * 5

          A         B         C         D
0  0.293429 -0.143101  1.132789  0.078245
1  1.366146 -0.749091 -0.403674 -0.933928
2 -1.085067  1.318764 -0.560788 -1.396666
3 -1.556977  0.367116  1.034900 -0.313587
4 -1.631095 -0.877206 -0.720434 -0.769470
5 -0.918197  0.452128 -0.379279 -1.092979
6  0.838556 -0.425179  0.060466  0.794768
7 -1.050849 -0.734524  0.362016 -0.830644
8 -1.543935 -1.119995 -0.035673  1.087465
9  0.712533 -1.183216  1.769971  0.234904


Unnamed: 0,A,B,C,D
0,1.467144,-0.715503,5.663944,0.391225
1,6.830731,-3.745453,-2.018372,-4.66964
2,-5.425336,6.593821,-2.803939,-6.983331
3,-7.784887,1.835581,5.174499,-1.567935
4,-8.155475,-4.386028,-3.60217,-3.847349
5,-4.590984,2.26064,-1.896393,-5.464897
6,4.192778,-2.125897,0.302328,3.973839
7,-5.254245,-3.672618,1.810078,-4.15322
8,-7.719676,-5.599976,-0.178365,5.437325
9,3.562666,-5.916082,8.849853,1.174521


In [26]:
# 布尔值
df1 = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]}, dtype=bool)
print(df1, '\n')

df2 = pd.DataFrame({"a": [0, 1, 1], "b": [1, 1, 0]}, dtype=bool)
print(df2)

       a      b
0   True  False
1  False   True
2   True   True 

       a      b
0  False   True
1   True   True
2   True  False


In [27]:
-df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


# 转置

In [28]:
df = df[:3]
df

Unnamed: 0,A,B,C,D
0,0.293429,-0.143101,1.132789,0.078245
1,1.366146,-0.749091,-0.403674,-0.933928
2,-1.085067,1.318764,-0.560788,-1.396666


In [29]:
df.T

Unnamed: 0,0,1,2
A,0.293429,1.366146,-1.085067
B,-0.143101,-0.749091,1.318764
C,1.132789,-0.403674,-0.560788
D,0.078245,-0.933928,-1.396666


In [30]:
df.transpose()

Unnamed: 0,0,1,2
A,0.293429,1.366146,-1.085067
B,-0.143101,-0.749091,1.318764
C,1.132789,-0.403674,-0.560788
D,0.078245,-0.933928,-1.396666
