## 数据类型dataframe创建

In [1]:
import numpy as np
import pandas as pd

1. 字典类创建dataframe

 * 根据数组、列表及元组构成的字典构造dataframe

In [4]:
# (1) Build a DataFrame from a plain dict.
# The dict values deliberately mix a list, a tuple and a NumPy array of equal length.
d_dict = dict(
    a=[1, 2, 3, 4],
    b=(5, 3, 6, 7),
    c=np.arange(9, 13),
)
# The dict keys become the column labels; each dict value supplies that column's data.
df = pd.DataFrame(data=d_dict)
df

Unnamed: 0,a,b,c
0,1,5,9
1,2,3,10
2,3,6,11
3,4,7,12


In [9]:
# Inspect the row index (row labels) of the DataFrame.
df.index

RangeIndex(start=0, stop=4, step=1)

In [11]:
# Inspect the column index (column labels) of the DataFrame.
df.columns

Index(['a', 'b', 'c'], dtype='object')

In [6]:
# Read the data through the `values` attribute; the result is a NumPy array.
df.values

array([[ 1,  5,  9],
       [ 2,  3, 10],
       [ 3,  6, 11],
       [ 4,  7, 12]], dtype=int64)

In [8]:
# Supply explicit row labels through `index` instead of relying on the default integer index.
df2 = pd.DataFrame(
    data=d_dict,
    index=['A', 'B', 'C', 'd'],
)
df2

Unnamed: 0,a,b,c
A,1,5,9
B,2,3,10
C,3,6,11
d,4,7,12


In [11]:
# Supply explicit column labels through `columns`.
# When a requested column label is not a key of the source dict, there is no data
# for it and the whole column is treated as missing. Here none of the labels match,
# so every cell of the result is missing.
df3 = pd.DataFrame(
    data=d_dict,
    index=['A', 'B', 'C', 'd'],
    columns=['甲', '乙', '丙', '丁'],
)
df3

Unnamed: 0,甲,乙,丙,丁
A,,,,
B,,,,
C,,,,
d,,,,


In [13]:
# Supply explicit column labels through `columns`.
# Labels that match keys of the dict ('a', 'b', 'c') keep their data;
# 'd' is not a key of the dict, so that column is filled with missing values.
df4 = pd.DataFrame(
    data=d_dict,
    index=['A', 'B', 'C', 'd'],
    columns=['a', 'b', 'c', 'd'],
)
df4

Unnamed: 0,a,b,c,d
A,1,5,9,
B,2,3,10,
C,3,6,11,
d,4,7,12,


In [16]:
# (2) Build a DataFrame from a dict of Series.
# The two Series have different lengths; positions with no value in the shorter
# one are filled with NaN (missing values).
df5 = pd.DataFrame(
    data=dict(
        a=pd.Series(np.arange(3)),
        b=pd.Series(np.arange(3, 5)),
    )
)
df5

Unnamed: 0,a,b
0,0,3.0
1,1,4.0
2,2,


In [20]:
# (3) Build a DataFrame from a dict of dicts.
# The outer dict's keys are still the column labels;
# the inner dicts' keys become the row labels.
dt2 = dict(
    a=dict(apple=3.6, banana=2.5),
    b=dict(apple=6.4, banana=5.5),
    c=dict(apple=8, orange=7),
)
print(dt2)
pd.DataFrame(data=dt2)

{'a': {'apple': 3.6, 'banana': 2.5}, 'b': {'apple': 6.4, 'banana': 5.5}, 'c': {'apple': 8, 'orange': 7}}


Unnamed: 0,a,b,c
apple,3.6,6.4,8.0
banana,2.5,5.5,
orange,,,7.0


2. 列表类构建dataframe

In [24]:
# (1) Build a DataFrame from a 2D NumPy ndarray.
# The 12 consecutive integers are laid out as 3 rows by 4 columns.
arr1 = np.arange(12).reshape((3, 4))
print(arr1)
df21 = pd.DataFrame(data=arr1)
df21

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [25]:
# (2) Build a DataFrame from a list of dicts.
# With a dict of dicts, the outer keys become the column labels and the inner
# keys become the row labels.
# With a list of dicts, the keys of the inner dicts become the column labels.
# In short: dict keys are used for columns first; they only become row labels
# when an enclosing dict has already supplied the columns.
l_dic = [
    dict(apple=3.6, banana=2.5),
    dict(apple=6.4, banana=5.5),
    dict(apple=8, orange=7),
]
print(l_dic)
df22 = pd.DataFrame(data=l_dic)
df22

[{'apple': 3.6, 'banana': 2.5}, {'apple': 6.4, 'banana': 5.5}, {'apple': 8, 'orange': 7}]


Unnamed: 0,apple,banana,orange
0,3.6,2.5,
1,6.4,5.5,
2,8.0,,7.0


In [27]:
# (3) Build a DataFrame from a list of Series: each Series becomes one row.
# A locally seeded generator is used so the random values are identical on
# every run (a fresh kernel reproduces exactly the same frame).
rng = np.random.default_rng(42)
l_series = [pd.Series(rng.random(5)), pd.Series(rng.random(2))]
print(l_series)
# The second Series holds only 2 values, so the remaining columns of its row are NaN.
df23 = pd.DataFrame(l_series)
df23

[0    0.819862
1    0.673491
2    0.845014
3    0.992261
4    0.218535
dtype: float64, 0    0.158859
1    0.419827
dtype: float64]


Unnamed: 0,0,1,2,3,4
0,0.819862,0.673491,0.845014,0.992261,0.218535
1,0.158859,0.419827,,,


In [28]:
# (4) Build a DataFrame from a plain Python list of lists; each inner list is one row.
l_list = [
    [1, 2],
    [24, 3],
]
print(l_list)
df24 = pd.DataFrame(data=l_list)
df24

[[1, 2], [24, 3]]


Unnamed: 0,0,1
0,1,2
1,24,3
