## 3.1 Pandas 数据结构的介绍
### 3.1.2 Series 数据类型的运算

In [None]:
from pandas import Series
# Two stock deliveries keyed by product name; the label sets only partly overlap.
goods_in = Series(
    {
        "苹果": 30,
        "梨": 25,
        "香蕉": 20,
        "桃": 21,
        "李子": 15,
    }
)
goods_other_in = Series(
    {
        "苹果": 10,
        "梨": 20,
        "香蕉": 15,
        "桃": 10,
        "西瓜": 50,
    }
)

# Series addition aligns on index labels; a label present on only one side
# yields NaN in the result.
goods_kucun = goods_in + goods_other_in
print(f"库存:\n[{goods_kucun}]")
print("注：一个 Series 中有索引而另一个没有，相加后结果为 NAN （表示空值）")

In [None]:
import numpy as np
# Build a Series whose last entry ("李子") is missing, i.e. NaN.
fruit_labels = ["苹果", "梨", "香蕉", "桃", "李子"]
fruit_values = [30, 25, 20, 21, np.nan]
goods = Series(fruit_values, index=fruit_labels)
print(f"original Series:\n{goods}")

# Assigning through the label overwrites the NaN entry in place.
goods["李子"] = 15
print(f"Series after assigning new values:\n{goods}")


### 3.1.3 DataFrame 数据结构

In [None]:
from pandas import DataFrame

# Column-oriented source data: each key becomes a DataFrame column.
paints = {
    "字画名称": ["旭日东升", "富水长流", "招财进宝", "鸿运当头"],
    "字画底价": [2860, 498, 1068, 598],
    "字画拍卖加价": [1000, 2000, 500, 1500],
}

# Without `columns`, the frame keeps the key order of the dict.
goods_in = DataFrame(data=paints)
print(f"goods_in:\n{goods_in}")
print("-" * 20)

# An explicit `columns` list decides the column order of the result.
goods_in1_columns = ["字画名称", "字画拍卖加价", "字画底价"]
goods_in1 = DataFrame(data=paints, columns=goods_in1_columns)
print(f"goods_in1:\n{goods_in1}")

DataFrame最终是按照columns指定的顺序排列的。如果传入的列名在数据中是无法找到的,就会产生NaN值。  

In [None]:
# "字画所属人" is not a key of `paints`, so that column is created filled with NaN.
goods_in2_columns = ["字画名称", "字画拍卖加价", "字画底价", "字画所属人"]
goods_in2 = DataFrame(paints, columns=goods_in2_columns)
print(f"goods_in2:\n{goods_in2}")

为DataFrame结构指定列名之后,可以通过类似字典标记的方式将某一列获取为一个Series结构。  
代码中定义了字画的DataFrame之后,直接通过goods_in3["字画底价"]来访问“字画底价”这个维度的数据,获取的是一个Series结构的数据。  
返回的Series拥有与原DataFrame相同的索引,且其索引值也已经被相应地设置好了。

In [None]:
# Give every row a readable label instead of the default 0..3 integer index.
goods_in3_labels = ["第一幅", "第二幅", "第三幅", "第四幅"]
goods_in3 = DataFrame(paints, index=goods_in3_labels)

# Dict-style column access returns the column as a Series sharing the frame's index.
paints_price = goods_in3["字画底价"]
print(paints_price)

可以通过位置和名称的方式访问行。  
使用goods_in3.loc["第三幅"] 来获取行数据,其中loc是按标签(索引名称)选取数据的索引器,“第三幅”就是索引的名称;如果要按整数位置选取行,则使用iloc。


In [None]:
# .loc looks rows up by index label; a single label returns that row as a Series.
third_row_label = "第三幅"
paints_3 = goods_in3.loc[third_row_label]
print(f"paints_3:\n{paints_3}")

利用标签的切片运算也可以获取多行和多列。  

In [None]:
# Passing a list of row labels and a list of column labels to .loc returns
# the corresponding sub-frame.
wanted_rows = ["第三幅", "第四幅"]
wanted_columns = ["字画名称", "字画底价"]
paints_4 = goods_in3.loc[wanted_rows, wanted_columns]
print(paints_4)

对DataFrame数据的选取也可以通过布尔型数组实现。  

In [None]:
# Boolean mask: rows whose "字画底价" is above 500.
base_price_over_500 = goods_in3["字画底价"] > 500
paints_5 = goods_in3.loc[base_price_over_500, :]
print(f"paints_5:\n{paints_5}")
print("-" * 20)

# Masks combine element-wise with `&`; both conditions must hold for a row to be kept.
raise_over_1000 = goods_in3["字画拍卖加价"] > 1000
paints_6 = goods_in3.loc[base_price_over_500 & raise_over_1000, :]
print(f"paints_6:\n{paints_6}")

### 3.1.4 DataFrame 数据的修改

In [None]:
from pandas import DataFrame

paints = {
    "字画名称": ["旭日东升", "富水长流", "招财进宝", "鸿运当头"],
    "字画底价": [2860, 498, 1068, 598],
    "字画拍卖加价": [1000, 2000, 500, 1500],
}

# "字画所属人" has no data in `paints`, so the column starts out as NaN.
owner_column = "字画所属人"
goods_in = DataFrame(
    paints,
    columns=["字画名称", "字画底价", "字画拍卖加价", owner_column],
)
print(f"goods_in:\n{goods_in}")
print("-" * 20)

# Assigning a scalar broadcasts the same value to every row of the column.
goods_in[owner_column] = "张三"
print(f"goods_in:\n{goods_in}")
print("-" * 20)

# Assigning a list sets one value per row; its length must match the row count.
owners_per_row = ["张三", "李四", "王五", "赵六"]
goods_in[owner_column] = owners_per_row
print(f"goods_in:\n{goods_in}")

可通过del DataFrame名["列名"]格式来进行删除操作。

In [None]:
# `del frame[column]` removes the column from the existing DataFrame in place.
column_to_remove = "字画所属人"
del goods_in[column_to_remove]
print(f"goods_in:\n{goods_in}")

### 3.1.5 DataFrame 中的索引对象
注意：索引对象不可修改。

### 3.1.6 层次化索引
层次化索引使用户能在一个轴上拥有多个索引级别，即能以低维度形式处理高维度数据。

In [None]:
from pandas import DataFrame

paints = {
    "字画名称": ["旭日东升", "富水长流", "招财进宝", "鸿运当头"],
    "字画底价": [2860, 498, 1068, 598],
    "字画拍卖加价": [1000, 2000, 500, 1500],
}

# Two parallel label lists build a two-level row index:
# the first list is the outer level, the second the inner level.
outer_level = ["第一拍卖现场", "第一拍卖现场", "第二拍卖现场", "第二拍卖现场"]
inner_level = ["第一幅", "第二幅", "第一幅", "第二幅"]
goods_in = DataFrame(paints, index=[outer_level, inner_level])
print(f"层次化索引后的 goods_in:\n{goods_in}")
print("-" * 20)

# The resulting row index is a MultiIndex object.
goods_in_indexes = goods_in.index
print(f"goods_in_indexes:\n{goods_in_indexes}")

 选取数据子集。

In [None]:
# Selecting by an outer-level label returns every row under that label.
outer_label = "第二拍卖现场"
goods_in_second = goods_in.loc[outer_label]
print(f"goods_in_second:\n{goods_in_second}")
print("-" * 20)

# Supplying the outer and the inner label together narrows the selection to one row.
full_key = (outer_label, "第一幅")
goods_in_second_first = goods_in.loc[full_key]
print(f"goods_in_second_first:\n{goods_in_second_first}")

 通过 unstack() 方法将二级行索引转换为二级列索引。
 还可通过 stack() 方法进行逆向操作。

In [None]:
# Demonstrate that unstack() and stack() are inverse reshaping operations
# on the hierarchically indexed `goods_in` frame.
print(f"original goods_in:\n{goods_in}")
print("-" * 20)

# unstack() pivots the inner row-index level into the column axis.
goods_stack = goods_in.unstack()
print(f"unstacked goods_in:\n{goods_stack}")
print("-" * 20)

# stack() moves that column level back into the row index. Print the result
# as is: stacking it a second time (as the cell previously did) would also
# fold the remaining column labels into the index and yield a Series rather
# than the restored frame.
goods_stack = goods_stack.stack()
print(f"stacked goods_in:\n{goods_stack}")

## 3.2 Pandas 数据结构中的基本数据操作
### 3.2.1 重新索引

In [None]:
from pandas import DataFrame

# Car quotes keyed by column name. The "最高报价" key previously contained a
# stray space ("最 高报价"), which disagreed with the same table used in the
# drop example further down.
paints = {
    "车名": ["奥迪Q5L", "哈弗H6", "奔驰GLC"],
    "最低报价": [38.78, 9.80, 39.48],
    "最高报价": [49.80, 14.10, 58.78],
}
goods_in = DataFrame(paints, index=["第一辆车", "第二辆车", "第三辆车"])
print(f"goods_in:\n{goods_in}")
print("-" * 20)

# reindex() returns a new frame whose rows follow the requested label order;
# the original frame is left untouched.
other_goods = goods_in.reindex(["第三辆车", "第二辆车", "第一辆车"])
print(f"other_goods:\n{other_goods}")

重新索引后缺少数据时的填充:可以直接指定填充值(fill_value),也可以使用前向填充(ffill)或后向填充(bfill)。

In [None]:
# "第四辆车" does not exist in goods_in; fill_value supplies every cell of the new row.
labels_with_new_row = ["第三辆车", "第二辆车", "第一辆车", "第四辆车"]
other_goods = goods_in.reindex(labels_with_new_row, fill_value=7.9)
print(f"直接填充数字后的 other_goods:\n{other_goods}")
print("-" * 20)

# Rebuild the frame with the integer index 1..3 before using method-based
# filling (pandas documents `method` as applicable only to monotonic indexes).
goods_in = DataFrame(paints, index=[1, 2, 3])

# ffill: the new label 4 copies the row of the closest preceding label.
other_goods = goods_in.reindex([1, 2, 3, 4], method="ffill")
print(f"使用 ffill 方法填充后的 other_goods:\n{other_goods}")
print("-" * 20)

# bfill takes values from the next existing label; label 4 has none after it,
# so its row stays NaN.
other_goods = goods_in.reindex([1, 2, 4, 3], method="bfill")
print(f"使用 bfill 方法填充后的 other_goods:\n{other_goods}")

### 3.2.2 删除指定轴上的项

In [None]:
from pandas import DataFrame
paints = {
    "车名": ["奥迪Q5L", "哈弗H6", "奔驰GLC"],
    "最低报价": [38.78, 9.80, 39.48],
    "最高报价": [49.80, 14.10, 58.78],
}
goods_in = DataFrame(paints, index=[1, 2, 3])
print(f"goods_in:\n{goods_in}")
print("-" * 20)

# drop() returns a new frame, so the result is bound back to goods_in.
# Remove the row labelled 2.
goods_in = goods_in.drop(index=2)
print(f"goods_in after dropping row 2:\n{goods_in}")
print("-" * 20)

# Remove a column by name.
goods_in = goods_in.drop(columns="最低报价")
print(f"goods_in after dropping column '最低报价':\n{goods_in}")