## 3.5.1 有哪些 IO 方式

- 数据分析阶段的重点：分析、建模

## 3.5.2 读取和存储 CSV

- 存储、读取、索引设置
- 数据追加

## 3.5.3 读取和存储 Excel

- 存储、读取、工作表设置
- 数据追加

In [39]:
from pathlib import Path

import numpy as np
import pandas as pd

In [40]:
# Demo frame: 1000 daily rows starting 2020-01-01 with three columns
# ('a', 'b', 'c') of standard-normal noise.
# The generator is seeded so that re-running the notebook from a fresh kernel
# produces the same values every time.
SEED = 42
data = pd.DataFrame(
    np.random.default_rng(SEED).standard_normal((1000, 3)),
    columns=['a', 'b', 'c'],
    index=pd.date_range('20200101', periods=1000),
)

In [41]:
# Display the frame (1000 rows x 3 columns, indexed by date).
data

Unnamed: 0,a,b,c
2020-01-01,0.020135,-0.499029,-1.238012
2020-01-02,0.386697,0.859078,-0.132450
2020-01-03,-1.068120,-1.350477,0.585804
2020-01-04,0.295700,0.924323,0.952206
2020-01-05,0.200377,0.124585,0.510945
...,...,...,...
2022-09-22,-1.468196,-1.038229,-1.301679
2022-09-23,0.237349,0.877902,1.713553
2022-09-24,-0.511768,-0.198326,-0.028762
2022-09-25,-1.353956,0.909144,2.300395


In [42]:
# Save the data to a CSV file.
# The argument is a file path, e.g. 'some/dir/file.csv'; the index is written
# as the first column.
data.to_csv('txt.csv') # path/file.csv

In [43]:
# Path of the CSV file written above; reused by the following cells.
filename = 'txt.csv'
# Read it back with default settings: the saved index comes back as an
# ordinary column labelled 'Unnamed: 0' and a new 0..n-1 index is generated.
pd.read_csv(filename)

Unnamed: 0.1,Unnamed: 0,a,b,c
0,2020-01-01,0.020135,-0.499029,-1.238012
1,2020-01-02,0.386697,0.859078,-0.132450
2,2020-01-03,-1.068120,-1.350477,0.585804
3,2020-01-04,0.295700,0.924323,0.952206
4,2020-01-05,0.200377,0.124585,0.510945
...,...,...,...,...
995,2022-09-22,-1.468196,-1.038229,-1.301679
996,2022-09-23,0.237349,0.877902,1.713553
997,2022-09-24,-0.511768,-0.198326,-0.028762
998,2022-09-25,-1.353956,0.909144,2.300395


In [44]:
# Read the CSV back with its first column restored as the index.
# - index_col=0 selects the column by position, so the cell keeps working
#   whether the saved index was unnamed ('Unnamed: 0') or named (e.g. 'date').
# - parse_dates=True converts the index strings back to datetimes, so the
#   result has a date index like the frame that was saved.
pd.read_csv(filename, index_col=0, parse_dates=True)

Unnamed: 0,a,b,c
2020-01-01,0.020135,-0.499029,-1.238012
2020-01-02,0.386697,0.859078,-0.132450
2020-01-03,-1.068120,-1.350477,0.585804
2020-01-04,0.295700,0.924323,0.952206
2020-01-05,0.200377,0.124585,0.510945
...,...,...,...
2022-09-22,-1.468196,-1.038229,-1.301679
2022-09-23,0.237349,0.877902,1.713553
2022-09-24,-0.511768,-0.198326,-0.028762
2022-09-25,-1.353956,0.909144,2.300395


In [45]:
# Goal: give the index a name ('date') before saving.
# First inspect the current index.
data.index

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10',
               ...
               '2022-09-17', '2022-09-18', '2022-09-19', '2022-09-20',
               '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24',
               '2022-09-25', '2022-09-26'],
              dtype='datetime64[ns]', length=1000, freq='D')

In [46]:
# Current index names; [None] means the index is unnamed.
data.index.names

FrozenList([None])

In [47]:
# Label the (single-level) index 'date' so it is saved with a real column name.
data.index.name = 'date'

In [48]:
# Display the frame again; the index is now labelled 'date'.
data

Unnamed: 0_level_0,a,b,c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01,0.020135,-0.499029,-1.238012
2020-01-02,0.386697,0.859078,-0.132450
2020-01-03,-1.068120,-1.350477,0.585804
2020-01-04,0.295700,0.924323,0.952206
2020-01-05,0.200377,0.124585,0.510945
...,...,...,...
2022-09-22,-1.468196,-1.038229,-1.301679
2022-09-23,0.237349,0.877902,1.713553
2022-09-24,-0.511768,-0.198326,-0.028762
2022-09-25,-1.353956,0.909144,2.300395


In [49]:
# Write to the same path again: the existing file is completely replaced.
data.to_csv(filename) # full overwrite / replace

In [50]:
# Appending to an existing file: take the last five rows of `data` as the
# sample to append.
data2 = data.tail(n=5)

In [51]:
# Display the rows that will be appended (the last five rows of `data`).
data2

Unnamed: 0_level_0,a,b,c
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-09-22,-1.468196,-1.038229,-1.301679
2022-09-23,0.237349,0.877902,1.713553
2022-09-24,-0.511768,-0.198326,-0.028762
2022-09-25,-1.353956,0.909144,2.300395
2022-09-26,-1.208891,-0.143507,-2.714516


In [52]:
# Append data2 to the CSV file.
# First check whether the file already holds data: the header row is written
# only when the file is missing or empty, so appending to a populated file adds
# data rows only and never duplicates the header.
csv_path = Path(filename)
csv_has_content = csv_path.exists() and csv_path.stat().st_size > 0
data2.to_csv(csv_path, mode='a', header=not csv_has_content)  # mode='a': append

In [53]:
# Excel: path of the workbook used in the following cells.
excelname = 'excel.xlsx'

In [54]:
# Write the frame to the workbook in a sheet named 'a'.
data.to_excel(excelname,sheet_name='a')

In [55]:
# Read the workbook back with default settings: the saved index comes back as
# a regular 'date' column and a new 0..n-1 index is generated.
pd.read_excel(excelname)

Unnamed: 0,date,a,b,c
0,2020-01-01,0.020135,-0.499029,-1.238012
1,2020-01-02,0.386697,0.859078,-0.132450
2,2020-01-03,-1.068120,-1.350477,0.585804
3,2020-01-04,0.295700,0.924323,0.952206
4,2020-01-05,0.200377,0.124585,0.510945
...,...,...,...,...
995,2022-09-22,-1.468196,-1.038229,-1.301679
996,2022-09-23,0.237349,0.877902,1.713553
997,2022-09-24,-0.511768,-0.198326,-0.028762
998,2022-09-25,-1.353956,0.909144,2.300395


In [56]:
# Write the frame to the same path under sheet name 'b'.
# to_excel has no `mode` parameter, so it cannot append to an existing workbook.
# NOTE(review): writing to a path is expected to replace the whole file, which
# would drop sheet 'a' -- confirm by reopening the workbook.
data.to_excel(excelname,sheet_name='b') # no `mode` parameter

In [57]:
# IPython help syntax: show the documentation/signature of DataFrame.to_excel.
data.to_excel?

In [58]:
# Write several sheets in one pass: a single ExcelWriter holds the workbook
# open, and the same frame is stored under each sheet name in turn.
with pd.ExcelWriter('writer.xlsx') as writer:
    for sheet_label in ('a', 'b', 'c'):
        data.to_excel(writer, sheet_name=sheet_label)

In [59]:
# Add a new sheet 'd' to the existing workbook. mode='a' is passed so the
# writer appends to the file, using the openpyxl engine.
with pd.ExcelWriter(
    'writer.xlsx',
    mode='a',
    engine='openpyxl',
) as writer:
    data2.to_excel(excel_writer=writer, sheet_name='d')

In [60]:
# Append rows to the existing sheet 'd'.
# if_sheet_exists='overlay' writes into the existing sheet without clearing it,
# but to_excel starts at row 0 by default and would overwrite the rows already
# there. To really append, start at the first free row (openpyxl's 1-based
# max_row equals the next 0-based startrow) and skip the header row, which the
# sheet already has. If the sheet does not exist yet, write it from the top
# with a header.
with pd.ExcelWriter('writer.xlsx', mode='a', engine='openpyxl', if_sheet_exists='overlay') as writer:
    existing_sheet = writer.sheets.get('d')
    next_row = existing_sheet.max_row if existing_sheet is not None else 0
    data.to_excel(writer, sheet_name='d', startrow=next_row, header=(next_row == 0))

In [61]:
# Alternative with the openpyxl library: read the sheet -> append new data -> save back to the workbook