# Chapter8 数据文件的存取

▲清单 8.1 %%writefile 命令的示例

In [1]:
%%writefile file.txt
sample
样本

Writing file.txt


▲清单 8.2 以只读模式打开文本文件

In [2]:
# Open the text file for reading ('r' is open()'s default mode), decoding as UTF-8.
f = open(r'file.txt', mode='r', encoding='utf-8')
# A bare name on the last line makes the notebook display the object's repr.
f

<_io.TextIOWrapper name='file.txt' mode='r' encoding='utf-8'>

▲清单 8.3 输出文本的内容

In [3]:
# Iterating the file yields its lines with their newline characters intact,
# so print them with no separator and no extra trailing newline.
print(*f, sep='', end='')

sample
样本


▲清单 8.4 关闭文件

In [4]:
# Close the file handle bound to f.
f.close()

▲清单 8.5 以读写模式（w+）打开文本文件

In [5]:
# 'w+' truncates the file and opens it for both writing and reading, so the
# same handle can be rewound and read back afterwards.
f = open('file.txt', 'w+', encoding='utf-8')
# The line breaks must be written as \n escapes: a single-quoted string
# literal cannot span several physical lines.
# write() returns the number of characters written, which the notebook displays.
f.write('第1 行\n第2 行\n')

10

▲清单 8.6 输出文件的内容

In [6]:
# Move the file position back to the beginning before reading.
f.seek(0)

# Read the whole content at once; end='' avoids adding an extra newline.
print(f.read(), end='')

f.close()

第1 行
第2 行


▲清单 8.7 with 语句的示例

In [7]:
# The with statement closes the file automatically when the block exits.
with open('file.txt', encoding='utf-8') as f:
    # Lines keep their own newline characters, so join them unchanged.
    print(''.join(f), end='')

第1 行
第2 行


▲清单 8.8 创建样本文件

In [8]:
%%writefile data.csv
样本数据
x,y,z
0.1,1.0,-2.0
0.2,1.2,-1.9
0.3,1.3,-1.8
0.4,1.4,-1.7

Writing data.csv


▲清单 8.9 csv.reader 函数的示例

In [9]:
import csv

# newline='' lets the csv module handle line endings itself, as the csv
# documentation recommends for files passed to csv.reader.
with open(r'data.csv', encoding='utf-8', newline='') as f:
    # csv.reader yields one list of strings per record; collect them all
    # while the file is still open.
    rows = list(csv.reader(f))

rows

[['样本数据'],
 ['x', 'y', 'z'],
 ['0.1', '1.0', '-2.0'],
 ['0.2', '1.2', '-1.9'],
 ['0.3', '1.3', '-1.8'],
 ['0.4', '1.4', '-1.7']]

▲清单 8.10 csv.writer 函数的示例

In [10]:
# newline='' leaves line-ending handling to the csv module, as the csv
# documentation recommends for files passed to csv.writer.
with open(r'data2.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    # Write every record in rows as one CSV line.
    writer.writerows(rows)

▲清单 8.11 loadtxt 函数的示例①

In [11]:
import numpy as np

# skiprows=2 skips the title line and the "x,y,z" header line of data.csv,
# so only the numeric rows are parsed.
arr = np.loadtxt(r'data.csv', encoding='utf-8',
                 delimiter=',', skiprows=2)
arr

array([[ 0.1,  1. , -2. ],
       [ 0.2,  1.2, -1.9],
       [ 0.3,  1.3, -1.8],
       [ 0.4,  1.4, -1.7]])

▲清单 8.12 loadtxt 函数的示例②

In [12]:
# Read only two columns and unpack them into separate 1-D arrays.
# NOTE: usecols=(1, 2) selects the second and third columns (headed "y" and
# "z" in data.csv), so the name x below actually holds the y column.
x, y = np.loadtxt(
    r'data.csv',
    encoding='utf-8',
    delimiter=',',
    skiprows=2,  # skip the title line and the header line
    usecols=(1, 2),
    max_rows=4,  # read at most four data rows after the skipped lines
    unpack=True,  # return one array per selected column
)
x

array([1. , 1.2, 1.3, 1.4])

▲清单 8.13 savetxt 函数的示例

In [13]:
# Save arr as CSV. np.savetxt prepends the `comments` string to the header,
# so the file starts with a title line followed by the "x,y,z" header line.
# NOTE(review): the title string here is Japanese while the title line of
# data.csv is '样本数据' — confirm this is intended.
np.savetxt(
    r'out_np.csv',
    arr,
    encoding='utf-8',
    delimiter=',',
    header='x,y,z',
    comments='サンプルデータ\n',
)

▲清单 8.14 read_csv 函数的示例

In [14]:
import pandas as pd

# header=1 takes the second line of the file ("x,y,z") as the column names,
# which skips the title line above it.
df = pd.read_csv('data.csv', header=1)
df

Unnamed: 0,x,y,z
0,0.1,1.0,-2.0
1,0.2,1.2,-1.9
2,0.3,1.3,-1.8
3,0.4,1.4,-1.7


▲清单 8.15 to_csv 方法的示例

In [15]:
# Write df to a CSV file; with the default settings the index is written too.
df.to_csv('out_pd.csv')

▲清单 8.16 json.dumps 函数的示例①

In [16]:
import json

# Serialise a Python list to a JSON string: None becomes null, True becomes true.
json.dumps([1, 0.3, 'JSON', None, True, [2.0]])

'[1, 0.3, "JSON", null, true, [2.0]]'

▲清单 8.17 json.dumps 函数的示例②

In [17]:
# A dict becomes a JSON object; tuples and lists both become JSON arrays.
json.dumps({'a': (1, 2, 3), 'b': ['2020', '0102']})

'{"a": [1, 2, 3], "b": ["2020", "0102"]}'

▲清单 8.18 json.loads 函数的示例

In [18]:
# Parse a JSON string back into Python objects (null -> None, true -> True).
json.loads('[1, 0.3, "JSON", null, true, [2.0]]')

[1, 0.3, 'JSON', None, True, [2.0]]

▲清单 8.19 json.dump 函数的示例

In [19]:
# Sample object mixing a string, a nested dict and a list of tuples.
data = {
    'str': 'JSON',
    'dict': {'read': 'load', 'write': 'dump'},
    'list': [(1,), (2, 3)],
}

# Give the encoding explicitly so the file does not depend on the platform's
# default locale encoding, matching the other open() calls in this notebook.
with open(r'test.json', 'w', encoding='utf-8') as f:
    # json.dump writes the serialised object straight to the file object.
    json.dump(data, f)

▲清单 8.20 json.load 函数的示例

In [20]:
# Read with an explicit encoding so decoding does not depend on the
# platform's default locale encoding.
with open(r'test.json', encoding='utf-8') as f:
    # json.load parses the file content into Python objects.
    data_loaded = json.load(f)

data_loaded

{'str': 'JSON',
 'dict': {'read': 'load', 'write': 'dump'},
 'list': [[1], [2, 3]]}

▲清单 8.21 工作簿的创建

In [21]:
from openpyxl import Workbook

# Create a new in-memory workbook and take its active worksheet.
wb = Workbook()
ws = wb.active

▲清单 8.22 添加工作表

In [22]:
# Rename the active sheet, then add a second sheet to the workbook.
ws.title = '销售额'
wb.create_sheet('分析结果')
# List the sheet names to confirm both sheets exist.
wb.sheetnames

['销售额', '分析结果']

▲清单 8.23 文件的保存

In [23]:
# One header row followed by four data rows.
rows = [['编号', '单价', '销售数量'],
        [1, 2000, 5],
        [2, 4500, 3],
        [3, 3000, 2],
        [4, 6000, 4]]

# Append each list to the worksheet as a new row.
for row in rows:
    ws.append(row)

# Write the workbook to disk.
wb.save(r'openpyxl.xlsx')

▲清单 8.24 访问单元的值

In [24]:
from openpyxl import load_workbook

# Reopen the saved workbook in read-only mode; data_only=True asks openpyxl
# for stored cell values rather than formulas.
wb = load_workbook(r'openpyxl.xlsx', read_only=True,
                   data_only=True)
# Select the worksheet by name and read a single cell by its address.
ws = wb['销售额']
ws['B4'].value

3000

▲清单 8.25 iter_rows 方法的示例

In [25]:
# Iterate over rows 1-4 and columns 1-3; with values_only=True each item is a
# tuple of cell values, so `value` here is one whole row.
# max_row=4 covers the header plus the first three data rows only.
for value in ws.iter_rows(min_row=1, max_row=4, min_col=1,
                          max_col=3, values_only=True):
    print(value)

# Close the workbook that was opened in read-only mode.
wb.close()

('编号', '单价', '销售数量')
(1, 2000, 5)
(2, 4500, 3)
(3, 3000, 2)


▲清单 8.26 to_excel 方法的示例

In [26]:
import numpy as np
import pandas as pd

# Build a 4x4 integer array and wrap it in a DataFrame with default labels.
data = np.array([[3, 0, 4, 0],
                 [2, 1, 9, 2],
                 [7, 3, 7, 0],
                 [6, 0, 9, 2]])
df_raw = pd.DataFrame(data)

# Write the frame to an Excel file, in a sheet named 'df1'.
df_raw.to_excel(r'pandas.xlsx', sheet_name='df1')

▲清单 8.27 read_excel 函数的示例

In [27]:
# Read sheet 'df1' back; index_col=0 uses the first column as the row index.
df = pd.read_excel(r'pandas.xlsx', index_col=0,
                   sheet_name='df1', engine='openpyxl')
df

Unnamed: 0,0,1,2,3
0,3,0,4,0
1,2,1,9,2
2,7,3,7,0
3,6,0,9,2


▲清单 8.28 使用 ExcelWriter 类写入数据

In [28]:
# Use one ExcelWriter to put several sheets into the same workbook file.
with pd.ExcelWriter(r'pandas.xlsx') as writer:
    df.to_excel(writer, sheet_name='df1')
    # The second sheet holds the transpose of df.
    df.T.to_excel(writer, sheet_name='df2')

▲清单 8.29 使用 ExcelFile 类读取数据

In [29]:
# Open the workbook once through ExcelFile and read both sheets from it.
with pd.ExcelFile(r'pandas.xlsx', engine='openpyxl') as f:
    df1 = pd.read_excel(f, index_col=0, sheet_name='df1')
    df2 = pd.read_excel(f, index_col=0, sheet_name='df2')