## 使用內建功能讀取 txt 檔

In [3]:
# Read the whole text file into a list holding one string per line; lines that
# end with a newline keep their trailing '\n' (visible in the printed list).
with open("./data/example.txt", 'r') as txt_file:
    data = list(txt_file)  # iterating a text file yields the same lines as readlines()
print(data)

['id,sex,age,score\n', '001,F,20,77\n', '002,F,25,90\n', '003,M,22,80\n', '004,F,30,66\n', '005,M,40,60\n', '006,M,29,87']


## 將 txt 轉成 pandas dataframe

In [5]:
import pandas as pd

# Parse the comma-separated text file into a list of rows, where every row is
# a list of string fields; the first row holds the column names.
data = []
with open("data/example.txt", 'r') as f:
    for line in f:
        line = line.rstrip('\n')  # drop the trailing newline ('\n') before splitting
        if not line.strip():
            # A blank line would otherwise turn into a bogus [''] row and make
            # the rows ragged for the DataFrame / NumPy conversions further down.
            continue
        data.append(line.split(','))  # split the line into its fields on commas
data

[['id', 'sex', 'age', 'score'],
 ['001', 'F', '20', '77'],
 ['002', 'F', '25', '90'],
 ['003', 'M', '22', '80'],
 ['004', 'F', '30', '66'],
 ['005', 'M', '40', '60'],
 ['006', 'M', '29', '87']]

In [6]:
# Build the DataFrame from the parsed rows: data[0] is the header row and
# data[1:] are the records. Passing the header through `columns=` (instead of
# assigning df.columns afterwards) also works when the file contains only the
# header row, where the after-the-fact assignment fails with a length mismatch.
# Every value stays a string because the fields come straight from str.split.
df = pd.DataFrame(data[1:], columns=data[0])
df

Unnamed: 0,id,sex,age,score
0,1,F,20,77
1,2,F,25,90
2,3,M,22,80
3,4,F,30,66
4,5,M,40,60
5,6,M,29,87


## 將資料轉成 json 檔後輸出
將 json 讀回來後，是否與我們原本想要存入的方式一樣? (以 id 為 key)

In [44]:
import json
# Export the frame to a JSON file without specifying `orient`, i.e. using
# pandas' default layout for DataFrames.
df.to_json(path_or_buf='data/examples/example01.json')

In [46]:
# With the way the frame was saved above, the column names become the outer
# keys and the row labels become the inner keys of the JSON object.
with open('data/examples/example01.json', 'r') as json_file:
    j1 = json.loads(json_file.read())
j1

{'age': {'0': '20', '1': '25', '2': '22', '3': '30', '4': '40', '5': '29'},
 'id': {'0': '001',
  '1': '002',
  '2': '003',
  '3': '004',
  '4': '005',
  '5': '006'},
 'score': {'0': '77', '1': '90', '2': '80', '3': '66', '4': '60', '5': '87'},
 'sex': {'0': 'F', '1': 'F', '2': 'M', '3': 'F', '4': 'M', '5': 'M'}}

In [41]:
# Use the 'id' column as the row index so that the next export can key each
# record by its id. The guard keeps this cell re-runnable: once 'id' has moved
# into the index it is no longer a column, and calling set_index('id') a
# second time would raise KeyError.
if 'id' in df.columns:
    df = df.set_index('id')
df

Unnamed: 0_level_0,sex,age,score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,F,20,77
2,F,25,90
3,M,22,80
4,F,30,66
5,M,40,60
6,M,29,87


In [42]:
# orient='index' writes one JSON object per row, keyed by the row label
# (the reloaded result is keyed by the index values).
df.to_json(path_or_buf='data/examples/example02.json', orient='index')

In [47]:
# Read the index-oriented JSON file back into a plain dict to inspect how the
# records are keyed.
with open('data/examples/example02.json', 'r') as json_file:
    j2 = json.loads(json_file.read())
j2

{'001': {'age': '20', 'score': '77', 'sex': 'F'},
 '002': {'age': '25', 'score': '90', 'sex': 'F'},
 '003': {'age': '22', 'score': '80', 'sex': 'M'},
 '004': {'age': '30', 'score': '66', 'sex': 'F'},
 '005': {'age': '40', 'score': '60', 'sex': 'M'},
 '006': {'age': '29', 'score': '87', 'sex': 'M'}}

## 將檔案存為 npy 檔
npy 是一個專門儲存 numpy array 的檔案格式。  
使用 npy 通常可以讓你更快讀取資料喔!  
[建議閱讀](https://towardsdatascience.com/why-you-should-start-using-npy-file-more-often-df2a13cc0161)

In [55]:
import numpy as np
# Turn the data rows (everything after the header row) into a NumPy array.
# The fields are still text, so the result is a string array, not a numeric one.
array = np.asarray(data[1:])
array

array([['001', 'F', '20', '77'],
       ['002', 'F', '25', '90'],
       ['003', 'M', '22', '80'],
       ['004', 'F', '30', '66'],
       ['005', 'M', '40', '60'],
       ['006', 'M', '29', '87']],
      dtype='<U3')

In [56]:
# Persist the array to disk in NumPy's .npy format.
np.save('data/examples/example.npy', array)

In [58]:
# Read the .npy file back and display it for comparison with the saved array.
array_back = np.load(file='data/examples/example.npy')
array_back

array([['001', 'F', '20', '77'],
       ['002', 'F', '25', '90'],
       ['003', 'M', '22', '80'],
       ['004', 'F', '30', '66'],
       ['005', 'M', '40', '60'],
       ['006', 'M', '29', '87']],
      dtype='<U3')

## Pickle
存成 pickle 檔  
什麼都包，什麼都不奇怪的 [Pickle](https://docs.python.org/3/library/pickle.html)  
比如說 [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) 的資料集就是用 pickle 包的喔!

In [60]:
import pickle
# Serialize the parsed rows (header row included) into a pickle file; the
# file is opened in binary write mode because pickle produces bytes.
with open('data/examples/example.pkl', 'wb') as pkl_file:
    pickle.dump(data, pkl_file)

In [62]:
# Load the pickled object back from disk and display it.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a source you trust.
with open('data/examples/example.pkl', 'rb') as pkl_file:
    pkl_data = pickle.loads(pkl_file.read())
pkl_data

[['id', 'sex', 'age', 'score'],
 ['001', 'F', '20', '77'],
 ['002', 'F', '25', '90'],
 ['003', 'M', '22', '80'],
 ['004', 'F', '30', '66'],
 ['005', 'M', '40', '60'],
 ['006', 'M', '29', '87']]