## 使用內建功能讀取 txt 檔

In [5]:
# Point this path at wherever the txt file lives on your machine.
# Iterating a text file yields one string per line (newline included),
# so list(...) collects every line of the file.
with open("example.txt", 'r') as txt_file:
    data = list(txt_file)
print(data)

['id,sex,age,score\n', '001,F,20,77\n', '002,F,25,90\n', '003,M,22,80\n', '004,F,30,66\n', '005,M,40,60\n', '006,M,29,87']


## 將 txt 轉成 pandas dataframe

Question - What does \n mean in Python programming?
Answer - Inside a string it means a new-line. For example:

print("Hello\nWorld")
Will output:

Hello
World

參考資料 
It is part of a larger group of sub-strings called escape sequences; you can find a mostly complete list here:
[Python 3 Escape Sequences](https://www.quackit.com/python/reference/python_3_escape_sequences.cfm)

In [6]:
# Demo: "\n" inside a string literal starts a new line, so the output spans two lines.
# The spaces around "\n" are kept, which is why the second output line begins with a space.
print("Hello \n World")

Hello 
 World


In [7]:
import pandas as pd

# For every line: drop the trailing "\n", then split on commas.
# The result is a list of rows, each row being a list of string fields.
with open("example.txt", 'r') as f:
    data = [line.replace('\n', '').split(',') for line in f]
data

[['id', 'sex', 'age', 'score'],
 ['001', 'F', '20', '77'],
 ['002', 'F', '25', '90'],
 ['003', 'M', '22', '80'],
 ['004', 'F', '30', '66'],
 ['005', 'M', '40', '60'],
 ['006', 'M', '29', '87']]

In [8]:
# Build the DataFrame from the data rows (everything after the header) and name
# the columns from the header row in the same call. Passing `columns=` to the
# constructor also handles a header-only file: it yields an empty frame that
# still carries the right column names, instead of raising a length-mismatch
# error when the column names are assigned afterwards.
df = pd.DataFrame(data[1:], columns=data[0])
df

Unnamed: 0,id,sex,age,score
0,1,F,20,77
1,2,F,25,90
2,3,M,22,80
3,4,F,30,66
4,5,M,40,60
5,6,M,29,87


## 將資料轉成 json 檔後輸出
將 json 讀回來後，是否與我們原本想要存入的方式一樣? (以 id 為 key)

In [10]:
import json

# Write the frame to JSON using the DataFrame default layout (orient='columns'),
# spelled out explicitly here, then display the frame that was saved.
df.to_json('example01.json', orient='columns')
df

Unnamed: 0,id,sex,age,score
0,1,F,20,77
1,2,F,25,90
2,3,M,22,80
3,4,F,30,66
4,5,M,40,60
5,6,M,29,87


In [4]:
# Saved this way, the column names become the outer keys
# and the row labels become the inner keys.
with open('example01.json', 'r') as json_file:
    j1 = json.loads(json_file.read())
j1

{'sex': {'001': 'F',
  '002': 'F',
  '003': 'M',
  '004': 'F',
  '005': 'M',
  '006': 'M'},
 'age': {'001': '20',
  '002': '25',
  '003': '22',
  '004': '30',
  '005': '40',
  '006': '29'},
 'score': {'001': '77',
  '002': '90',
  '003': '80',
  '004': '66',
  '005': '60',
  '006': '87'}}

In [11]:
# Use the `id` column as the row index so each record can be addressed by its id.
# Rebinding `df` (instead of `inplace=True`) and checking the index name first
# keeps this cell safe to re-run: a second run no longer raises KeyError because
# `id` has already been moved out of the columns.
if df.index.name != 'id':
    df = df.set_index('id')
df

Unnamed: 0_level_0,sex,age,score
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,F,20,77
2,F,25,90
3,M,22,80
4,F,30,66
5,M,40,60
6,M,29,87


In [12]:
# orient='index' writes one JSON object per row, keyed by the row label.
df.to_json(path_or_buf='example02.json', orient='index')

In [13]:
# Read the row-oriented JSON back to inspect how it is keyed.
with open('example02.json', 'r') as json_file:
    j2 = json.loads(json_file.read())
j2

{'001': {'sex': 'F', 'age': '20', 'score': '77'},
 '002': {'sex': 'F', 'age': '25', 'score': '90'},
 '003': {'sex': 'M', 'age': '22', 'score': '80'},
 '004': {'sex': 'F', 'age': '30', 'score': '66'},
 '005': {'sex': 'M', 'age': '40', 'score': '60'},
 '006': {'sex': 'M', 'age': '29', 'score': '87'}}

## 將檔案存為 npy 檔
一個專門儲存 numpy array 的檔案格式
使用 npy 通常可以讓你更快讀取資料喔!  
[建議閱讀](https://towardsdatascience.com/why-you-should-start-using-npy-file-more-often-df2a13cc0161)

In [9]:
import numpy as np

# Convert the value rows of `data` (everything after the header) into a numpy array.
array = np.asarray(data[1:])
array

array([['001', 'F', '20', '77'],
       ['002', 'F', '25', '90'],
       ['003', 'M', '22', '80'],
       ['004', 'F', '30', '66'],
       ['005', 'M', '40', '60'],
       ['006', 'M', '29', '87']], dtype='<U3')

In [10]:
# Persist the array to disk in numpy's .npy format.
np.save('example.npy', array)

In [11]:
# Read the .npy file back in and display it to check the round trip.
array_back = np.load(file='example.npy')
array_back

array([['001', 'F', '20', '77'],
       ['002', 'F', '25', '90'],
       ['003', 'M', '22', '80'],
       ['004', 'F', '30', '66'],
       ['005', 'M', '40', '60'],
       ['006', 'M', '29', '87']], dtype='<U3')

## Pickle
存成 pickle 檔  
什麼都包，什麼都不奇怪的 [Pickle](https://docs.python.org/3/library/pickle.html)  
比如說 [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) 的資料集就是用 pickle 包的喔!

In [12]:
import pickle

# Serialise the nested list `data` into a binary pickle file.
with open('example.pkl', 'wb') as pkl_file:
    pickle.dump(data, pkl_file)

In [13]:
# Load the pickled object back and display it.
# NOTE: only unpickle files you trust; loading a pickle can execute arbitrary code.
with open('example.pkl', 'rb') as pkl_file:
    pkl_data = pickle.loads(pkl_file.read())
pkl_data

[['id', 'sex', 'age', 'score'],
 ['001', 'F', '20', '77'],
 ['002', 'F', '25', '90'],
 ['003', 'M', '22', '80'],
 ['004', 'F', '30', '66'],
 ['005', 'M', '40', '60'],
 ['006', 'M', '29', '87']]