## save / load
* pickle, npy, npz 형식으로 파일에 읽고 쓰기가 가능
* np.loadtxt
* np.savetxt

### 함수별 특성

####  np.load / np.save
* 배열을 그대로 보존 / 3차원이상의 배열도 저장 가능
* 지원 형식은 pickle / npz / npy
* 다른 어플리케이션과는 호환성이 거의 없음

#### np.loadtxt / np.savetxt
* 다른 어플리케이션과 호환. .dat / .csv / .txt 형식으로 읽고 쓰기 가능
* 저장할 수 있는 배열의 차원은 2차원까지

### np.savetxt / np.loadtxt

```python
numpy.loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0)
```

```python
numpy.savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ')
```

#### sample code

In [1]:
import numpy as np
a = np.random.randn(3,4)

In [2]:
a

array([[ 0.09546733,  0.36732485, -0.61061037, -1.79288591],
       [-0.38613224,  2.78462896, -2.77080997, -1.17357921],
       [-1.84248193,  1.21319844, -0.58874938, -0.32503618]])

In [3]:
np.savetxt("sample.txt", a)

In [4]:
!head sample.txt

9.546733271701966783e-02 3.673248453154397963e-01 -6.106103740547671466e-01 -1.792885908414068474e+00
-3.861322384380413419e-01 2.784628958692518808e+00 -2.770809965128290919e+00 -1.173579210824126529e+00
-1.842481930595260664e+00 1.213198437011286712e+00 -5.887493822600914362e-01 -3.250361799060529355e-01


In [5]:
b = np.loadtxt("sample.txt")
b

array([[ 0.09546733,  0.36732485, -0.61061037, -1.79288591],
       [-0.38613224,  2.78462896, -2.77080997, -1.17357921],
       [-1.84248193,  1.21319844, -0.58874938, -0.32503618]])

In [6]:
np.savetxt("sample.csv", a)

In [7]:
!head sample.csv

9.546733271701966783e-02 3.673248453154397963e-01 -6.106103740547671466e-01 -1.792885908414068474e+00
-3.861322384380413419e-01 2.784628958692518808e+00 -2.770809965128290919e+00 -1.173579210824126529e+00
-1.842481930595260664e+00 1.213198437011286712e+00 -5.887493822600914362e-01 -3.250361799060529355e-01


In [8]:
c = np.loadtxt("sample.csv")
c

array([[ 0.09546733,  0.36732485, -0.61061037, -1.79288591],
       [-0.38613224,  2.78462896, -2.77080997, -1.17357921],
       [-1.84248193,  1.21319844, -0.58874938, -0.32503618]])

In [9]:
np.savetxt("sample.dat", a)
!head sample.dat

9.546733271701966783e-02 3.673248453154397963e-01 -6.106103740547671466e-01 -1.792885908414068474e+00
-3.861322384380413419e-01 2.784628958692518808e+00 -2.770809965128290919e+00 -1.173579210824126529e+00
-1.842481930595260664e+00 1.213198437011286712e+00 -5.887493822600914362e-01 -3.250361799060529355e-01


In [10]:
d = np.loadtxt("sample.dat")
d

array([[ 0.09546733,  0.36732485, -0.61061037, -1.79288591],
       [-0.38613224,  2.78462896, -2.77080997, -1.17357921],
       [-1.84248193,  1.21319844, -0.58874938, -0.32503618]])

In [11]:
np.savetxt("sample2.txt", a, delimiter=",")
!head sample2.txt

9.546733271701966783e-02,3.673248453154397963e-01,-6.106103740547671466e-01,-1.792885908414068474e+00
-3.861322384380413419e-01,2.784628958692518808e+00,-2.770809965128290919e+00,-1.173579210824126529e+00
-1.842481930595260664e+00,1.213198437011286712e+00,-5.887493822600914362e-01,-3.250361799060529355e-01


In [12]:
e = np.loadtxt("sample2.txt")

ValueError: could not convert string to float: '9.546733271701966783e-02,3.673248453154397963e-01,-6.106103740547671466e-01,-1.792885908414068474e+00'

In [13]:
e = np.loadtxt("sample2.txt", delimiter=",")
e

array([[ 0.09546733,  0.36732485, -0.61061037, -1.79288591],
       [-0.38613224,  2.78462896, -2.77080997, -1.17357921],
       [-1.84248193,  1.21319844, -0.58874938, -0.32503618]])

In [14]:
np.savetxt('sample3.txt', a, fmt ='%.2e')

In [15]:
!head sample3.txt

9.55e-02 3.67e-01 -6.11e-01 -1.79e+00
-3.86e-01 2.78e+00 -2.77e+00 -1.17e+00
-1.84e+00 1.21e+00 -5.89e-01 -3.25e-01


In [16]:
# NOTE: '%2f' is a minimum field width of 2 with the default 6 decimal places
# (see the output below); presumably '%.2f' (two decimals) was intended, to mirror '%.2e' above.
np.savetxt('sample4.txt', a, fmt='%2f')

In [17]:
!head sample4.txt

0.095467 0.367325 -0.610610 -1.792886
-0.386132 2.784629 -2.770810 -1.173579
-1.842482 1.213198 -0.588749 -0.325036


In [18]:
# NOTE(review): the two rows have different lengths (2 vs 3 elements), so this is not a
# rectangular 2-D complex array -- presumably the trailing `0-100j` is unintended.
f = np.array([[10.1+3.21j,100+32.1j],[20+0.2j, 22.1-1j, 0-100j]]) 

In [19]:
# NOTE(review): "$.3j" looks like a typo for "%.3ej" -- a per-column complex format needs
# one specifier for the real part and one for the imaginary part, e.g. "%.3e + %.3ej".
# The recorded run below fails with "fmt has wrong shape".
np.savetxt("sample6.txt", f, fmt=["%.3e + $.3j", "%.1e + %.1ej"])

AttributeError: fmt has wrong shape.  ['%.3e + $.3j', '%.1e + %.1ej']

In [21]:
## loadtxt

In [23]:
np.loadtxt('sample4.txt', usecols = (0,2))

array([[ 0.095467, -0.61061 ],
       [-0.386132, -2.77081 ],
       [-1.842482, -0.588749]])

In [24]:
np.loadtxt('sample4.txt', skiprows=1)

array([[-0.386132,  2.784629, -2.77081 , -1.173579],
       [-1.842482,  1.213198, -0.588749, -0.325036]])

In [25]:
## header / footer
np.savetxt('sample7.txt', a, fmt='%.3e', header='this is a header', footer='this is a footer')

In [26]:
!head sample7.txt

# this is a header
9.547e-02 3.673e-01 -6.106e-01 -1.793e+00
-3.861e-01 2.785e+00 -2.771e+00 -1.174e+00
-1.842e+00 1.213e+00 -5.887e-01 -3.250e-01
# this is a footer


In [28]:
np.savetxt('sample8.txt', a, fmt='%.3e', header='this is a header', footer='this is a footer', comments=">>>")

In [29]:
!head sample8.txt

>>>this is a header
9.547e-02 3.673e-01 -6.106e-01 -1.793e+00
-3.861e-01 2.785e+00 -2.771e+00 -1.174e+00
-1.842e+00 1.213e+00 -5.887e-01 -3.250e-01
>>>this is a footer


In [30]:
np.loadtxt("sample8.txt", comments=">>>")

array([[ 0.09547,  0.3673 , -0.6106 , -1.793  ],
       [-0.3861 ,  2.785  , -2.771  , -1.174  ],
       [-1.842  ,  1.213  , -0.5887 , -0.325  ]])

In [31]:
np.loadtxt("sample8.txt")

ValueError: could not convert string to float: '>>>this'

In [33]:
!head foo.csv

# age gender tall[cm] driver's_lisense
18 female 154.1 No
21 male 172.3 Yes
22 female 160.8 Yes
23 male 180.1 Yes
25 female 145.0 No


In [34]:
np.loadtxt('foo.csv', dtype=[('col1', 'i8'), ('col2', 'S10'), ('col3', 'f8'), ('col4', 'S10')])

array([(18, b'female', 154.1, b'No'), (21, b'male', 172.3, b'Yes'),
       (22, b'female', 160.8, b'Yes'), (23, b'male', 180.1, b'Yes'),
       (25, b'female', 145. , b'No')],
      dtype=[('col1', '<i8'), ('col2', 'S10'), ('col3', '<f8'), ('col4', 'S10')])

In [35]:
np.loadtxt('foo.csv', dtype=[('col1', 'i8'), ('col2', 'S10'), ('col3', 'f8'), ('col4', 'S10')], unpack=True)

[array([18, 21, 22, 23, 25]),
 array([b'female', b'male', b'female', b'male', b'female'], dtype='|S10'),
 array([154.1, 172.3, 160.8, 180.1, 145. ]),
 array([b'No', b'Yes', b'Yes', b'Yes', b'No'], dtype='|S10')]

In [36]:
age, gender, tall, driver_licese = np.loadtxt('foo.csv', dtype=[('col1', 'i8'), ('col2', 'S10'), ('col3', 'f8'), ('col4', 'S10')], unpack=True)

In [37]:
age

array([18, 21, 22, 23, 25])

In [38]:
gender

array([b'female', b'male', b'female', b'male', b'female'], dtype='|S10')

In [39]:
tall

array([154.1, 172.3, 160.8, 180.1, 145. ])

In [40]:
driver_licese

array([b'No', b'Yes', b'Yes', b'Yes', b'No'], dtype='|S10')

In [42]:
for item in zip(age, gender, tall, driver_licese):
    print(item)

(18, b'female', 154.1, b'No')
(21, b'male', 172.3, b'Yes')
(22, b'female', 160.8, b'Yes')
(23, b'male', 180.1, b'Yes')
(25, b'female', 145.0, b'No')


In [45]:
## Converters

In [43]:
# callback
def driver_licese_cb(str):
    """Convert a driver's-license field to a number: 1 for "Yes", -1 otherwise.

    Meant to be used as a ``converters`` callback for ``np.loadtxt``.

    Parameters
    ----------
    str : bytes or str
        Raw text of the field. The parameter name shadows the builtin ``str``;
        it is kept unchanged so existing callers keep working.

    Returns
    -------
    int
        1 when the field is "Yes", -1 for any other value.
    """
    # loadtxt may hand converters either bytes or str (it depends on the
    # `encoding` argument / NumPy version), so normalise to text first.
    # latin-1 maps every byte to a character, so decoding cannot raise.
    field = str.decode('latin-1') if isinstance(str, bytes) else str
    return 1 if field == 'Yes' else -1

def gender_cb(str):
    """Convert a gender field to a number: 1 for "male", -1 otherwise.

    Meant to be used as a ``converters`` callback for ``np.loadtxt``.

    Parameters
    ----------
    str : bytes or str
        Raw text of the field. The parameter name shadows the builtin ``str``;
        it is kept unchanged so existing callers keep working.

    Returns
    -------
    int
        1 when the field is "male", -1 for any other value.
    """
    # loadtxt may hand converters either bytes or str (it depends on the
    # `encoding` argument / NumPy version), so normalise to text first.
    # latin-1 maps every byte to a character, so decoding cannot raise.
    field = str.decode('latin-1') if isinstance(str, bytes) else str
    return 1 if field == 'male' else -1

In [44]:
# Column 1 (gender) and column 3 (license) are text, so each gets a callback
# that turns the field into a number; the callbacks are passed directly.
np.loadtxt('foo.csv', converters={1: gender_cb, 3: driver_licese_cb})

array([[ 18. ,  -1. , 154.1,  -1. ],
       [ 21. ,   1. , 172.3,   1. ],
       [ 22. ,  -1. , 160.8,   1. ],
       [ 23. ,   1. , 180.1,   1. ],
       [ 25. ,  -1. , 145. ,  -1. ]])

### np.genfromtxt
* 결측값(누락된 값)이 있는 데이터를 위한 함수

```python
np.genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None)
```


In [46]:
!head bar.txt

1.1,,
2.3, 5.2, -9.1
0.1,, 2.0


In [48]:
np.loadtxt('bar.txt', delimiter=',')

ValueError: could not convert string to float: 

In [49]:
np.genfromtxt('bar.txt', delimiter=',')

array([[ 1.1,  nan,  nan],
       [ 2.3,  5.2, -9.1],
       [ 0.1,  nan,  2. ]])

In [50]:
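## fields that cannot be parsed are filled in: nan for float columns, -1 for int columns (in the outputs below, non-integer text such as 2.3 also becomes -1 in an int column)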

In [51]:
np.genfromtxt('bar.txt', delimiter=',', dtype=('int', 'float', 'int'))

array([(-1, nan, -1), (-1, 5.2, -1), (-1, nan, -1)],
      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])

In [52]:
np.genfromtxt('bar.txt', delimiter=',', dtype=('int', 'int', 'int'))

array([(-1, -1, -1), (-1, -1, -1), (-1, -1, -1)],
      dtype=[('f0', '<i8'), ('f1', '<i8'), ('f2', '<i8')])