# <h1 style="color:red;background-color:#fff">              I/O with NumPy</h1>

In [20]:
import numpy as np
from io import StringIO 

### <font color="blue">Splitting the lines into columns</font>

In [21]:

# Load single-space-separated integers with genfromtxt.
# Joining the characters of the digit string with spaces gives
# "1 2 3 4 5 5 6 7", i.e. one field per digit.
s = " ".join("12345567")
# delimiter=" " splits the line on single spaces; dtype=int parses each field as an integer.
np.genfromtxt(
    StringIO(s),
    dtype=int,
    delimiter=" ",
)

array([1, 2, 3, 4, 5, 5, 6, 7])

In [15]:
# autostrip=True trims the whitespace around every field once the line has
# been split on the delimiter, so " abc " is read as "abc".
data = "1, abc , 2\n 3, xxx, 4"
np.genfromtxt(
    StringIO(data),
    dtype="|U5",
    delimiter=",",
    autostrip=True,
)


array([['1', 'abc', '2'],
       ['3', 'xxx', '4']], dtype='<U5')

In [16]:
# The comments argument: everything from the marker ("//" here) to the end
# of a line is discarded, so comment-only lines disappear and a trailing
# remark after the values is cut off.
data = "\n".join(
    [
        "//",
        "// Skip me !",
        "// Skip me too !",
        "1, 2",
        "3, 4",
        "5, 6 //This is the third line of the data",
        "7, 8",
        "// And here comes the last line",
        "9, 0 ",
    ]
)
np.genfromtxt(StringIO(data), delimiter=",", comments="//")

array([[1., 2.],
       [3., 4.],
       [5., 6.],
       [7., 8.],
       [9., 0.]])

### <font color="blue">Skipping lines and choosing columns</font>


In [17]:
# skip_header / skip_footer drop lines from the start / end of the input.
# The input is ten lines holding the integers 0 through 9, one per line.
data = "\n".join(map(str, range(10)))
# Nothing skipped, so all ten values are read:
# array([0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
np.genfromtxt(StringIO(data))
# Dropping the first 3 and the last 5 lines leaves only 3 and 4:
# array([3.,  4.])
np.genfromtxt(StringIO(data), skip_footer=5, skip_header=3)

array([3., 4.])

In [23]:
# usecols picks the columns to keep; a negative index counts from the last
# column, so (0, -1) means "first and last".
data = "1 2 3 5 6 7\n4 5 6 10 11 12"
# Every column:
# array([[ 1.,  2.,  3.,  5.,  6.,  7.], [ 4.,  5.,  6., 10., 11., 12.]])
np.genfromtxt(StringIO(data))
# First and last column only:
# array([[ 1.,  7.], [ 4., 12.]])
np.genfromtxt(
    StringIO(data),
    usecols=(0, -1),
)

array([[ 1.,  7.],
       [ 4., 12.]])

### <font color="blue">Setting the names</font>

In [29]:
# names given as one comma-separated string: each entry becomes a field
# name of the structured result.
# Note: `data` is a file-like object that genfromtxt reads to the end, so
# re-create it before calling genfromtxt on it again.
data = StringIO("1 2 3\n 4 5 6")
np.genfromtxt(
    data,
    names="A, B, C",
)


array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8')])

In [31]:
# Field names stored in the file itself: skip_header=1 discards the first
# line ("So it goes"), then names=True takes the field names from the next
# line ("#a b c").
data = StringIO("So it goes\n#a b c\n1 2 3\n 4 5 6")
np.genfromtxt(
    data,
    names=True,
    skip_header=1,
)

array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

In [37]:
# The defaultfmt argument
# Only one name ("a") is supplied for six columns; the fields without a
# name are named from defaultfmt. "f%i" is NumPy's default and is written
# out here so the argument being demonstrated is visible. The numbering is
# zero-based, so the remaining fields are f0, f1, ..., f4 (not f1, ..., fn).
data = StringIO("1 2 3 6 7 8\n 4 5 6 1 2 3")
np.genfromtxt(data, dtype=(int,) * 6, names="a", defaultfmt="f%i")


array([8, 3])

In [52]:
# The converters argument
def convertfunc(x):
    """Convert a percentage field such as ' 2.3%' into the fraction 0.023.

    genfromtxt passes converters bytes when its encoding is 'bytes' (the
    default before NumPy 2.0) and str otherwise, so both are accepted here.
    Stripping with a bytes pattern, as the old lambda did, raises TypeError
    as soon as a str is passed in.
    """
    if isinstance(x, bytes):
        x = x.decode("latin1")
    return float(x.strip("%")) / 100.


data = "1, 2.3%, 45.5%\n6, 78.9%, 0.5%"
names = ("i", "p", "n")
# Columns 1 and 2 hold percentages and go through convertfunc; column 0 has
# no converter and is parsed with the default float dtype.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    names=names,
    converters={1: convertfunc, 2: convertfunc},
)

array([(1., 0.023, 0.455), (6., 0.789, 0.005)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [63]:
# missing_values / filling_values
# Neither argument is passed in this cell, so only the blank field is
# treated as missing (and masked, because usemask=True); "N/A" and "???"
# are not flagged as missing and show up as -1 in the integer result.
# To mark them as missing too, add to the options below, for example:
#     missing_values={0: "N/A", 'b': " ", 2: "???"}
#     filling_values={0: 0, 'b': 0, 2: -999}
data = "N/A, 2, 3\n4, ,???"
kwargs = {
    "delimiter": ",",
    "dtype": int,
    "names": "a,b,c",
}
np.genfromtxt(StringIO(data), usemask=True, **kwargs)

masked_array(data=[(-1, 2, 3), (4, --, -1)],
             mask=[(False, False, False), (False,  True, False)],
       fill_value=(999999, 999999, 999999),
            dtype=[('a', '<i4'), ('b', '<i4'), ('c', '<i4')])