* NumPy provides several functions to create arrays from tabular data. We focus here on the `genfromtxt` function.
* In a nutshell, genfromtxt runs two main loops.
* The `first loop` converts each line of the file into a `sequence of strings`.
* The `second loop` converts each string to the appropriate `data type`.
* This mechanism is slower than a single loop, but gives more flexibility.
* In particular, genfromtxt is able to take missing data into account, when other faster and simpler functions like loadtxt cannot.

In [1]:
import numpy as np
from io import StringIO

In [3]:
# Two rows of comma-separated integers, one literal per row.
data = (
    "1, 2, 3\n"
    "4, 5, 6"
)
data

'1, 2, 3\n4, 5, 6'

In [7]:
# Split every line on commas and parse the fields as floats.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
)

array([[1., 2., 3.],
       [4., 5., 6.]])

In [8]:
# Fixed-width sample: every field occupies exactly three characters.
data = (
    "  1  2  3\n"
    "  4  5 67\n"
    "890123  4"
)
data

'  1  2  3\n  4  5 67\n890123  4'

In [12]:
# An integer delimiter cuts each line into fields of that many characters.
np.genfromtxt(
    StringIO(data),
    delimiter=3,
)

array([[  1.,   2.,   3.],
       [  4.,   5.,  67.],
       [890., 123.,   4.]])

In [13]:
# Sample whose columns are 4, 3 and 2 characters wide.
data = (
    "123456789\n"
    "   4  7 9\n"
    "   4567 9"
)

In [17]:
# A sequence of integers gives one width per column.
np.genfromtxt(
    StringIO(data),
    delimiter=(4, 3, 2),
)

array([[1234.,  567.,   89.],
       [   4.,    7.,    9.],
       [   4.,  567.,    9.]])

In [18]:
# Comma-separated fields padded with stray whitespace.
data = (
    "1, abc , 2\n"
    " 3, xxx, 4"
)
data

'1, abc , 2\n 3, xxx, 4'

In [19]:
# Read the fields as strings; surrounding whitespace is kept by default.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    dtype="|U5",
)

array([['1', ' abc ', ' 2'],
       ['3', ' xxx', ' 4']], dtype='<U5')

In [20]:
# Same read, but autostrip removes the whitespace around each field.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    dtype="|U5",
    autostrip=True,
)

array([['1', 'abc', '2'],
       ['3', 'xxx', '4']], dtype='<U5')

In [21]:
# Sample mixing full-line comments, a trailing comment and data rows.
data = (
    "#\n"
    "# Skip me !\n"
    "# Skip me too !\n"
    "1, 2\n"
    "3, 4\n"
    "5, 6 #This is the third line of the data\n"
    "7, 8\n"
    "# And here comes the last line\n"
    "9, 0\n"
)

In [22]:
# Everything from the comment marker to the end of a line is ignored.
np.genfromtxt(
    StringIO(data),
    comments="#",
    delimiter=",",
)

array([[1., 2.],
       [3., 4.],
       [5., 6.],
       [7., 8.],
       [9., 0.]])

In [23]:
# Ten lines holding the digits 0 through 9, one per line.
data = "\n".join(map(str, range(10)))
data

'0\n1\n2\n3\n4\n5\n6\n7\n8\n9'

In [24]:
# Baseline read with default options: every line is parsed.
np.genfromtxt(StringIO(data))

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [26]:
# Drop the first three and the last five lines before parsing.
np.genfromtxt(
    StringIO(data),
    skip_header=3,
    skip_footer=5,
)

array([3., 4.])

In [27]:
# Two whitespace-separated rows of three values.
data = (
    "1 2 3\n"
    "4 5 6"
)
data

'1 2 3\n4 5 6'

In [30]:
# With no delimiter given, fields are split on whitespace.
np.genfromtxt(StringIO(data))

array([[1., 2., 3.],
       [4., 5., 6.]])

In [32]:
# Keep only the first and last columns; negative indices count from the end.
np.genfromtxt(
    StringIO(data),
    usecols=(0, -1),
)

array([[1., 3.],
       [4., 6.]])

In [33]:
# Same two-row sample, reused for the named-column examples.
data = (
    "1 2 3\n"
    "4 5 6"
)
data

'1 2 3\n4 5 6'

In [35]:
# Name the columns, then select a subset of them by name.
np.genfromtxt(
    StringIO(data),
    names="a,b,c",
    usecols=("a", "c"),
)

array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])

In [36]:
# The column selection may also be one comma-separated string of names.
np.genfromtxt(
    StringIO(data),
    names="a, b, c",
    usecols="a, c",
)

array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])

In [37]:
# Keep the file-like buffer itself (not the raw text) under the name `data`.
data = StringIO(
    "1 2 3\n"
    " 4 5 6"
)
data

<_io.StringIO at 0x11d445655e0>

In [38]:
# Rewind first: genfromtxt reads the buffer to its end, so re-running this
# cell without seeking back would parse an already-exhausted stream.
data.seek(0)
# Structured dtype with three integer fields named "a", "b" and "c".
np.genfromtxt(data, dtype=[(name, int) for name in "abc"])

array([(1, 2, 3), (4, 5, 6)],
      dtype=[('a', '<i4'), ('b', '<i4'), ('c', '<i4')])

In [39]:
# Fresh buffer, read with field names but no explicit dtype (floats result).
data = StringIO(
    "1 2 3\n"
    " 4 5 6"
)
np.genfromtxt(
    data,
    names="A, B, C",
)

array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8')])

In [40]:
# Inputs for the next cell: a buffer, a structured dtype with lowercase
# field names, and a separate list of uppercase names.
data = StringIO(
    "1 2 3\n"
    " 4 5 6"
)
ndtype = list(zip("abc", (int, float, int)))
names = list("ABC")

In [41]:
# Rewind first: genfromtxt reads the buffer to its end, so re-running this
# cell without seeking back would parse an already-exhausted stream.
data.seek(0)
# The explicit `names` take precedence over the field names in `ndtype`.
np.genfromtxt(data, names=names, dtype=ndtype)

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('A', '<i4'), ('B', '<f8'), ('C', '<i4')])

In [42]:
def convertfunc(x):
    """Convert a percentage field such as ``"2.3%"`` into a fraction (``0.023``).

    ``np.genfromtxt`` passes ``bytes`` to converters in its legacy
    ``encoding="bytes"`` mode and ``str`` otherwise, so both are accepted.

    Parameters
    ----------
    x : str or bytes
        Raw field text, optionally padded with whitespace and ending in "%".

    Returns
    -------
    float
        The numeric value divided by 100.
    """
    if isinstance(x, bytes):
        # latin1 maps every byte to a character, so decoding never fails.
        x = x.decode("latin1")
    return float(x.strip("%")) / 100.0

In [43]:
# The middle column carries a trailing percent sign.
data = (
    "1, 2.3%, 45.\n"
    "6, 78.9%, 0"
)

In [44]:
# Field names for the three columns of the percentage sample.
names = "i", "p", "n"

In [45]:
# Without a converter the percent column cannot be parsed and comes back as nan.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    names=names,
)

array([(1., nan, 45.), (6., nan,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [46]:
# Attach the converter to the percent column by position (index 1).
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    names=names,
    converters={1: convertfunc},
)

array([(1., 0.023, 45.), (6., 0.789,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [47]:
# Same conversion, with the column selected by its field name.
np.genfromtxt(
    StringIO(data),
    delimiter=",",
    names=names,
    converters={"p": convertfunc},
)

array([(1., 0.023, 45.), (6., 0.789,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])