### Reading a file with NumPy

In [1]:
import numpy as np

In [2]:
%cat ../Data/dummydata.txt

Some sample datafile

Year, Min, Max
2009, 10, 30
2010, 12, 29
2011, 11, 35
2012, 14, 28
2013, 10, 32


In [3]:
# First attempt: no delimiter or skip_header is given, so genfromtxt splits on
# whitespace and also tries to parse the title and column-name lines. The
# comma-terminated tokens ("2009,", "10,") cannot be converted to numbers,
# which is why the array displayed below is mostly nan.
data = np.genfromtxt("../Data/dummydata.txt")

In [4]:
data

array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, 30.],
       [nan, nan, 29.],
       [nan, nan, 35.],
       [nan, nan, 28.],
       [nan, nan, 32.]])

In [8]:
# Second attempt: skip the three lines that precede the numbers (title, blank
# line, column names) and split the fields on commas.
data = np.genfromtxt(
    "../Data/dummydata.txt",
    delimiter=",",
    skip_header=3,
)

In [9]:
data

array([[2009.,   10.,   30.],
       [2010.,   12.,   29.],
       [2011.,   11.,   35.],
       [2012.,   14.,   28.],
       [2013.,   10.,   32.]])

In [10]:
type(data)

numpy.ndarray

In [11]:
data.dtype

dtype('float64')

In [13]:
data[2]

array([2011.,   11.,   35.])

In [14]:
data[2, 0]

2011.0

In [15]:
data[0:3]

array([[2009.,   10.,   30.],
       [2010.,   12.,   29.],
       [2011.,   11.,   35.]])

In [16]:
data[:3, 0]

array([2009., 2010., 2011.])

In [17]:
data[:, 0]

array([2009., 2010., 2011., 2012., 2013.])

In [18]:
%whos

Variable   Type       Data/Info
-------------------------------
data       ndarray    5x3: 15 elems, type `float64`, 120 bytes
np         module     <module 'numpy' from '/us<...>kages/numpy/__init__.py'>


In [19]:
# Column 1 is "Min" in the file header (Year, Min, Max); the same slice is
# bound again further down as temp_min.
temp = data[:, 1]

In [20]:
temp

array([10., 12., 11., 14., 10.])

In [21]:
temp.mean()

11.4

In [22]:
date = data[:, 0]

In [23]:
date

array([2009., 2010., 2011., 2012., 2013.])

In [24]:
date.astype("int")

array([2009, 2010, 2011, 2012, 2013])

In [25]:
# Rebind date to the integer copy: genfromtxt parsed the years as float64,
# but they are whole numbers.
date = date.astype("int")

In [26]:
date

array([2009, 2010, 2011, 2012, 2013])

In [27]:
date.dtype

dtype('int64')

In [28]:
date.astype("int32")

array([2009, 2010, 2011, 2012, 2013], dtype=int32)

In [29]:
# Zero-filled array with one slot per year.
# NOTE(review): this array is only displayed and then discarded — diff is
# rebound to temp_max - temp_min a few cells further down, so the
# preallocation is not needed for that computation.
diff = np.zeros(len(date))

In [30]:
diff

array([0., 0., 0., 0., 0.])

In [31]:
temp_max = data[:, 2]

In [32]:
temp_min = data[:, 1]

In [33]:
diff = temp_max - temp_min

In [34]:
diff

array([20., 17., 24., 14., 22.])

In [38]:
bad_data = np.zeros(15)

In [39]:
bad_data

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [40]:
bad_data[:6] = np.nan

In [41]:
bad_data

array([nan, nan, nan, nan, nan, nan,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.])

In [43]:
np.isfinite(bad_data)

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [44]:
# Boolean-mask indexing: keep only the entries for which isfinite is True,
# i.e. drop the nan values assigned to the first six slots.
bad_data[np.isfinite(bad_data)]

array([0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [47]:
np.isinf([np.inf])

array([ True])

In [48]:
np.isnan([np.nan])

array([ True])

In [49]:
np.isinf([np.nan, np.inf])

array([False,  True])

In [50]:
np.isnan([np.nan, np.inf])

array([ True, False])

In [51]:
np.isfinite([np.nan, np.inf])

array([False, False])

In [52]:
np.inf == np.inf

True

In [53]:
np.inf > np.inf

False

In [54]:
temp

array([10., 12., 11., 14., 10.])

In [55]:
# NOTE(review): the "_c" suffix suggests a Celsius conversion, but dividing by 3
# is not a temperature-unit conversion — confirm the intended formula or
# rename the variable.
temp_c = temp / 3

In [56]:
temp_c

array([3.33333333, 4.        , 3.66666667, 4.66666667, 3.33333333])

In [57]:
temp_c.round(0)

array([3., 4., 4., 5., 3.])

In [58]:
np.unique(temp_c.round(0))

array([3., 4., 5.])

In [59]:
temp

array([10., 12., 11., 14., 10.])

In [60]:
new_data = np.vstack((date, temp, temp_c))

In [61]:
new_data

array([[2009.        , 2010.        , 2011.        , 2012.        ,
        2013.        ],
       [  10.        ,   12.        ,   11.        ,   14.        ,
          10.        ],
       [   3.33333333,    4.        ,    3.66666667,    4.66666667,
           3.33333333]])

In [63]:
# Build the one-row-per-year layout (columns: date, temp, temp_c) from the
# source columns rather than transposing new_data onto itself. The previous
# `new_data = np.transpose(new_data)` flipped the array back to 3x5 on every
# second execution of this cell; this form yields the same 5x3 array no matter
# how many times the cell is run.
new_data = np.transpose(np.vstack((date, temp, temp_c)))

In [64]:
new_data

array([[2009.        ,   10.        ,    3.33333333],
       [2010.        ,   12.        ,    4.        ],
       [2011.        ,   11.        ,    3.66666667],
       [2012.        ,   14.        ,    4.66666667],
       [2013.        ,   10.        ,    3.33333333]])

In [65]:
# Write new_data as comma-separated text to converted.csv in the current
# working directory.
# NOTE(review): no fmt= or header= is passed, so NumPy's defaults apply —
# confirm that the on-disk number format and the absence of column names are
# acceptable for whoever reads this file.
np.savetxt('converted.csv', new_data, delimiter=",")

In [66]:
# usecols is a keyword argument of np.genfromtxt, not a standalone name.
# Here it restricts the read to the Year (0) and Max (2) columns.
np.genfromtxt("../Data/dummydata.txt", skip_header=3, delimiter=",", usecols=(0, 2))

array([[2009.,   30.],
       [2010.,   29.],
       [2011.,   35.],
       [2012.,   28.],
       [2013.,   32.]])