# Record arrays
## here every row of a 2-d numpy array can have mixed types

# Datetime64 objects
## these, as the name suggests, can encode date and time

In [1]:
import numpy as np

## usually, in numpy, we create arrays of a single type
## a record array is an exception.

* record array is a hybrid of a list and a dictionary
* a list, because we can access the rows by their indexes
* a dictionary because we can access the columns by their names (python strings)

## a record array requires a special syntax
## we specify the rows as Python tuples (here, each row contains an integer, a sub-array of two floating-point numbers and a string), and the dtype as a list of tuples, one per column: the first tuple element is a Python string with the name of the column, the second is the datatype of the column, and an optional third element gives its shape or length

In [3]:
# Build the array from a list of Python tuples (one tuple per row).
# The dtype is a list of (field name, type[, shape or length]) tuples:
#   'x' -> 32-bit integer
#   'y' -> sub-array of two 64-bit floats
#   'z' -> unicode string of at most 4 characters
# The builtin `str` is used for 'z' because the `np.str` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24.
rec_array = np.array(
    [(1, (2.0, 3.0), 'hey'), (2, (3.5, 4.0), 'n')],
    dtype=[('x', np.int32), ('y', np.float64, (2,)), ('z', str, 4)],
)
rec_array

array([(1, [2. , 3. ], 'hey'), (2, [3.5, 4. ], 'n')],
      dtype=[('x', '<i4'), ('y', '<f8', (2,)), ('z', '<U4')])

In [4]:
# indexing with an integer returns one whole row (a single record)
rec_array[0]

(1, [2., 3.], 'hey')

In [6]:
# indexing with a field name returns that column for all rows
rec_array['x']

array([1, 2], dtype=int32)

In [7]:
# combine both: select column 'x', then take its first element
rec_array['x'][0]

1

## record arrays are very similar to dataframes in pandas
## record arrays are often used to represent non-homogeneous tabular data
## they can be read very conveniently using the loadtxt and genfromtxt functions

# datetime in numpy

## there is already a datetime object in Python but it is somewhat limited
## to avoid confusion, the numpy datetime object is called datetime64 where 64 is the number of bits that each such object takes in memory
## what is interesting about it is that we can use it at different resolutions: from a year, a month or a day, down to a fraction of a second

In [12]:
# a bare year string gives a datetime64 that represents a whole year
np.datetime64('2015')

numpy.datetime64('2015')

In [13]:
# a 'YYYY-MM' string gives a datetime64 that represents a year and a month
np.datetime64('2015-01')

numpy.datetime64('2015-01')

In [15]:
# a full date and time; numpy displays it in ISO 8601 form with a 'T'
# between the date and the time
np.datetime64('2015-02-03 12:00:00')

numpy.datetime64('2015-02-03T12:00:00')

In [16]:
# datetime64 is timezone-naive: it stores no time zone.
# Putting a UTC offset in the string (e.g. '2015-02-03 12:00:00+0700') is
# deprecated since NumPy 1.11: it emits a warning, converts the value to UTC
# and is slated to become an error. Apply the offset explicitly instead:
# 12:00 at UTC+07:00 is 05:00 UTC.
utc_moment = np.datetime64('2015-02-03 12:00:00') - np.timedelta64(7, 'h')
utc_moment

  


numpy.datetime64('2015-02-03T05:00:00')

In [17]:
# datetime64 values can be compared with the usual comparison operators;
# a later moment compares as greater than an earlier one
np.datetime64('2015-02-03 12:00:00') > np.datetime64('2015-01-01 12:00:00')

True

In [19]:
# subtracting two datetime64 values gives a timedelta64 object;
# here the result is expressed in seconds (2851200 s = 33 days)
np.datetime64('2015-02-03 12:00:00') - np.datetime64('2015-01-01 12:00:00')

numpy.timedelta64(2851200,'s')

In [20]:
# adding a timedelta64 to a datetime64 shifts it in time
# (into the past or the future); here we add five days
np.datetime64('2015-01-01 12:00:00') + np.timedelta64(5,'D')

numpy.datetime64('2015-01-06T12:00:00')

In [21]:
# or a few hours ('h' is the unit code for hours)
np.datetime64('2015-01-01 12:00:00') + np.timedelta64(5,'h')

numpy.datetime64('2015-01-01T17:00:00')

In [23]:
# datetime64 objects can be converted to numbers or strings with astype;
# a numeric result counts units of the value's resolution since the epoch.
# '2015-01-01' has day resolution, so this returns the number of days since the epoch
np.datetime64('2015-01-01').astype(float)

16436.0

In [26]:
# datetime64 is understood throughout numpy, so ordinary array constructors
# work with dates. Here np.arange builds a run of consecutive days:
#   * the start is included and the stop ('2016-03-01') is excluded,
#     exactly like np.arange on plain numbers
#   * calendar rules are respected, so leap-year February 2016 has 29 entries
r = np.arange('2016-02-01', '2016-03-01', dtype='datetime64[D]')
r

array(['2016-02-01', '2016-02-02', '2016-02-03', '2016-02-04',
       '2016-02-05', '2016-02-06', '2016-02-07', '2016-02-08',
       '2016-02-09', '2016-02-10', '2016-02-11', '2016-02-12',
       '2016-02-13', '2016-02-14', '2016-02-15', '2016-02-16',
       '2016-02-17', '2016-02-18', '2016-02-19', '2016-02-20',
       '2016-02-21', '2016-02-22', '2016-02-23', '2016-02-24',
       '2016-02-25', '2016-02-26', '2016-02-27', '2016-02-28',
       '2016-02-29'], dtype='datetime64[D]')