# Simple array example

In [1]:
import numpy

In [2]:
# Structured dtype with two float32 columns; each of the 5 records is a (col1, col2) pair.
simple_dtype = numpy.dtype([('col1', numpy.float32), ('col2', numpy.float32)])

# numpy.zeros gives a fully initialised array. The low-level numpy.ndarray(...) constructor
# returns uninitialised memory and is not the recommended way to create arrays.
simple_array = numpy.zeros(5, dtype=simple_dtype)

# Fill whole columns by field name instead of looping over records and positional indices.
# The values run 0..9 in row-major order: col1 holds the even numbers, col2 the odd ones.
simple_array['col1'] = numpy.arange(0, 10, 2)
simple_array['col2'] = numpy.arange(1, 10, 2)

**Example array with a couple of simple columns:**

In [3]:
# Echo the structured array so its records and dtype appear as the cell output.
simple_array

array([(0., 1.), (2., 3.), (4., 5.), (6., 7.), (8., 9.)],
      dtype=[('col1', '<f4'), ('col2', '<f4')])

In [4]:
# Scratch file (relative to the working directory) used to hand the array to ArrayDataSet.
simple_input_pathname = 'simple_data.npy'

In [5]:
# Write the array to disk in NumPy's .npy format so it can be opened by path below.
numpy.save(simple_input_pathname, simple_array)

In [6]:
from clientlib.DataSets import ArrayDataSet

In [7]:
# Point a dataset at the saved file, then get the stream object that queries are chained on.
# NOTE(review): AsNumpyArray() presumably declares the file's contents to be a NumPy array
# (the printed query tree names the source NumpySourceStream) — confirm against clientlib.
simple_dataset = ArrayDataSet(simple_input_pathname)
simple_array_stream = simple_dataset.AsNumpyArray()

**Query to get just the first column:**

In [8]:
# The selection is passed as the source text of a lambda rather than as a callable.
# Here `e` stands for one record of the array and `e.col1` picks its 'col1' field.
simple_col1_query = simple_array_stream.Select("lambda e: e.col1")

In [9]:
# Evaluate the query. The recorded cell output shows the query tree (with the lambda
# parsed into a Python AST) being printed when this runs.
simple_output = simple_col1_query.value()

Select(source=NumpySourceStream(), selection=Module(body=[Expr(value=Lambda(args=arguments(args=[arg(arg='e', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=Attribute(value=Name(id='e', ctx=Load()), attr='col1', ctx=Load())))]))


**Output:**

In [10]:
# Echo the query result: the 'col1' value of every record.
simple_output

array([0., 2., 4., 6., 8.], dtype=float32)

In [11]:
import os

In [12]:
# Clean up the scratch .npy file written for this example.
os.remove(simple_input_pathname)

# Example with multi-entry column in array

In [13]:
# Each record has an event number and a fixed-length (2) sub-array of jets; every jet is
# itself a small record with a float 'pt' and an unsigned-integer 'nTracks'.
multidim_dtype = numpy.dtype([
    ('eventNumber', numpy.uint),
    ('jets', [('pt', numpy.single), ('nTracks', numpy.uint)], 2),
])

# numpy.zeros gives a fully initialised array. The low-level numpy.ndarray(...) constructor
# returns uninitialised memory and is not the recommended way to create arrays.
multidim_array = numpy.zeros(5, dtype=multidim_dtype)

# Events are simply numbered 0..4.
multidim_array['eventNumber'] = numpy.arange(5)

# Running jet index 0..9 laid out as (event, jet-within-event), i.e. i * 2 + j.
jet_index = numpy.arange(10).reshape(5, 2)

# Field access on a structured array returns a view, so these assignments write straight
# into multidim_array without any per-element Python loop.
multidim_array['jets']['nTracks'] = jet_index
multidim_array['jets']['pt'] = jet_index * 0.3

**Now we have a _jets_ column with a couple of properties:**

In [14]:
# Echo the array so the nested (eventNumber, jets[2]) layout and dtype appear as the cell output.
multidim_array

array([(0, [(0. , 0), (0.3, 1)]), (1, [(0.6, 2), (0.9, 3)]),
       (2, [(1.2, 4), (1.5, 5)]), (3, [(1.8, 6), (2.1, 7)]),
       (4, [(2.4, 8), (2.7, 9)])],
      dtype=[('eventNumber', '<u8'), ('jets', [('pt', '<f4'), ('nTracks', '<u8')], (2,))])

In [15]:
# Scratch file (relative to the working directory) used to hand the array to ArrayDataSet.
multidim_input_pathname = 'multidim_data.npy'

In [16]:
# Write the array to disk in NumPy's .npy format so it can be opened by path below.
numpy.save(multidim_input_pathname, multidim_array)

In [17]:
# Point a dataset at the saved file, then get the stream object that queries are chained on.
# NOTE(review): AsNumpyArray() presumably declares the file's contents to be a NumPy array
# (the printed query tree names the source NumpySourceStream) — confirm against clientlib.
multidim_dataset = ArrayDataSet(multidim_input_pathname)
multidim_array_stream = multidim_dataset.AsNumpyArray()

**Query to get one property across all jets:**

In [18]:
# Two chained steps, each given as the source text of a lambda:
#   SelectMany picks the 'jets' sub-array of each event `e`; judging by the flat result
#   shown below, the per-event jets are concatenated into a single sequence.
#   Select then picks the 'pt' field of each jet `j`.
multidim_pt_query = multidim_array_stream.SelectMany("lambda e: e.jets").Select("lambda j: j.pt")

In [19]:
# Evaluate the query. The recorded cell output shows the nested Select(SelectMany(...))
# query tree being printed when this runs.
multidim_output = multidim_pt_query.value()

Select(source=SelectMany(source=NumpySourceStream(), selection=Module(body=[Expr(value=Lambda(args=arguments(args=[arg(arg='e', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=Attribute(value=Name(id='e', ctx=Load()), attr='jets', ctx=Load())))])), selection=Module(body=[Expr(value=Lambda(args=arguments(args=[arg(arg='j', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=Attribute(value=Name(id='j', ctx=Load()), attr='pt', ctx=Load())))]))


**Output:**

In [20]:
# Echo the query result: one 'pt' value per jet, across all events.
multidim_output

array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8, 2.1, 2.4, 2.7], dtype=float32)

In [21]:
# Clean up the scratch .npy file written for this example.
os.remove(multidim_input_pathname)

# Example with awkward array

In [22]:
import awkward

**Build an awkward array from plain Python lists and dictionaries:**

In [23]:
# One dict per event; 'jets' is a list of per-jet dicts whose length differs between events.
awkward_array = awkward.fromiter([
    {'eventNumber': 0, 'jets': [{'pt': 2.5, 'nTracks': 3}, {'pt': 1.0, 'nTracks': 2}]},  # two jets
    {'eventNumber': 1, 'jets': []},  # no jets
    {'eventNumber': 2, 'jets': [{'pt': 5.0, 'nTracks': 10}]},  # one jet
])

**Note that the _jets_ column now has a variable length per event.**

In [24]:
# Scratch file (relative to the working directory) used to hand the array to ArrayDataSet.
awkward_input_pathname = 'awkward_data.awkd'

In [25]:
# Write the awkward array to disk so it can be opened by path below.
awkward.save(awkward_input_pathname, awkward_array)

In [26]:
# Same dataset class as in the NumPy examples, but the stream is requested with AsAwkwardArray().
# NOTE(review): presumably this declares the file's contents to be an awkward array (the
# printed query tree names the source AwkwardSourceStream) — confirm against clientlib.
awkward_dataset = ArrayDataSet(awkward_input_pathname)
awkward_array_stream = awkward_dataset.AsAwkwardArray()

**Get _nTracks_ for each jet this time:**

In [27]:
# Same query shape as the fixed-length NumPy example: SelectMany picks the jets of each
# event `e`, then Select picks the 'nTracks' field of each jet `j`.
awkward_nTracks_query = awkward_array_stream.SelectMany("lambda e: e.jets").Select("lambda j: j.nTracks")

In [28]:
# Evaluate the query. The recorded cell output shows the nested Select(SelectMany(...))
# query tree being printed when this runs.
awkward_output = awkward_nTracks_query.value()

Select(source=SelectMany(source=AwkwardSourceStream(), selection=Module(body=[Expr(value=Lambda(args=arguments(args=[arg(arg='e', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=Attribute(value=Name(id='e', ctx=Load()), attr='jets', ctx=Load())))])), selection=Module(body=[Expr(value=Lambda(args=arguments(args=[arg(arg='j', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=Attribute(value=Name(id='j', ctx=Load()), attr='nTracks', ctx=Load())))]))


**Output:**

In [29]:
# Echo the query result: one 'nTracks' value per jet; the event with no jets contributes nothing.
awkward_output

array([ 3,  2, 10])

In [30]:
# Clean up the scratch .awkd file written for this example.
os.remove(awkward_input_pathname)