In [1]:
import numpy as np
import pandas as pd

# list() vs .tolist()

In [2]:
# Seeded generator: re-running the notebook reproduces the same 2 x 3 array
# of floats in [0, 1) instead of fresh values on every run.
arr = np.random.default_rng(0).random((2, 3))
arr

array([[0.40572256, 0.88119198, 0.52848236],
       [0.42259206, 0.2162982 , 0.79586521]])

In [3]:
arr.tolist()

[[0.4057225631080309, 0.8811919800825823, 0.5284823557739861],
 [0.4225920613859705, 0.21629819741347145, 0.7958652117874225]]

In [4]:
type(arr.tolist())

list

In [5]:
type(arr.tolist()[0])

list

`arr.tolist()` is a list of lists. An array is converted into lists in all dimensions.

In [6]:
list(arr)

[array([0.40572256, 0.88119198, 0.52848236]),
 array([0.42259206, 0.2162982 , 0.79586521])]

In [7]:
type(list(arr)[0])

numpy.ndarray

`list(arr)` is a list of arrays. An n-dimensional array is converted into a list of (n-1)-dimensional arrays (only the first dimension is converted).

# Scalar value inside cell

# 1D array inside cell

In [8]:
arr1d = np.array([1, 2, 3])

# Two plain integer columns plus a third column whose cells each hold a
# whole 1D array (the base vector and three rescaled versions of it).
first_col = [1, 2, 3, 4]
second_col = [0, 42, 42, 43]
vectors = [arr1d, arr1d * 10, arr1d * 100, arr1d * 1000]

df = pd.DataFrame([list(row) for row in zip(first_col, second_col, vectors)])
df

Unnamed: 0,0,1,2
0,1,0,"[1, 2, 3]"
1,2,42,"[10, 20, 30]"
2,3,42,"[100, 200, 300]"
3,4,43,"[1000, 2000, 3000]"


In [9]:
df.dtypes

0     int64
1     int64
2    object
dtype: object

In [10]:
signal = df.iloc[:,2]
signal

0             [1, 2, 3]
1          [10, 20, 30]
2       [100, 200, 300]
3    [1000, 2000, 3000]
Name: 2, dtype: object

In [11]:
signal.dtype

dtype('O')

In [12]:
len(signal)

4

In [13]:
type(signal[0])

numpy.ndarray

## Get single cell value

In [14]:
signal[1]  # or: df.iloc[1,2]

array([10, 20, 30])

In [15]:
type(signal[1])  # or: type(df.iloc[1,2])

numpy.ndarray

## Get single value from single cell

In [16]:
df.iloc[1,2][2]

30

In [17]:
type(df.iloc[1,2][2])

numpy.int32

## Get whole data as a 2D NumPy array (each cell holds a 1 x N vector; the output should be an NObjects x N array)

In [18]:
signal

0             [1, 2, 3]
1          [10, 20, 30]
2       [100, 200, 300]
3    [1000, 2000, 3000]
Name: 2, dtype: object

In [19]:
type(signal)

pandas.core.series.Series

In [20]:
signal[1]

array([10, 20, 30])

In [21]:
type(signal[1])

numpy.ndarray

`signal` is a series of 1D-arrays.

In [22]:
np.array(signal)

array([array([1, 2, 3]), array([10, 20, 30]), array([100, 200, 300]),
       array([1000, 2000, 3000])], dtype=object)

In [23]:
signal.to_numpy()

array([array([1, 2, 3]), array([10, 20, 30]), array([100, 200, 300]),
       array([1000, 2000, 3000])], dtype=object)

In [24]:
signal.to_numpy().shape

(4,)

In [25]:
type(signal.to_numpy())

numpy.ndarray

`signal.to_numpy()` is a 1D-array of 1D-arrays.

In [26]:
# The outer array is 1D (its elements are separate arrays), so a 2-tuple
# index is rejected. The error is caught and printed so that "Run All"
# continues past this deliberate failure.
try:
    signal.to_numpy()[1,2]  # won't work
except IndexError as err:
    print(f"{type(err).__name__}: {err}")

IndexError: too many indices for array

In [27]:
signal.to_numpy()[1][2]

30

Chained indexing (`[1][2]`) is required instead of tuple indexing (`[1,2]`), because the outer array is 1D and each of its elements is a separate array.

In [28]:
np.stack(signal.to_numpy())  # or: np.vstack

array([[   1,    2,    3],
       [  10,   20,   30],
       [ 100,  200,  300],
       [1000, 2000, 3000]])

In [29]:
np.stack(signal.to_numpy()).shape

(4, 3)

In [30]:
type(np.stack(signal.to_numpy()))

numpy.ndarray

`np.stack(signal.to_numpy())` is a 2D-array.

## Get whole vector for single object

In [31]:
# Boolean row selector: True where column 1 (same as df.iloc[:,1]) equals 42.
mask = df[1].eq(42)
mask

0    False
1     True
2     True
3    False
Name: 1, dtype: bool

In [32]:
signal[mask]  # or: df[mask].iloc[:,2]

1       [10, 20, 30]
2    [100, 200, 300]
Name: 2, dtype: object

In [33]:
np.stack(signal[mask].to_numpy())

array([[ 10,  20,  30],
       [100, 200, 300]])

In [34]:
np.stack(signal[mask].to_numpy()).shape

(2, 3)

## Get single value from vector for each object

In [35]:
# A third positional indexer is rejected ("Too many indexers"): .iloc only
# addresses rows and columns, not the arrays stored inside the cells.
df.iloc[:,2,1]  # won't work

IndexingError: Too many indexers

In [36]:
df.iloc[:,2][1]  # or: signal[1]

array([10, 20, 30])

In [37]:
# Tuple-indexing a Series is only supported with a MultiIndex; it does not
# reach into the arrays stored as the Series' values.
df.iloc[:,2][:,1]  # or: signal[:,1] -- won't work either

ValueError: Can only tuple-index with a MultiIndex

In [38]:
np.stack(signal.to_numpy())[:,1]

array([   2,   20,  200, 2000])

## Get single value from vector for single object

In [39]:
np.stack(signal[mask].to_numpy())[:,1]

array([ 20, 200])

# 2D array inside cell

In [40]:
# 3 x 3 matrix holding 1..9 in row-major order.
arr2d = np.arange(1, 10).reshape(3, 3)

# Same layout as the 1D example, but each cell of the third column now
# holds a whole 2D array (the base matrix and three rescaled versions).
first_col = [1, 2, 3, 4]
second_col = [0, 42, 42, 43]
matrices = [arr2d, arr2d * 10, arr2d * 100, arr2d * 1000]

df = pd.DataFrame([list(row) for row in zip(first_col, second_col, matrices)])
df

Unnamed: 0,0,1,2
0,1,0,"[[1, 2, 3], [4, 5, 6], [7, 8, 9]]"
1,2,42,"[[10, 20, 30], [40, 50, 60], [70, 80, 90]]"
2,3,42,"[[100, 200, 300], [400, 500, 600], [700, 800, ..."
3,4,43,"[[1000, 2000, 3000], [4000, 5000, 6000], [7000..."


In [41]:
df.dtypes

0     int64
1     int64
2    object
dtype: object

In [42]:
df.iloc[:,2].dtype

dtype('O')

In [43]:
signal = df.iloc[:,2]
signal

0                    [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
1           [[10, 20, 30], [40, 50, 60], [70, 80, 90]]
2    [[100, 200, 300], [400, 500, 600], [700, 800, ...
3    [[1000, 2000, 3000], [4000, 5000, 6000], [7000...
Name: 2, dtype: object

In [44]:
signal[0].shape

(3, 3)

## Get single cell value

In [45]:
df.iloc[1,2]

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [46]:
type(df.iloc[1,2])

numpy.ndarray

In [47]:
df.iloc[1,2][:,1]

array([20, 50, 80])

In [48]:
df.iloc[1,2][2,1]

80

## Get whole data as a 3D NumPy array (each cell holds an N x M matrix; the output should be an NObjects x N x M array)

In [49]:
signal

0                    [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
1           [[10, 20, 30], [40, 50, 60], [70, 80, 90]]
2    [[100, 200, 300], [400, 500, 600], [700, 800, ...
3    [[1000, 2000, 3000], [4000, 5000, 6000], [7000...
Name: 2, dtype: object

In [50]:
signal.to_numpy()

array([array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]]),
       array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]]),
       array([[100, 200, 300],
       [400, 500, 600],
       [700, 800, 900]]),
       array([[1000, 2000, 3000],
       [4000, 5000, 6000],
       [7000, 8000, 9000]])], dtype=object)

In [51]:
# np.stack joins the per-object matrices along a new first axis.
# Note: np.dstack is NOT an equivalent here -- it joins along the third
# axis, so the object index would end up last instead of first.
np.stack(signal.to_numpy())

array([[[   1,    2,    3],
        [   4,    5,    6],
        [   7,    8,    9]],

       [[  10,   20,   30],
        [  40,   50,   60],
        [  70,   80,   90]],

       [[ 100,  200,  300],
        [ 400,  500,  600],
        [ 700,  800,  900]],

       [[1000, 2000, 3000],
        [4000, 5000, 6000],
        [7000, 8000, 9000]]])

In [52]:
np.stack(signal.to_numpy()).shape

(4, 3, 3)

In [53]:
np.stack(signal.to_numpy())[1]

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [54]:
np.stack(signal.to_numpy())[:,1]

array([[   4,    5,    6],
       [  40,   50,   60],
       [ 400,  500,  600],
       [4000, 5000, 6000]])

In [55]:
np.stack(signal.to_numpy())[:,:,1]

array([[   2,    5,    8],
       [  20,   50,   80],
       [ 200,  500,  800],
       [2000, 5000, 8000]])

## Get single value from vector for each object

In [56]:
np.stack(signal.to_numpy())[:,1,2]

array([   6,   60,  600, 6000])

## Get whole vector for single object

In [57]:
# Boolean row selector: True for the rows whose column 1 equals 42.
mask = df[1].eq(42)
mask

0    False
1     True
2     True
3    False
Name: 1, dtype: bool

In [58]:
np.stack(signal[mask].to_numpy())

array([[[ 10,  20,  30],
        [ 40,  50,  60],
        [ 70,  80,  90]],

       [[100, 200, 300],
        [400, 500, 600],
        [700, 800, 900]]])

## Get single value from vector for single object

In [59]:
np.stack(signal[mask].to_numpy())[:,1,2]

array([ 60, 600])

# df.apply()

In [60]:
# 3 x 3 frame of the integers 1..9, filled row by row.
df = pd.DataFrame([list(range(start, start + 3)) for start in (1, 4, 7)])
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [61]:
df.apply(sum)

0    12
1    15
2    18
dtype: int64

In [62]:
df.apply(sum, axis=1)

0     6
1    15
2    24
dtype: int64

In [63]:
# Sum only the three value columns. Summing the whole frame would also add
# the 'sum' column itself when this cell is executed a second time, which
# silently doubles the result on every re-run.
df['sum'] = df[[0, 1, 2]].apply(sum, axis=1)
df

Unnamed: 0,0,1,2,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


# To suffix or not to suffix?

Use-cases:
* Get whole vector for single object
* Get single value from vector for each object
    * With binary mask as well
* Get whole data (if we have vector 1 x N, then we want to have as output array NObjects x N) as numpy 2d array

Usability > performance

Show values in debugger?

## Get whole data (if we have vector 1 x N, then we want to have as output array NObjects x N) as numpy 2d array

In [None]:
# Variant without suffixes: the matrices live in a single 'covariance' signal.
dataset.signals['covariance'].to_numpy()

# Variant with suffixes: rebuild the stack of 6 x 6 matrices from the 36
# scalar columns 'covariance_<row>_<col>'.
# np.empty allocates the 3D buffer; np.array((n, 6, 6)) would only create
# the three-element 1D array [n, 6, 6].
# Assumes df has one row per object -- TODO confirm.
array = np.empty((len(df), 6, 6))
for row in range(6):
    for col in range(6):
        # Fill this matrix entry for every object at once (index n itself
        # would be one past the end of the first axis).
        array[:, row, col] = df[f'covariance_{row}_{col}']

In [None]:
# Option A (no suffix): fetch the whole signal, convert it once, then index
# into the result.
covariance = dataset.signals['covariance']
# convert_func is not defined in the visible part of this notebook --
# presumably something like np.stack(series.to_numpy()) as used in the
# cells above; TODO confirm.
convert_func(covariance)[:,1,2]  # np.array

# Option B (suffixed columns): one scalar column per matrix entry.
# NOTE(review): if signals[...] is a pd.Series, .to_numpy() yields an
# np.ndarray, so the "pd.Series" remark presumably refers to the column
# before conversion -- confirm.
dataset.signals['covariance_1_2'].to_numpy()  # pd.Series