In [1]:
import numpy as np
import torch

from EVE.src.datasources.eve_sequences import EVESequencesBase


In [57]:
# Build the dataset from 'sample/eve_dataset', restricted through
# participants_to_use to 'train01', and wrap it in a DataLoader.
# batch_size=1 is the DataLoader default; it is spelled out because
# get_means() matches inp['camera'] against one-element lists.
dataset = EVESequencesBase('sample/eve_dataset', participants_to_use=['train01'])
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1)

In [4]:
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [58]:
# Walk the whole dataloader once and list the distinct 'subfolder' values.
np.unique([inp['subfolder'] for inp in dataloader])

array(['step008_image_MIT-i2263021117', 'step009_image_MIT-i2267703789',
       'step010_image_MIT-istatic-outdoor-street-city-cambridge-uk-IMG-8893',
       'step011_image_MIT-i1325514089', 'step012_image_MIT-i455593157',
       'step013_image_MIT-istatic-outdoor-mallorca-spain-IMG-8527',
       'step014_image_MIT-i391150010', 'step015_image_MIT-i1158217360',
       'step016_image_MIT-i134592788', 'step017_image_MIT-i160271625',
       'step018_image_MIT-i2236294346', 'step019_image_MIT-i20708508',
       'step020_image_MIT-i111373642', 'step021_image_MIT-i12049788',
       'step022_image_MIT-i1107247595', 'step023_image_MIT-i1086742403',
       'step024_image_MIT-i2291485733',
       'step025_image_MIT-ibarcelona-static-street-p9150528',
       'step026_image_MIT-i335910791', 'step027_image_MIT-i1308682205',
       'step030_video_diem-harry-potter-6-trailer',
       'step031_video_diem-movie-trailer-ice-age-3',
       'step032_video_vagba-track01',
       'step033_video_kurzhals-09-b

In [34]:
def get_means(dataloader):
    """Collect per-camera calibration samples from every batch of ``dataloader``.

    Despite the name, nothing is averaged here: the raw per-batch values are
    gathered so that the caller can reduce them afterwards.

    Args:
        dataloader: Iterable of batch dicts. Each batch is read through the
            keys ``'camera'``, ``'pixels_per_millimeter'``, ``'camera_matrix'``
            and ``'camera_transformation'``; the last three must support
            ``[:, 0].numpy()``.

    Returns:
        A 4-tuple of dicts ``(basler, webcam_l, webcam_c, webcam_r)``. Every
        dict has the keys ``'px_per_mm'``, ``'matrices'`` and
        ``'transformations'``, each holding a list with one NumPy array per
        matching batch, in iteration order.
    """
    # Output dict key -> batch field it is collected from.
    fields = (
        ('px_per_mm', 'pixels_per_millimeter'),
        ('matrices', 'camera_matrix'),
        ('transformations', 'camera_transformation'),
    )
    # This order fixes the order of the returned tuple.
    camera_names = ('basler', 'webcam_l', 'webcam_c', 'webcam_r')
    collected = {
        name: {out_key: [] for out_key, _ in fields}
        for name in camera_names
    }

    for inp in dataloader:
        for name in camera_names:
            # A batch only counts for a camera when its 'camera' entry is
            # exactly the one-element list [name]; any other value (an unknown
            # camera, or a list with several entries) is skipped.
            if inp['camera'] == [name]:
                for out_key, batch_key in fields:
                    collected[name][out_key].append(inp[batch_key][:, 0].numpy())

    return tuple(collected[name] for name in camera_names)

In [48]:
def get_ct_mean(camera_transformations):
    """Average a stack of camera transformations entry by entry.

    Args:
        camera_transformations: Array indexed as ``[sample, row, col]`` with at
            least 4 rows and 4 columns.

    Returns:
        A 4x4 ``np.ndarray`` whose ``[row, col]`` entry is the mean of
        ``camera_transformations[:, row, col]``.
    """
    size = 4
    return np.array([
        [camera_transformations[:, row, col].mean() for col in range(size)]
        for row in range(size)
    ])

In [49]:
def get_cmtx_mean(camera_matrices):
    """Average a stack of camera matrices entry by entry.

    Args:
        camera_matrices: Array indexed as ``[sample, row, col]`` with at least
            3 rows and 3 columns.

    Returns:
        A 3x3 ``np.ndarray`` whose ``[row, col]`` entry is the mean of
        ``camera_matrices[:, row, col]``.
    """
    size = 3
    return np.array([
        [camera_matrices[:, row, col].mean() for col in range(size)]
        for row in range(size)
    ])

In [36]:
# One pass over the dataloader; yields one dict of collected samples per camera.
basler, webcam_l, webcam_c, webcam_r = get_means(dataloader)

# basler:

In [52]:
# Basler summary: the first collected pixels-per-millimeter sample (not an
# average), then the entry-wise means of every collected camera transformation
# and camera matrix.
print(basler['px_per_mm'][0])
print(get_ct_mean(np.concatenate(basler['transformations'])))
print(get_cmtx_mean(np.concatenate(basler['matrices'])))

[[3.471971 3.472669]]
[[  -0.9998208     0.01386724    0.01287778  265.4817    ]
 [   0.0088855     0.94479924   -0.3275297  -421.44678   ]
 [  -0.01670885   -0.32735655   -0.94475293  149.0165    ]
 [   0.            0.            0.            1.        ]]
[[1780.604      0.       959.32904]
 [   0.      1779.855    579.3101 ]
 [   0.         0.         1.     ]]


In [6]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a basler batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera matrix at index 0 of that batch's second axis as a NumPy array.
inp['camera_matrix'][:, 0].numpy()

array([[[1780.6042 ,    0.     ,  959.32886],
        [   0.     , 1779.8547 ,  579.31006],
        [   0.     ,    0.     ,    1.     ]]], dtype=float32)

In [7]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a basler batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera transformation at index 0 of that batch's second axis as a
# NumPy array.
inp['camera_transformation'][:, 0].numpy()

array([[[  -0.9998209 ,    0.01386724,    0.01287777,  265.4817    ],
        [   0.0088855 ,    0.9447991 ,   -0.32752967, -421.44687   ],
        [  -0.01670884,   -0.32735658,   -0.9447531 ,  149.0165    ],
        [   0.        ,    0.        ,    0.        ,    1.        ]]],
      dtype=float32)

# webcam_l

In [51]:
# webcam_l summary: the first collected pixels-per-millimeter sample (not an
# average), then the entry-wise means of every collected camera transformation
# and camera matrix.
print(webcam_l['px_per_mm'][0])
print(get_ct_mean(np.concatenate(webcam_l['transformations'])))
print(get_cmtx_mean(np.concatenate(webcam_l['matrices'])))

[[3.471971 3.472669]]
[[-0.99969506 -0.00189966 -0.02461496 21.737684  ]
 [-0.00852217  0.9623024   0.27184793 40.741585  ]
 [ 0.02317062  0.27197483 -0.96202564 -3.7921836 ]
 [ 0.          0.          0.          1.        ]]
[[1413.7068     0.       920.9032 ]
 [   0.      1411.6295   513.69617]
 [   0.         0.         1.     ]]


In [9]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_l batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera matrix at index 0 of that batch's second axis as a NumPy array.
inp['camera_matrix'][:, 0].numpy()

array([[[1413.7065,    0.    ,  920.903 ],
        [   0.    , 1411.6295,  513.6961],
        [   0.    ,    0.    ,    1.    ]]], dtype=float32)

In [10]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_l batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera transformation at index 0 of that batch's second axis as a
# NumPy array.
inp['camera_transformation'][:, 0].numpy()

array([[[-0.9996952 , -0.00189966, -0.02461495, 21.737686  ],
        [-0.00852217,  0.9623025 ,  0.2718479 , 40.74159   ],
        [ 0.02317061,  0.27197483, -0.9620254 , -3.792184  ],
        [ 0.        ,  0.        ,  0.        ,  1.        ]]],
      dtype=float32)

# webcam_c

In [53]:
# webcam_c summary: the first collected pixels-per-millimeter sample (not an
# average), then the entry-wise means of every collected camera transformation
# and camera matrix.
print(webcam_c['px_per_mm'][0])
print(get_ct_mean(np.concatenate(webcam_c['transformations'])))
print(get_cmtx_mean(np.concatenate(webcam_c['matrices'])))

[[3.471971 3.472669]]
[[ -0.9999926    0.00350132   0.0014209  281.23346   ]
 [  0.00376616   0.9540883    0.2995015   49.301582  ]
 [ -0.00030702   0.2995047   -0.9540948    4.102493  ]
 [  0.           0.           0.           1.        ]]
[[1434.8025    0.      895.1911]
 [   0.     1430.0175  520.2318]
 [   0.        0.        1.    ]]


In [12]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_c batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera matrix at index 0 of that batch's second axis as a NumPy array.
inp['camera_matrix'][:, 0].numpy()

array([[[1434.8026 ,    0.     ,  895.1911 ],
        [   0.     , 1430.0173 ,  520.23175],
        [   0.     ,    0.     ,    1.     ]]], dtype=float32)

In [13]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_c batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera transformation at index 0 of that batch's second axis as a
# NumPy array.
inp['camera_transformation'][:, 0].numpy()

array([[[ -0.99999285,   0.00350132,   0.0014209 , 281.2335    ],
        [  0.00376616,   0.9540884 ,   0.2995015 ,  49.301586  ],
        [ -0.00030702,   0.29950473,  -0.95409477,   4.102493  ],
        [  0.        ,   0.        ,   0.        ,   1.        ]]],
      dtype=float32)

# webcam_r

In [54]:
# webcam_r summary: the first collected pixels-per-millimeter sample (not an
# average), then the entry-wise means of every collected camera transformation
# and camera matrix.
print(webcam_r['px_per_mm'][0])
print(get_ct_mean(np.concatenate(webcam_r['transformations'])))
print(get_cmtx_mean(np.concatenate(webcam_r['matrices'])))

[[3.471971 3.472669]]
[[ -0.9999392    0.00086562  -0.01097378 548.8291    ]
 [ -0.0025663    0.951102     0.3088666   47.009884  ]
 [  0.01070454   0.30887607  -0.9510421   13.909316  ]
 [  0.           0.           0.           1.        ]]
[[1483.2804     0.       871.09283]
 [   0.      1470.7018   557.36475]
 [   0.         0.         1.     ]]


In [15]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_r batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera matrix at index 0 of that batch's second axis as a NumPy array.
inp['camera_matrix'][:, 0].numpy()

array([[[1483.2803,    0.    ,  871.0928],
        [   0.    , 1470.7019,  557.3647],
        [   0.    ,    0.    ,    1.    ]]], dtype=float32)

In [16]:
# NOTE(review): `inp` is never assigned at top level in the cells shown here —
# presumably a webcam_r batch kept from a cell that was since removed; confirm,
# otherwise this fails on a fresh kernel.
# Shows the camera transformation at index 0 of that batch's second axis as a
# NumPy array.
inp['camera_transformation'][:, 0].numpy()

array([[[ -0.9999394 ,   0.00086562,  -0.01097378, 548.8292    ],
        [ -0.0025663 ,   0.9511019 ,   0.30886662,  47.009876  ],
        [  0.01070454,   0.30887607,  -0.95104206,  13.909317  ],
        [  0.        ,   0.        ,   0.        ,   1.        ]]],
      dtype=float32)