## Imports

In [None]:
! pip install ~/ml4c3

import os
import h5py
import socket
import pprint
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import List, Union, Dict

import seaborn as sns
from matplotlib import pyplot as plt

from ml4c3.datasets import train_valid_test_datasets
from ml4c3.tensormap.TensorMap import TensorMap, update_tmaps
from definitions.globals import TENSOR_EXT

# Pretty-printer configured with a 4-space indent for readable nested output.
pp = pprint.PrettyPrinter(indent=4)

%matplotlib inline

## Define TMaps

In [None]:
# Names of the tensor maps to build; uncomment the entries that are needed.
needed_tensor_maps = [
    # "ecg_age",
    # "av_mean_gradient_365_days_post_ecg_newest",
    # "av_peak_gradient",
    # "echo_datetime_365_days_pre_ecg",
    # "ecg_datetime_365_days_pre_echo_newest",
    # "ecg_age_365_days_pre_echo",
    # "ecg_age_36500_days_pre_sts",
    # "foobar",
]

# tmaps_all is the lookup handed to (and returned by) update_tmaps, keyed by
# tensor map name; tmaps collects the maps that were actually created, in the
# order they were requested.
tmaps_all = {}
tmaps = []
for tmap_name in needed_tensor_maps:
    tmaps_all = update_tmaps(tmap_name=tmap_name, tmaps=tmaps_all)

    # A name missing from the returned lookup means the map was not built.
    if tmap_name not in tmaps_all:
        print(f"Could not create {tmap_name}!")
        continue

    created = tmaps_all[tmap_name]
    print(f"Successfully created tensor map {tmap_name} with shape {created.shape}")
    tmaps.append(created)

## Set paths to tensors

In [None]:
# Each entry of `tensors` is one of:
#   - a string: path to a directory containing HD5 files, or to a CSV file
#   - a tuple: (string path to a CSV file, friendly name)
#
# Alternative sources that can be swapped into the list below:
#     "/storage/shared/ecg/mgh-100-files",
#     (os.path.expanduser("~/dropbox/sts-data/sts-mgh.csv"), "sts"),
#     (os.path.expanduser("~/dropbox/ecgnet-as/data/edw/edw-echo.csv"), "echo"),
tensors = [
    (
        os.path.expanduser("~/dropbox/ecgnet-as/data/edw/edw-echo-tiny.csv"),
        "echo",
    ),
]
print("Set path to tensors")

## Generate datasets

In [None]:
# Number of samples requested per batch.
batch_size = 32

# Build the datasets from the configured tensor sources. The tensor maps
# created above are used as inputs only; no output maps are requested.
datasets, stats, cleanups = train_valid_test_datasets(
    # Data sources and the tensor maps to load from them
    tensors=tensors,
    tensor_maps_in=tmaps,
    tensor_maps_out=[],
    # Loading options
    batch_size=batch_size,
    num_workers=4,
    cache_off=True,
    mixup_alpha=0,
    # Both split ratios are zero, and empty splits are explicitly allowed
    valid_ratio=0,
    test_ratio=0,
    allow_empty_split=True,
)

# Three datasets are returned; only the first one is used from here on.
train_dataset, _, _ = datasets

print("Created datasets")

## Iterate to get first batch of tensors

In [None]:
%%timeit -n 1 -r 1

# batch is a tuple; the 1st element has input tensors, 2nd element has output tensors
batch = next(iter(train_dataset))
batch = batch[0]

print("Obtained batch from one iteration of dataset")

for element in range(10):
    print(f"\nSample {element+1}")
    for key, value in batch.items():
        print(f"\tTensorMap: {key} / Value: {value[element]}")
        
print('\n')