# Build tensors in (N, T, F) format: N investments, T time steps, F features

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

def train_to_tensors(train,investment_ids,timesteps):
    """Convert the long-format training frame into dense per-investment arrays.

    Args:
        train: DataFrame with "row_id", "investment_id", "time_id" and
            "target" columns; every remaining column is treated as a feature.
        investment_ids: Iterable of investment ids to extract. Its order
            defines the first axis of both outputs.
        timesteps: Time ids every investment is reindexed to. Its order
            defines the second axis of both outputs; time ids for which an
            investment has no row are filled with NaN by ``reindex``.

    Returns:
        A ``(targets, features)`` tuple of NumPy arrays. ``targets`` has one
        row per investment and one column per time step; ``features`` has an
        extra trailing axis holding the feature columns.

    Raises:
        KeyError: If "row_id" is missing from ``train`` or an investment id
            is not present in it.
        ValueError: If ``investment_ids`` is empty (nothing to concatenate).
    """
    indexed = train.drop(columns="row_id").set_index(["investment_id", "time_id"])

    target_slabs = []
    feature_slabs = []
    for asset_id in tqdm(investment_ids):
        # Rows of one investment, aligned to the common time axis.
        panel = indexed.loc[asset_id].reindex(timesteps)

        # A leading length-1 axis lets the slabs be joined along axis 0 below.
        target_slabs.append(panel.target.values[np.newaxis])
        feature_slabs.append(panel.drop(columns="target").values[np.newaxis])

    stacked_targets = np.concatenate(target_slabs, axis=0)
    stacked_features = np.concatenate(feature_slabs, axis=0)
    return stacked_targets, stacked_features

In [None]:
# Directory the pickled training frame (train.p) is read from.
# NOTE: both paths end with "/" and the cells below join them with another "/",
# so the resulting file paths contain a doubled slash.
data_path = "/kaggle/input/ubiquant-csv-to-pickle/"
# Directory the tensor pickles are written to.
output_path = "/kaggle/working/"

In [None]:
# Second half
print("Preparing tensor data, second half")

# Load the full frame, then keep only rows with time_id >= 600
# (limits the size to avoid OOM).
train = pd.read_pickle(f"{data_path}/train.p")
train = train.loc[train.time_id >= 600]

# Sorted ids that label the first two axes of the tensors.
investment_ids = sorted(train.investment_id.unique())
timesteps = sorted(train.time_id.unique())
targets, features = train_to_tensors(train, investment_ids, timesteps)

# Write the tensors and their axis labels. The pairs live in an inline tuple
# so that no extra long-lived reference to the large arrays is kept.
for destination, payload in (
    (f"{output_path}/targets.p", targets),
    (f"{output_path}/features.p", features),
    (f"{output_path}/investment_ids.p", investment_ids),
    (f"{output_path}/timesteps.p", timesteps),
):
    pd.to_pickle(payload, destination)

In [None]:
import gc
# Drop the names bound to the large frame and arrays, then run a collection
# pass so that memory can be reclaimed before more data is loaded.
del train,targets,features
gc.collect()

In [None]:
# First half
print("Preparing tensor data, first half")

# Load the full frame, then keep only rows with time_id < 600
# (limits the size to avoid OOM).
train = pd.read_pickle(f"{data_path}/train.p")
train = train.loc[train.time_id < 600]

# Sorted ids that label the first two axes of the tensors.
investment_ids = sorted(train.investment_id.unique())
timesteps = sorted(train.time_id.unique())
targets, features = train_to_tensors(train, investment_ids, timesteps)

# Write the tensors and their axis labels under the "_start" names. The pairs
# live in an inline tuple so no extra long-lived reference to the arrays is kept.
for destination, payload in (
    (f"{output_path}/targets_start.p", targets),
    (f"{output_path}/features_start.p", features),
    (f"{output_path}/investment_ids_start.p", investment_ids),
    (f"{output_path}/timesteps_start.p", timesteps),
):
    pd.to_pickle(payload, destination)