# Data Preparation

## Imports

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from dichasus_cf0x import full_dataset_freq_domain, full_dataset_time_domain
import tensorflow as tf
import numpy as np

## From TensorFlow to NumPy

In [2]:
# Materialise both TensorFlow datasets as NumPy arrays so the remaining cells
# can use plain array indexing.

# freq domain: each dataset element unpacks into (cfo, csi, position, snr, timestamp)
cfos_raw = []
csi_freq_domain_raw = []
groundtruth_positions_raw = []
snrs_raw = []
timestamps_raw = []

# `timestamp` instead of `time` so the loop variable does not shadow the stdlib module name
for cfo, csi, pos, snr, timestamp in full_dataset_freq_domain:
    cfos_raw.append(cfo.numpy())
    csi_freq_domain_raw.append(csi.numpy())
    groundtruth_positions_raw.append(pos.numpy())
    snrs_raw.append(snr.numpy())
    timestamps_raw.append(timestamp.numpy())

# stack the per-sample arrays along a new leading (sample) axis
cfos_raw = np.asarray(cfos_raw)
csi_freq_domain_raw = np.asarray(csi_freq_domain_raw)
groundtruth_positions_raw = np.asarray(groundtruth_positions_raw)
snrs_raw = np.asarray(snrs_raw)
timestamps_raw = np.asarray(timestamps_raw)

# position of every sample inside the raw dataset: 0 .. N-1
indices_raw = np.arange(timestamps_raw.shape[0])

# time domain: each dataset element is consumed as a single tensor
csi_time_domain = np.asarray([csi.numpy() for csi in full_dataset_time_domain])

# Later cells index csi_time_domain with indices derived from the frequency-domain
# timestamps, so both datasets must contain the same number of samples.
assert csi_time_domain.shape[0] == csi_freq_domain_raw.shape[0], (
    "time-domain and frequency-domain datasets differ in length: "
    f"{csi_time_domain.shape[0]} vs {csi_freq_domain_raw.shape[0]}"
)

## Compensate for CFOs

In [3]:
# Build a per-sample phase/delay model from the recorded CFO values and divide it
# out of the frequency-domain CSI.
Fc = 1.272e9  # presumably the carrier frequency in Hz — TODO confirm against the dataset description
Fs = 50.056e6  # presumably the sampling rate in Hz — TODO confirm against the dataset description
SUBCARRIERS = csi_freq_domain_raw.shape[-1]  # length of the last CSI axis

# compensate for CFOs
# Time step between consecutive samples, snapped to an integer multiple of 0.048
# (0.048 appears to be the nominal sample interval — TODO confirm); the first sample gets step 0.
timestamp_diffs = np.concatenate([[0], np.round(np.diff(timestamps_raw) / 0.048) * 0.048])
# Running sum over the sample axis of (time step * CFO).
# assumes cfos_raw is 2-D with samples on axis 0 — TODO confirm
cumulative_cfos = np.cumsum(timestamp_diffs[:,np.newaxis] * cfos_raw, axis = 0)

# Average the accumulated values over axis 1, leaving one value per sample.
mean_cumulative_cfos = np.mean(cumulative_cfos, axis = 1)
# Starting offset taken from the first sample only: angle of the summed products of each
# subcarrier with the conjugate of its lower neighbour, scaled by SUBCARRIERS / (2*pi) and negated.
initial_sto = -np.angle(np.sum(csi_freq_domain_raw[0,:,:,:,1:] * np.conj(csi_freq_domain_raw[0,:,:,:,:-1]))) / (2 * np.pi) * SUBCARRIERS
# Per-sample offset: the starting offset minus the mean accumulated CFO scaled by Fs / Fc.
predicted_stos = initial_sto - mean_cumulative_cfos / Fc * Fs
# Per-sample common phase in radians derived from the mean accumulated CFO.
predicted_cpos = -2 * np.pi * mean_cumulative_cfos
# Unit-magnitude phasor per sample, stored as single-precision complex.
phaseshift = np.exp(-1.0j * predicted_cpos).astype(np.csingle)
# Linear phase ramp across subcarriers for every sample; the subcarrier index runs over
# arange(-SUBCARRIERS // 2, SUBCARRIERS // 2) and is normalised by SUBCARRIERS.
timeshift = np.exp(-1.0j * 2 * np.pi * np.outer(predicted_stos, np.arange(-SUBCARRIERS // 2, SUBCARRIERS // 2) / SUBCARRIERS)).astype(np.csingle)
# Combined model with one row per sample and one column per subcarrier.
predicted_csi = phaseshift[:,np.newaxis] * timeshift

# Multiply the raw CSI by the conjugate of the model, broadcasting it over the three middle axes.
csi_freq_domain = csi_freq_domain_raw * np.conj(predicted_csi[:,np.newaxis,np.newaxis,np.newaxis,:])

# Display the shape of the compensated CSI as the cell output.
csi_freq_domain.shape

(83403, 4, 2, 4, 32)

## Bring indices into the correct order

In [4]:
# Place every raw sample on a regular time grid and split that grid into
# interleaved training / prediction subsets.
SAMPLE_TIME = 0.048              # grid spacing ("sample time")
MAX_GAP = SAMPLE_TIME*140        # largest timestamp jump that is kept as-is

# reduce large timestamp jumps to max. 140 * sample time
# Work on a copy: the CFO-compensation cell reads timestamps_raw, so modifying it
# in place would make that cell produce different results when re-run afterwards.
timestamps_compressed = timestamps_raw.copy()
for i in range(1, timestamps_compressed.shape[0]):
    if (timestamps_compressed[i] - timestamps_compressed[i-1] >= MAX_GAP):
        # shift this sample and everything after it so that the jump becomes exactly MAX_GAP
        timestamps_compressed[i:] -= (timestamps_compressed[i] - (timestamps_compressed[i-1] + MAX_GAP))

#### get valid indices ####
# grid slot of every raw sample, counted in multiples of SAMPLE_TIME from the first sample
index_list = np.round((timestamps_compressed - timestamps_compressed[0])/SAMPLE_TIME).astype(int)
grid_length = index_list[-1] + 1

# NOTE: if two samples round to the same grid slot, the later one overwrites the
# earlier one in all three arrays below.

# valid indices for time series: slot -> its own slot number, -1 where no sample exists
valid_indices = -1*np.ones(grid_length, dtype=np.int32)
valid_indices[index_list] = index_list

# valid indices for raw dataset: slot -> position in the raw dataset, -1 where no sample exists
raw_valid_indices = -1*np.ones(grid_length, dtype=np.int32)
raw_valid_indices[index_list] = indices_raw

# timestamps: slot -> (gap-reduced) timestamp, -1 where no sample exists
# NOTE(review): values are stored as float32, which rounds the timestamps — kept
# unchanged so the saved files keep their dtype; confirm this precision is sufficient.
timestamps = -1*np.ones(grid_length, dtype=np.float32)
timestamps[index_list] = timestamps_compressed

# get indices for training set and prediction set
# training uses slots 0, 4, 8, ...; prediction uses slots 2, 6, 10, ...
grid_slots = np.arange(valid_indices.shape[0])
filter_train = grid_slots % 4 == 0
filter_pred = (grid_slots + 2) % 4 == 0

# get valid indices for training set and prediction set
valid_indices_train = valid_indices[filter_train]
valid_indices_pred = valid_indices[filter_pred]

# keep only the slots that actually contain a sample
occupied_slots_train = valid_indices_train[valid_indices_train >= 0]
occupied_slots_pred = valid_indices_pred[valid_indices_pred >= 0]

# timestamps for training set and prediction set
timestamps_train = timestamps[occupied_slots_train]
timestamps_pred = timestamps[occupied_slots_pred]

# get valid indices in raw dataset for training and prediction set
raw_valid_indices_train = raw_valid_indices[occupied_slots_train]
raw_valid_indices_pred = raw_valid_indices[occupied_slots_pred]

In [5]:
# training set: gather the rows listed in raw_valid_indices_train along the sample axis
csi_freq_domain_train = np.take(csi_freq_domain, raw_valid_indices_train, axis=0)
csi_time_domain_train = np.take(csi_time_domain, raw_valid_indices_train, axis=0)
groundtruth_positions_train = np.take(groundtruth_positions_raw, raw_valid_indices_train, axis=0)
snrs_train = np.take(snrs_raw, raw_valid_indices_train, axis=0)

# prediction set: same gathering, using raw_valid_indices_pred
csi_freq_domain_pred = np.take(csi_freq_domain, raw_valid_indices_pred, axis=0)
csi_time_domain_pred = np.take(csi_time_domain, raw_valid_indices_pred, axis=0)
groundtruth_positions_pred = np.take(groundtruth_positions_raw, raw_valid_indices_pred, axis=0)

## Save Data as NumPy Files

In [6]:
# Write the training and prediction arrays to .npy files inside OUTPUT_DIR.
OUTPUT_DIR = 'dataset'

# np.save does not create missing directories, so make sure the target exists first
os.makedirs(OUTPUT_DIR, exist_ok=True)

arrays_to_save = {
    # training set
    'csi_freq_domain_train': csi_freq_domain_train,
    'csi_time_domain_train': csi_time_domain_train,
    'groundtruth_positions_train': groundtruth_positions_train,
    'timestamps_train': timestamps_train,
    # prediction set
    'csi_freq_domain_pred': csi_freq_domain_pred,
    'csi_time_domain_pred': csi_time_domain_pred,
    'groundtruth_positions_pred': groundtruth_positions_pred,
    'timestamps_pred': timestamps_pred,
}

# one file per array, named after its dictionary key
for array_name, array in arrays_to_save.items():
    np.save(os.path.join(OUTPUT_DIR, array_name + '.npy'), array)