In [5]:
import pandas as pd

In [6]:
data_path = "../data/combined/"
data_file = "amplitude_csi_dataframe.pkl"

DISCRETE_VARIABLES = ["person"]
TARGET_VARIABLE = "position"
STATE = 42

min_subcarrier = 0
max_subcarrier = 60

data_df: pd.DataFrame = pd.read_pickle(data_path + data_file)
columns_to_drop = [
    col
    for col in data_df.columns
    if isinstance(col, (int)) and (col > max_subcarrier or col < min_subcarrier)
]

data_df.drop(columns=columns_to_drop, inplace=True)
total_columns = len(data_df.columns)

# Convert all column names to strings
data_df.columns = data_df.columns.astype(str)

print(total_columns)
print(data_df.columns)
print(data_df.head())

55
Index(['person', 'position', '6', '7', '8', '9', '10', '11', '12', '13', '14',
       '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '26', '27',
       '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39',
       '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51',
       '52', '54', '55', '56', '57', '58', '59', '60'],
      dtype='object')
   person  position            6            7            8            9  \
0       1        17   795.910156   849.388000   890.166809   912.882263   
1       1        17   798.279419   843.614258   868.484314   895.013977   
2       1        17  1064.543091  1086.945312  1105.320312  1135.975342   
3       1        17  1060.771362  1092.156128  1112.137573  1130.086670   
4       1        17  1329.939087  1409.457397  1416.469604  1432.482056   

            10           11           12           13  ...           50  \
0   946.926086   979.547363  1059.871704  1146.253052  ...  1196.047607   
1 

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)

numerical_columns = [
    col
    for col in data_df.columns
    if col not in DISCRETE_VARIABLES and col != TARGET_VARIABLE
]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", "passthrough", DISCRETE_VARIABLES),
        ("num", numeric_transformer, numerical_columns),
    ]
)

X = data_df.drop(columns=[TARGET_VARIABLE])
X = preprocessor.fit_transform(X)
y = data_df[TARGET_VARIABLE]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=STATE
)

print(X_train[:5])
print(y_train[:5])
print(X_test[:5])
print(y_test[:5])

[[ 4.40000000e+01  3.86420459e-01  2.99760550e-01  2.14165747e-01
   1.34595379e-01  4.12062481e-02 -5.14612645e-02 -5.58421127e-02
  -1.57581687e-01 -1.79050624e-01 -2.17031330e-01 -2.02907100e-01
  -1.93499207e-01 -1.57181755e-01 -3.31633165e-02  1.03215985e-01
   2.09063992e-01  3.63035977e-01  5.20033300e-01  7.18070447e-01
   1.11974823e+00  1.34288454e+00  1.52173138e+00  1.75008583e+00
   1.94033062e+00  1.88802719e+00  3.42168593e+00  2.11799693e+00
   2.19798589e+00  2.11926270e+00  1.98476148e+00  1.83936405e+00
   1.87860620e+00  1.72248757e+00  1.47795570e+00  1.45548248e+00
   1.14400971e+00  9.06879425e-01  7.62229443e-01  4.31666166e-01
   2.07606107e-01  1.83341429e-02 -8.50208029e-02 -2.38454506e-01
  -4.36978966e-01 -5.80921113e-01 -7.28283882e-01 -1.08006287e+00
  -1.20785308e+00 -1.28778493e+00 -1.45514607e+00 -1.54684305e+00
  -5.23569047e-01 -5.66832840e-01]
 [ 2.60000000e+01 -6.97603643e-01 -6.82346702e-01 -6.44464195e-01
  -6.40488207e-01 -6.10596836e-01 -5.8075

In [8]:
save_path = "../data/reduced_train_test_split/"

def save_pkl(obj: object, path: str) -> None:
    with open(path, "wb") as f:
        pd.to_pickle(obj, f)

save_pkl(X_train, save_path + "X_train.pkl")
save_pkl(y_train, save_path + "y_train.pkl")
save_pkl(X_test, save_path + "X_test.pkl")
save_pkl(y_test, save_path + "y_test.pkl")

print("Data saved to", save_path)

Data saved to ../data/reduced_train_test_split/


In [9]:
import torch

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

torch.save(X_train_tensor, save_path + "X_train.pt")
torch.save(y_train_tensor, save_path + "y_train.pt")
torch.save(X_test_tensor, save_path + "X_test.pt")
torch.save(y_test_tensor, save_path + "y_test.pt")

print("Data saved to", save_path)

Data saved to ../data/reduced_train_test_split/


In [10]:
train_X = torch.load(save_path + "X_train.pt")
train_y = torch.load(save_path + "y_train.pt")
test_X = torch.load(save_path + "X_test.pt")
test_y = torch.load(save_path + "y_test.pt")

print(train_X[:5])
print(train_y[:5])
print(test_X[:5])
print(test_y[:5])

  train_X = torch.load(save_path + "X_train.pt")
  train_y = torch.load(save_path + "y_train.pt")
  test_X = torch.load(save_path + "X_test.pt")


tensor([[ 4.4000e+01,  3.8642e-01,  2.9976e-01,  2.1417e-01,  1.3460e-01,
          4.1206e-02, -5.1461e-02, -5.5842e-02, -1.5758e-01, -1.7905e-01,
         -2.1703e-01, -2.0291e-01, -1.9350e-01, -1.5718e-01, -3.3163e-02,
          1.0322e-01,  2.0906e-01,  3.6304e-01,  5.2003e-01,  7.1807e-01,
          1.1197e+00,  1.3429e+00,  1.5217e+00,  1.7501e+00,  1.9403e+00,
          1.8880e+00,  3.4217e+00,  2.1180e+00,  2.1980e+00,  2.1193e+00,
          1.9848e+00,  1.8394e+00,  1.8786e+00,  1.7225e+00,  1.4780e+00,
          1.4555e+00,  1.1440e+00,  9.0688e-01,  7.6223e-01,  4.3167e-01,
          2.0761e-01,  1.8334e-02, -8.5021e-02, -2.3845e-01, -4.3698e-01,
         -5.8092e-01, -7.2828e-01, -1.0801e+00, -1.2079e+00, -1.2878e+00,
         -1.4551e+00, -1.5468e+00, -5.2357e-01, -5.6683e-01],
        [ 2.6000e+01, -6.9760e-01, -6.8235e-01, -6.4446e-01, -6.4049e-01,
         -6.1060e-01, -5.8075e-01, -5.4399e-01, -5.4720e-01, -5.2102e-01,
         -4.8339e-01, -4.4630e-01, -4.3467e-01, -4

  test_y = torch.load(save_path + "y_test.pt")
