In [1]:
import pandas as pd

In [2]:
data_path = "../data/combined/"
data_file = "amplitude_csi_dataframe.pkl"

DISCRETE_VARIABLES = ["person"]
TARGET_VARIABLE = "position"
STATE = 42

data_df: pd.DataFrame = pd.read_pickle(data_path + data_file)
columns_to_drop = [
    col for col in data_df.columns if isinstance(col, (int)) and col > 60
]

data_df.drop(columns=columns_to_drop, inplace=True)
total_columns = len(data_df.columns)

# Convert all column names to strings
data_df.columns = data_df.columns.astype(str)

print(total_columns)
print(data_df.columns)
print(data_df.head())

55
Index(['person', 'position', '6', '7', '8', '9', '10', '11', '12', '13', '14',
       '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '26', '27',
       '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39',
       '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51',
       '52', '54', '55', '56', '57', '58', '59', '60'],
      dtype='object')
   person  position            6            7            8            9  \
0       1        17   795.910156   849.388000   890.166809   912.882263   
1       1        17   798.279419   843.614258   868.484314   895.013977   
2       1        17  1064.543091  1086.945312  1105.320312  1135.975342   
3       1        17  1060.771362  1092.156128  1112.137573  1130.086670   
4       1        17  1329.939087  1409.457397  1416.469604  1432.482056   

            10           11           12           13  ...           50  \
0   946.926086   979.547363  1059.871704  1146.253052  ...  1196.047607   
1 

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)

numerical_columns = [
    col
    for col in data_df.columns
    if col not in DISCRETE_VARIABLES and col != TARGET_VARIABLE
]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numerical_columns),
        ("cat", "passthrough", DISCRETE_VARIABLES),
    ]
)

X = data_df.drop(columns=[TARGET_VARIABLE])
X = preprocessor.fit_transform(X)
y = data_df[TARGET_VARIABLE]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=STATE
)

print(X_train[:5])
print(y_train[:5])
print(X_test[:5])
print(y_test[:5])

[[ 3.86420459e-01  2.99760550e-01  2.14165747e-01  1.34595379e-01
   4.12062481e-02 -5.14612645e-02 -5.58421127e-02 -1.57581687e-01
  -1.79050624e-01 -2.17031330e-01 -2.02907100e-01 -1.93499207e-01
  -1.57181755e-01 -3.31633165e-02  1.03215985e-01  2.09063992e-01
   3.63035977e-01  5.20033300e-01  7.18070447e-01  1.11974823e+00
   1.34288454e+00  1.52173138e+00  1.75008583e+00  1.94033062e+00
   1.88802719e+00  3.42168593e+00  2.11799693e+00  2.19798589e+00
   2.11926270e+00  1.98476148e+00  1.83936405e+00  1.87860620e+00
   1.72248757e+00  1.47795570e+00  1.45548248e+00  1.14400971e+00
   9.06879425e-01  7.62229443e-01  4.31666166e-01  2.07606107e-01
   1.83341429e-02 -8.50208029e-02 -2.38454506e-01 -4.36978966e-01
  -5.80921113e-01 -7.28283882e-01 -1.08006287e+00 -1.20785308e+00
  -1.28778493e+00 -1.45514607e+00 -1.54684305e+00 -5.23569047e-01
  -5.66832840e-01  4.40000000e+01]
 [-6.97603643e-01 -6.82346702e-01 -6.44464195e-01 -6.40488207e-01
  -6.10596836e-01 -5.80753505e-01 -5.4399

In [4]:
import torch
from torch.utils.data import DataLoader, TensorDataset

inputs = total_columns - 1
batch_size = 512
num_workers = 16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

print(train_loader.dataset.tensors)
print(test_loader.dataset.tensors)

(tensor([[ 0.3864,  0.2998,  0.2142,  ..., -0.5236, -0.5668, 44.0000],
        [-0.6976, -0.6823, -0.6445,  ...,  0.8655,  0.8509, 26.0000],
        [-0.6339, -0.6493, -0.6274,  ..., -1.1731, -1.1522, 52.0000],
        ...,
        [ 1.2202,  1.2272,  1.2493,  ...,  1.6093,  1.5470,  4.0000],
        [-1.7210, -1.6329, -1.5727,  ...,  1.6795,  1.7027, 19.0000],
        [-0.3707, -0.3563, -0.3407,  ...,  0.5436,  0.5145,  4.0000]]), tensor([17, 12,  8,  ...,  2,  5, 12]))
(tensor([[-0.2138, -0.2730, -0.2008,  ..., -1.1744, -1.1432, 46.0000],
        [-1.2046, -1.1377, -1.1721,  ..., -1.1571, -1.1429, 10.0000],
        [-1.7444, -1.7738, -1.8353,  ...,  1.2261,  1.2260, 33.0000],
        ...,
        [ 1.3852,  1.5208,  1.3748,  ..., -1.1643, -1.1480, 35.0000],
        [-0.8822, -0.9156, -0.8782,  ..., -1.1681, -1.1030, 56.0000],
        [-0.1381, -0.1438, -0.1963,  ..., -1.1750, -1.1440, 43.0000]]), tensor([13, 10,  0,  ..., 16,  3,  5]))


In [5]:
from mlp import CsiNeuralNet

save_path = "reduced_mlp/"
num_epochs = 1000
net = CsiNeuralNet(inputs, device)

result_model = net.train_model(train_loader, save_path, num_epochs)
torch.save(result_model, "mlp_model.pth")

Training on device: cuda
Epoch 1/1000, Accuracy: 33.70%, Loss: 2.15697715, In 19.28s
Model saved for epoch 10 with accuracy 76.20%
Epoch 10/1000, Accuracy: 76.20%, Loss: 0.79391069, In 189.73s
Model saved for epoch 19 with accuracy 80.50%
Epoch 20/1000, Accuracy: 80.00%, Loss: 0.65277985, In 217.29s
Model saved for epoch 30 with accuracy 82.84%
Epoch 30/1000, Accuracy: 82.84%, Loss: 0.54786452, In 219.28s
Model saved for epoch 40 with accuracy 84.14%
Epoch 40/1000, Accuracy: 84.14%, Loss: 0.49972294, In 218.22s
Model saved for epoch 50 with accuracy 84.91%
Epoch 50/1000, Accuracy: 84.91%, Loss: 0.47044012, In 228.92s
Model saved for epoch 60 with accuracy 85.60%
Epoch 60/1000, Accuracy: 85.60%, Loss: 0.44558119, In 219.60s
Model saved for epoch 70 with accuracy 86.16%
Epoch 70/1000, Accuracy: 86.16%, Loss: 0.42512877, In 219.65s
Model saved for epoch 79 with accuracy 86.69%
Epoch 80/1000, Accuracy: 86.63%, Loss: 0.40765439, In 219.40s
Model saved for epoch 88 with accuracy 87.04%
Epoch

KeyboardInterrupt: 

In [None]:
predictions = net.test_model(test_loader)