In [1]:
import pandas as pd

In [2]:
data_path = "data/combined/"
data_file = "amplitude_csi_dataframe.pkl"

DISCRETE_VARIABLES = ["person"]
TARGET_VARIABLE = "position"
STATE = 42

data_df: pd.DataFrame = pd.read_pickle(data_path + data_file)

# Convert all column names to strings
data_df.columns = data_df.columns.astype(str)

print(data_df.head())

   person  position            6            7            8            9  \
0       1        17   795.910156   849.388000   890.166809   912.882263   
1       1        17   798.279419   843.614258   868.484314   895.013977   
2       1        17  1064.543091  1086.945312  1105.320312  1135.975342   
3       1        17  1060.771362  1092.156128  1112.137573  1130.086670   
4       1        17  1329.939087  1409.457397  1416.469604  1432.482056   

            10           11           12           13  ...        241  \
0   946.926086   979.547363  1059.871704  1146.253052  ...  52.201534   
1   921.195984   935.745667  1021.312866  1114.450928  ...  33.526108   
2  1170.029907  1180.800171  1267.976318  1369.742310  ...  47.539455   
3  1138.223999  1126.414673  1193.649902  1277.542969  ...  26.870058   
4  1445.448364  1401.856567  1508.621948  1599.805298  ...  51.156624   

         242        243        244        245         246         247  \
0  41.773197  24.515301  16.552946  3

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)

numerical_columns = [
    col
    for col in data_df.columns
    if col not in DISCRETE_VARIABLES and col != TARGET_VARIABLE
]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numerical_columns),
        ("cat", "passthrough", DISCRETE_VARIABLES),
    ]
)

X = data_df.drop(columns=[TARGET_VARIABLE])
X = preprocessor.fit_transform(X)
y = data_df[TARGET_VARIABLE]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=STATE
)

print(X_train[:5])
print(y_train[:5])
print(X_test[:5])
print(y_test[:5])

[[ 0.38642046  0.29976055  0.21416575 ...  2.7544522  -0.07609867
  44.        ]
 [-0.69760364 -0.6823467  -0.6444642  ... -0.4048676  -0.7114199
  26.        ]
 [-0.6339469  -0.64925706 -0.6274046  ... -0.6062821   0.19943404
  52.        ]
 [-0.88174653 -0.9246833  -0.8360618  ... -0.37353423 -0.69077873
   8.        ]
 [ 1.1035314   1.0955558   1.1070964  ... -0.4393156   0.4879667
  49.        ]]
1559868    17
903800     12
1865988     8
277030      7
1732226    13
Name: position, dtype: uint8
[[-0.21375568 -0.2730182  -0.200793   ... -0.86679655 -0.8598902
  46.        ]
 [-1.2045823  -1.1377039  -1.172124   ...  0.9736769  -0.10447231
  10.        ]
 [-1.7443719  -1.7738211  -1.8353074  ... -0.5750575  -0.17337221
  33.        ]
 [-1.4034363  -1.4370869  -1.4509764  ... -0.74591374 -0.72425234
  56.        ]
 [-1.1189996  -1.1041296  -1.0781889  ... -0.5311101  -0.33968678
  32.        ]]
1625434    13
323710     10
1151379     0
1993553     1
1135407     3
Name: position, dtype:

In [4]:
import torch
from torch.utils.data import DataLoader, TensorDataset

inputs = 235
batch_size = 512
num_workers = 16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

print(train_loader.dataset.tensors)
print(test_loader.dataset.tensors)

(tensor([[ 3.8642e-01,  2.9976e-01,  2.1417e-01,  ...,  2.7545e+00,
         -7.6099e-02,  4.4000e+01],
        [-6.9760e-01, -6.8235e-01, -6.4446e-01,  ..., -4.0487e-01,
         -7.1142e-01,  2.6000e+01],
        [-6.3395e-01, -6.4926e-01, -6.2740e-01,  ..., -6.0628e-01,
          1.9943e-01,  5.2000e+01],
        ...,
        [ 1.2202e+00,  1.2272e+00,  1.2493e+00,  ..., -4.1676e-02,
          7.6974e-01,  4.0000e+00],
        [-1.7210e+00, -1.6329e+00, -1.5727e+00,  ...,  2.0413e-01,
         -1.5632e-01,  1.9000e+01],
        [-3.7073e-01, -3.5631e-01, -3.4066e-01,  ..., -7.8024e-01,
         -4.3450e-01,  4.0000e+00]]), tensor([17, 12,  8,  ...,  2,  5, 12]))
(tensor([[-0.2138, -0.2730, -0.2008,  ..., -0.8668, -0.8599, 46.0000],
        [-1.2046, -1.1377, -1.1721,  ...,  0.9737, -0.1045, 10.0000],
        [-1.7444, -1.7738, -1.8353,  ..., -0.5751, -0.1734, 33.0000],
        ...,
        [ 1.3852,  1.5208,  1.3748,  ..., -0.7631, -0.7330, 35.0000],
        [-0.8822, -0.9156, -0.87

In [5]:
from models.mlp import CsiNeuralNet

num_epochs = 1000
net = CsiNeuralNet(inputs, device)

result_model = net.train_model(train_loader, num_epochs)
torch.save(result_model, "mlp_model.pth")

Training on device: cuda
Epoch 1/1000, Accuracy: 39.80%, Loss: 1.97261509, In 19.88s
Model saved for epoch 10 with accuracy 78.19%
Epoch 10/1000, Accuracy: 78.19%, Loss: 0.70194657, In 201.41s
Model saved for epoch 19 with accuracy 82.98%
Epoch 20/1000, Accuracy: 82.00%, Loss: 0.56723515, In 232.09s
Model saved for epoch 28 with accuracy 86.27%
Epoch 30/1000, Accuracy: 85.98%, Loss: 0.42891366, In 232.55s
Model saved for epoch 39 with accuracy 89.00%
Epoch 40/1000, Accuracy: 87.54%, Loss: 0.37753882, In 227.63s
Model saved for epoch 46 with accuracy 90.81%
Epoch 50/1000, Accuracy: 90.12%, Loss: 0.29687983, In 229.77s
Model saved for epoch 59 with accuracy 93.82%
Epoch 60/1000, Accuracy: 92.17%, Loss: 0.23537457, In 226.62s
Model saved for epoch 69 with accuracy 94.33%
Epoch 70/1000, Accuracy: 93.04%, Loss: 0.21098351, In 227.94s
Model saved for epoch 72 with accuracy 95.40%
Epoch 80/1000, Accuracy: 94.81%, Loss: 0.15946127, In 227.20s
Model saved for epoch 88 with accuracy 96.83%
Epoch

In [6]:
predictions = net.test_model(test_loader)

Testing on device: cuda
Accuracy: 78.26%


In [7]:
print(predictions)

[13, 16, 0, 1, 3, 6, 9, 16, 5, 16, 15, 2, 2, 13, 16, 9, 0, 17, 16, 13, 4, 15, 5, 12, 6, 9, 16, 1, 11, 8, 2, 10, 6, 7, 12, 17, 4, 2, 1, 7, 0, 7, 3, 9, 8, 8, 8, 12, 1, 8, 4, 11, 7, 4, 11, 15, 12, 1, 17, 17, 12, 0, 3, 3, 0, 16, 6, 3, 13, 2, 9, 2, 17, 7, 8, 8, 9, 5, 14, 5, 5, 1, 0, 1, 16, 5, 14, 17, 4, 14, 6, 2, 10, 4, 4, 17, 1, 1, 11, 11, 17, 16, 2, 10, 5, 5, 3, 12, 9, 10, 2, 5, 9, 15, 15, 6, 9, 4, 2, 3, 10, 2, 6, 8, 11, 17, 14, 5, 9, 13, 5, 3, 10, 10, 8, 6, 13, 10, 3, 16, 5, 10, 9, 1, 15, 0, 13, 7, 8, 11, 14, 7, 2, 9, 3, 4, 8, 11, 7, 8, 7, 15, 1, 9, 6, 16, 6, 2, 12, 16, 8, 12, 9, 7, 8, 14, 13, 8, 11, 11, 6, 15, 4, 11, 9, 12, 1, 14, 9, 7, 6, 6, 9, 1, 0, 15, 15, 8, 10, 12, 15, 0, 14, 1, 11, 5, 11, 3, 8, 4, 13, 4, 0, 4, 11, 13, 9, 3, 8, 7, 10, 2, 6, 1, 13, 13, 0, 4, 9, 17, 14, 10, 16, 9, 9, 6, 15, 17, 7, 16, 9, 14, 16, 3, 0, 9, 9, 0, 14, 6, 15, 6, 5, 2, 7, 16, 14, 11, 8, 11, 16, 0, 17, 0, 14, 0, 6, 9, 5, 10, 14, 4, 9, 15, 10, 14, 7, 7, 4, 5, 14, 10, 6, 1, 0, 7, 2, 14, 1, 8, 9, 10, 7, 2, 0, 

In [8]:
data = {"expected": y_test_tensor, "predictions": predictions}
df = pd.DataFrame(data)
df.to_csv("results.csv", index=False)
print(df.head())

   expected  predictions
0        13           13
1        10           16
2         0            0
3         1            1
4         3            3
