In [1]:
import os
import pandas as pd
import numpy as np
import h5py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

from keras.models import Sequential
from keras.layers import Dense

In [None]:
# An example of load labelled data.
# The unlabelled data and test data have similar format
!gdown --id '18oSQy3eRQKP_rufwyhfmbw8RGafgeH7h' --output "labelled.zip"
!gdown --id '1T55aW7f6dLvsC2jyCCrVq4z05UWVObnt' --output "unlabelled.zip"
!gdown --id '1AekpiyN14HejCKWocWsl7Yj-I_QKOUTo' --output "test.zip"

In [22]:
!unzip -q "labelled.zip" -d "./"
!unzip -q "unlabelled.zip" -d "./"
!unzip -q "test.zip" -d "./"

In [2]:
labelled = "labelled_data/"
unlabelled = "unlabelled_data/"
test = "test_data/"

In [3]:
def get_unlabelled_test_data(data_file):
    f = h5py.File(data_file, 'r')
    H_Re = f['H_Re'][:] 
    H_Im = f['H_Im'][:] 
    SNR = f['SNR'][:] 
    f.close()

    return H_Re, H_Im, SNR

def get_labelled_data(data_file):
    f = h5py.File(data_file, 'r')
    H_Re = f['H_Re'][:] 
    H_Im = f['H_Im'][:]
    SNR = f['SNR'][:] 
    Pos = f['Pos'][:] 
    f.close()

    return H_Re, H_Im, SNR, Pos

In [4]:
print("Files in unlabelled data:", len([f for f in os.listdir('unlabelled_data') if f.endswith('.hdf5')]))
print("Files in test data:", len([f for f in os.listdir('test') if f.endswith('.hdf5')]))
print("Files in labelled data:", len([f for f in os.listdir('labelled_data') if f.endswith('.hdf5')]))

Files in unlabelled data: 64
Files in test data: 1
Files in labelled data: 8


In [5]:
unlabelled_H_Re, unlabelled_H_Im, unlabelled_SNR = get_unlabelled_test_data("unlabelled_data/file_1.hdf5")

for i in range(2,64):
    H_Re_i, H_Im_i, SNR_i = get_unlabelled_test_data("unlabelled_data/file_"+str(i)+".hdf5")
    np.concatenate((unlabelled_H_Re, H_Re_i))
    np.concatenate((unlabelled_H_Im, H_Im_i))
    np.concatenate((unlabelled_SNR, SNR_i))

print("Shape of unlabelled data:")
print("unlabelled_H_Re is of shape {}".format(unlabelled_H_Re.shape))
print("unlabelled_H_Im is of shape {}".format(unlabelled_H_Im.shape))
print("unlabelled_SNR is of shape {}".format(unlabelled_SNR.shape))

Shape of unlabelled data:
unlabelled_H_Re is of shape (512, 56, 924, 5)
unlabelled_H_Im is of shape (512, 56, 924, 5)
unlabelled_SNR is of shape (512, 56, 5)


In [6]:
test_H_Re, test_H_Im, test_SNR = get_unlabelled_test_data("test/file_1.hdf5")

print("Shape of test data:")
print("test_H_Re is of shape {}".format(test_H_Re.shape))
print("test_H_Im is of shape {}".format(test_H_Im.shape))
print("test_SNR is of shape {}".format(test_SNR.shape))

Shape of test data:
test_H_Re is of shape (883, 56, 924, 5)
test_H_Im is of shape (883, 56, 924, 5)
test_SNR is of shape (883, 56, 5)


In [7]:
labelled_H_Re, labelled_H_Im, labelled_SNR, labelled_Pos = get_labelled_data("labelled_data/file_1.hdf5")

for i in range(2,9):
    H_Re_i, H_Im_i, SNR_i, Pos_i = get_labelled_data("labelled_data/file_"+str(i)+".hdf5")
    np.concatenate((labelled_H_Re, H_Re_i))
    np.concatenate((labelled_H_Im, H_Im_i))
    np.concatenate((labelled_SNR, SNR_i))
    np.concatenate((labelled_Pos, Pos_i))

print("Shape of labelled data:")
print("labelled_H_Re is of shape {}".format(labelled_H_Re.shape))
print("labelled_H_Im is of shape {}".format(labelled_H_Im.shape))
print("labelled_SNR is of shape {}".format(labelled_SNR.shape))
print("labelled_Pos is of shape {}".format(labelled_Pos.shape))

Shape of labelled data:
labelled_H_Re is of shape (512, 56, 924, 5)
labelled_H_Im is of shape (512, 56, 924, 5)
labelled_SNR is of shape (512, 56, 5)
labelled_Pos is of shape (512, 3)


In [8]:
def create_autoencoder(input_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(input_dim, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [9]:
def extract_features(autoencoder, unlabelled_data):
    return autoencoder.predict(unlabelled_data)

In [10]:
model = make_pipeline(StandardScaler(), RandomForestRegressor())

In [11]:
X_train, X_val, y_train, y_val = train_test_split(labelled_H_Re, labelled_Pos, test_size=0.2, random_state=42)

X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
test_data_flat = test_H_Re.reshape(test_H_Re.shape[0], -1)

In [12]:
model.fit(X_train_flat, y_train)

In [13]:
val_predictions_flat = model.predict(X_val_flat)
mae_val = mean_absolute_error(y_val, val_predictions_flat)
print(f'MAE on validation set: {mae_val}')

MAE on validation set: 58.84740355491467


In [14]:
test_predictions_flat = model.predict(test_data_flat)

In [15]:
submission_df = pd.DataFrame({
    'id': range(1, len(test_predictions_flat) + 1),
    'x': test_predictions_flat[:, 0],
    'y': test_predictions_flat[:, 1],
    'z': test_predictions_flat[:, 2]
})

submission_df.to_csv('user_localization_submission.csv', index=False)