In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import lightgbm as lgb

class UJIDataset(torch.utils.data.Dataset):
    def __init__(self, root, train = True, transform = None, target_transform = None, download = False):
        self.root = root
        dir_path = self.root + '/UJIndoorLoc'
        zip_path = self.root + '/uji_uil.zip'
        dataset_training_file = dir_path + '/trainingData.csv'
        dataset_validation_file = dir_path + '/validationData.csv'
        # Load independent variables (WAPs values)
        if train:
            dataset_file = dataset_training_file
        else:
            dataset_file = dataset_validation_file
        file = open(dataset_file, 'r')
        # Load labels
        label = file.readline()
        label = label.split(',')
        # Load independent variables
        file_load = np.loadtxt(file, delimiter = ',', skiprows = 1)
        #file_load_label = np.loadtxt(file, delimiter = ',')
        #data = np.genfromtxt(file, dtype = float, delimiter = ',', names = True)
        # RSSI values
        self.x = file_load[:, 0 : 520]
        # Load dependent variables
        self.y = file_load[:, 520 : 524]
        # Divide labels into x and y
        self.x_label = label[0 : 520]
        self.x_label = np.concatenate([self.x_label, label[524: 529]])
        self.y_label = label[520 : 524]
        # Regularization of independent variables
        self.x[self.x == 100] = np.nan    # WAP not detected
        self.x = self.x + 104             # Convert into positive values
        self.x = self.x / 104             # Regularize into scale between 0 and 1
        # Building ID, Space ID, Relative Position, User ID, Phone ID and Timestamp respectively
        self.x = np.concatenate([self.x, file_load[:, 524 : 529]], axis = 1)
        file.close()
        # Reduce the number of dependent variables by combining building number and floor into one variable: area
        self.area = self.y[:, 3] * 5 + self.y[:, 2]
    def to_tensor(self):
        self.x = torch.from_numpy(self.x).float()
        self.y = torch.from_numpy(self.y).float()
        self.area = torch.from_numpy(self.area).float()
    def nan_to_zero(self):
        self.x = np.nan_to_num(self.x)
    # Return the target instance (row)
    def __getitem__(self, index_row):
        return self.x[index_row, :], self.y[index_row, :]
    # Return the number of instances (the number of rows)
    def __len__(self, dim = 0):
        return int(self.x.size()[0])

Import dataset with lightgbm

In [2]:
a = torch.zeros([2,2])
print(a)
b = torch.ones([2,2])
c = torch.cat((a, b), dim = 1)
print(c)

tensor([[0., 0.],
        [0., 0.]])
tensor([[0., 0., 1., 1.],
        [0., 0., 1., 1.]])


In [3]:
# Load training dataset
dataset_train = UJIDataset('./data', train = True)
dataset_validate = UJIDataset('./data', train = False)
#gb_train_data = lgb.Dataset(

In [4]:
print(dataset_train.x_label)

['WAP001' 'WAP002' 'WAP003' 'WAP004' 'WAP005' 'WAP006' 'WAP007' 'WAP008'
 'WAP009' 'WAP010' 'WAP011' 'WAP012' 'WAP013' 'WAP014' 'WAP015' 'WAP016'
 'WAP017' 'WAP018' 'WAP019' 'WAP020' 'WAP021' 'WAP022' 'WAP023' 'WAP024'
 'WAP025' 'WAP026' 'WAP027' 'WAP028' 'WAP029' 'WAP030' 'WAP031' 'WAP032'
 'WAP033' 'WAP034' 'WAP035' 'WAP036' 'WAP037' 'WAP038' 'WAP039' 'WAP040'
 'WAP041' 'WAP042' 'WAP043' 'WAP044' 'WAP045' 'WAP046' 'WAP047' 'WAP048'
 'WAP049' 'WAP050' 'WAP051' 'WAP052' 'WAP053' 'WAP054' 'WAP055' 'WAP056'
 'WAP057' 'WAP058' 'WAP059' 'WAP060' 'WAP061' 'WAP062' 'WAP063' 'WAP064'
 'WAP065' 'WAP066' 'WAP067' 'WAP068' 'WAP069' 'WAP070' 'WAP071' 'WAP072'
 'WAP073' 'WAP074' 'WAP075' 'WAP076' 'WAP077' 'WAP078' 'WAP079' 'WAP080'
 'WAP081' 'WAP082' 'WAP083' 'WAP084' 'WAP085' 'WAP086' 'WAP087' 'WAP088'
 'WAP089' 'WAP090' 'WAP091' 'WAP092' 'WAP093' 'WAP094' 'WAP095' 'WAP096'
 'WAP097' 'WAP098' 'WAP099' 'WAP100' 'WAP101' 'WAP102' 'WAP103' 'WAP104'
 'WAP105' 'WAP106' 'WAP107' 'WAP108' 'WAP109' 'WAP1

In [5]:
#dataset_train.nan_to_zero()
#dataset_validate.nan_to_zero()
#dataset_train.to_tensor()
#dataset_validate.to_tensor()

In [6]:
print(dataset_train.y)

[[-7.53662120e+03  4.86493423e+06  2.00000000e+00  1.00000000e+00]
 [-7.51915240e+03  4.86494953e+06  2.00000000e+00  1.00000000e+00]
 [-7.52457040e+03  4.86493409e+06  2.00000000e+00  1.00000000e+00]
 ...
 [-7.51684150e+03  4.86488929e+06  3.00000000e+00  1.00000000e+00]
 [-7.53732190e+03  4.86489578e+06  3.00000000e+00  1.00000000e+00]
 [-7.53616580e+03  4.86489786e+06  3.00000000e+00  1.00000000e+00]]


In [7]:
x_train, x_validate, y_train, y_validate = sklearn.model_selection.train_test_split(dataset_train.x, dataset_train.y, test_size = 0.2, random_state = 42)

In [8]:
print(y_train)

[[-7.68013460e+03  4.86493149e+06  0.00000000e+00  0.00000000e+00]
 [-7.37280290e+03  4.86484930e+06  4.00000000e+00  2.00000000e+00]
 [-7.37707827e+03  4.86484296e+06  0.00000000e+00  2.00000000e+00]
 ...
 [-7.40199620e+03  4.86479002e+06  0.00000000e+00  2.00000000e+00]
 [-7.36838610e+03  4.86476941e+06  3.00000000e+00  2.00000000e+00]
 [-7.64097620e+03  4.86501066e+06  0.00000000e+00  0.00000000e+00]]


In [9]:
dataset_train_reg_long_lgb = lgb.Dataset(x_train, label = y_train[:, 0])
dataset_validate_reg_long_lgb = lgb.Dataset(x_validate, label = y_validate[:, 0])
dataset_train_reg_lat_lgb = lgb.Dataset(x_train, label = y_train[:, 1])
dataset_validate_reg_lat_lgb = lgb.Dataset(x_validate, label = y_validate[:, 1])
dataset_train_cat_floor_lgb = lgb.Dataset(x_train, label = y_train[:, 2])
dataset_validate_cat_floor_lgb = lgb.Dataset(x_validate, label = y_validate[:, 2])
dataset_train_cat_building_lgb = lgb.Dataset(x_train, label = y_train[:, 3])
dataset_validate_cat_building_lgb = lgb.Dataset(x_validate, label = y_validate[:, 3])

In [10]:
params_reg = {'learning_rate': 0.001,
              'num_boost_round': 1000,
              'max_depth': 16,
              'boosting': 'gbdt', 
              'objective': 'regression', 
              'metric': 'mae', 
              'is_training_metric': True, 
              'num_leaves': 1440,  
              #'feature_fraction': 0.9, 
              'bagging_fraction': 0.8, 
              'bagging_freq': 5, 
              #'seed':2018
             }
params_cat_building = {'learning_rate': 0.002,
              'num_boost_round': 500,
              'max_depth': 16,
              'boosting': 'rf',
              'objective': 'multiclass',
              'num_class': 3,
              'metric': 'multi_logloss',
              'is_training_metric': True,
              'num_leaves': 144,
              #'feature_fraction': 0.97,
              'bagging_fraction': 0.7,
              'bagging_freq': 5,
             }
params_cat_floor = {'learning_rate': 0.002,
              'num_boost_round': 500,
              'max_depth': 16,
              'boosting': 'rf',
              'objective': 'multiclass',
              'num_class': 5,
              'metric': 'multi_logloss',
              'is_training_metric': True,
              'num_leaves': 144,
              #'feature_fraction': 0.97,
              'bagging_fraction': 0.7,
              'bagging_freq': 5,
             }

In [11]:
model_reg_long_lgb = lgb.train(params_reg, dataset_train_reg_long_lgb, dataset_validate_reg_long_lgb, verbose_eval = 100)#, early_stopping_rounds = 100)
model_reg_lat_lgb = lgb.train(params_reg, dataset_train_reg_lat_lgb, dataset_validate_reg_lat_lgb, verbose_eval = 100)#, early_stopping_rounds = 100)
model_cat_floor_lgb = lgb.train(params_cat_floor, dataset_train_cat_floor_lgb, dataset_validate_cat_floor_lgb, verbose_eval = 100)#, early_stopping_rounds = 100)
model_cat_building_lgb = lgb.train(params_cat_building, dataset_train_cat_building_lgb, dataset_validate_cat_building_lgb, verbose_eval = 100)#, early_stopping_rounds = 100)




In [27]:
predict_long_train = model_reg_long_lgb.predict(x_train)
predict_long_validate = model_reg_long_lgb.predict(x_validate)
predict_lat_train = model_reg_lat_lgb.predict(x_train)
predict_lat_validate = model_reg_lat_lgb.predict(x_validate)
predict_floor_train = model_cat_floor_lgb.predict(x_train)
predict_floor_validate = model_cat_floor_lgb.predict(x_validate)
predict_building_train = model_cat_building_lgb.predict(x_train)
predict_building_validate = model_cat_building_lgb.predict(x_validate)

In [19]:
mse_long = sklearn.metrics.mean_squared_error(y_validate[:, 0], predict_long_test)
mse_lat = sklearn.metrics.mean_squared_error(y_validate[:, 1], predict_lat_test)
mse_floor = sklearn.metrics.mean_squared_error(y_validate[:, 2], predict_floor_test)
mse_building = sklearn.metrics.mean_squared_error(y_validate[:, 3], predict_building_test)

ValueError: y_true and y_pred have different number of output (1!=5)

In [18]:
print(mse_long)
print(mse_lat)
print(mse_floor)
print(mse_building)

2112.669409702993


NameError: name 'mse_lat' is not defined

In [37]:
print(y_train[:, 1])
print(predict_lat_train)

[4864931.4921   4864849.3028   4864842.961439 ... 4864790.0184
 4864769.4062   4865010.6595  ]
[4864909.59856222 4864858.06103507 4864853.47590745 ... 4864832.36316475
 4864806.97037601 4864956.7621106 ]


In [36]:
print(y_train[:, 0])
print(predict_long_train)
print(y_validate[:, 0])
print(predict_long_validate)
print(y_train[:, 2])
print(predict_floor_train.argmax(axis = 1))
print(y_validate[:, 2])
print(predict_floor_validate.argmax(axis = 1))

[-7680.1346     -7372.8029     -7377.07826999 ... -7401.9962
 -7368.3861     -7640.9762    ]
[-7598.81364419 -7405.48140222 -7407.09954952 ... -7425.17448814
 -7404.14188336 -7575.80149271]
[-7511.5215     -7535.39365217 -7348.8982     ... -7408.69525072
 -7445.55787384 -7390.7612    ]
[-7495.09348868 -7509.55422871 -7391.1639951  ... -7433.20310066
 -7447.53077389 -7416.35507647]
[0. 4. 0. ... 0. 3. 0.]
[0 4 0 ... 0 3 0]
[2. 2. 3. ... 2. 0. 3.]
[2 2 3 ... 2 0 3]


In [42]:
print(len(predict_floor_validate))

3988


In [43]:
error_building_train = []
error_building_validate = []
error_floor_train = []
error_floor_validate = []
predict_floor_argmax_train = predict_floor_train.argmax(axis = 1)
predict_floor_argmax_validate = predict_floor_validate.argmax(axis = 1)
predict_building_argmax_train = predict_building_train.argmax(axis = 1)
predict_building_argmax_validate = predict_building_validate.argmax(axis = 1)
for i in range(len(predict_floor_train)):
    if predict_floor_argmax_train[i] != y_train[i, 2]:
        error_floor_train.append(i)
    if predict_building_argmax_train[i] != y_train[i, 3]:
        error_building_train.append(i)
for i in range(len(predict_floor_validate)):
    if predict_floor_argmax_validate[i] != y_validate[i, 2]:
        error_floor_validate.append(i)
    if predict_building_argmax_validate[i] != y_validate[i, 3]:
        error_building_validate.append(i)

In [46]:
error_rate_floor_train = len(error_floor_train) / len(predict_floor_train)
print(error_rate_floor_train)
error_rate_floor_validate = len(error_floor_validate) / len(predict_floor_train)
print(error_rate_floor_validate)
error_rate_building_train = len(error_building_train) / len(predict_building_train)
print(error_rate_building_train)
error_rate_building_validate = len(error_building_validate) / len(predict_building_train)
print(error_rate_building_validate)

0.01310509154752947
0.004075746175068974
0.0007524454477050414
0.00043892651116127413
