In [2]:
!ls real_data/

[34mtest[m[m     [34mtraining[m[m


In [135]:
import os
import pandas as pd
import numpy as np
import progressbar
def get_training_data(path,calculate_means_and_variances=True):
    ''' Loads in training data from path argument, and returns in matrix for with mac addresses as columns.
        If calculate_means_and_variances=True the entries are the means and standard deviations of the readings,
        else just a list of all samples is returned
    '''
    print(f'Loading training data from {path}')
    locations_df = pd.read_csv(os.path.join(path,'location.txt'),header=None)
    locations_df.columns = ['x','y']
    features_path = os.path.join(path,'wifi_signal')
    feature_csvs = os.listdir(features_path)
    train_df = pd.DataFrame()
    for feature_csv in progressbar.progressbar(feature_csvs):
        location = np.array(locations_df.iloc[int(feature_csv[6:].split('.')[0])-1])
#         corresponding_location = locations_df[]
        feature_df = pd.read_csv(os.path.join(features_path,feature_csv),header=None)
        feature_df.columns = ['timestamp','router_name','mac_address','channel','RSSI']
        unique_mac_addresses = list(set(feature_df['mac_address']))
        location_dict = {'x': location[0], 'y': location[1]}
        
        for unique_mac_address in unique_mac_addresses:
            relevant_entries = feature_df.iloc[np.where(feature_df['mac_address']==unique_mac_address)]
            rssi_values = relevant_entries['RSSI'] + 0.5*np.random.random(size=len(relevant_entries))
            if calculate_means_and_variances:
                #             add artificial noise to avoid 0 standard deviation
                mean = rssi_values.mean()
                std_dv = rssi_values.std()
                means_and_stdevs = [mean,std_dv]
                location_dict[unique_mac_address] = means_and_stdevs 
            else:
                location_dict[unique_mac_address] = rssi_values
        train_df =train_df.append(location_dict,ignore_index=True)
    train_df = train_df.reindex(columns = ['x','y'] + [col for col in train_df.columns if col not in ['x','y'] ])
    return train_df

def get_test_data(path):
    locations_df = pd.read_csv(os.path.join(path,'location.txt'),header=None)
    features_path = os.path.join(path,'wifi_signal')
    feature_csvs = os.listdir(features_path)
    test_df = pd.DataFrame()
    for feature_csv in feature_csvs:

        feature_df = pd.read_csv(os.path.join(features_path,feature_csv))
        feature_df.columns = ['timestamp','router_name','mac_address','channel','RSSI']
        unique_mac_addresses = list(set(feature_df['mac_address']))
        location_dict = {'x': 0, 'y': 0}
        
        for unique_mac_address in unique_mac_addresses:
            relevant_entries = feature_df.iloc[np.where(feature_df['mac_address']==unique_mac_address)]
            rssi_values = relevant_entries['RSSI']
            location_dict[unique_mac_address] = rssi_values
        test_df =test_df.append(location_dict,ignore_index=True)
    test_df = test_df.reindex(columns = ['x','y'] + [col for col in test_df.columns if col not in ['x','y'] ])
    return test_df

def make_predictions(train_data,test_data):
    pass

def preprocess_training_data(*args):
    pass
    
training_path = "./real_data/training/"
test_path = "./real_data/test/"

# get_training_data(training_path,calculate_means_and_variances=True).head()
get_test_data(test_path).head()


Unnamed: 0,x,y,00:2a:10:1a:69:80,00:2a:10:1a:69:81,00:2a:10:35:39:31,00:2a:10:3d:b0:10,00:2a:10:3d:b0:11,00:2a:10:3d:b0:1e,00:2a:10:3d:b0:1f,00:81:c4:85:07:a0,...,e8:de:27:6f:f5:d0,e8:de:27:6f:f5:d1,e8:de:27:70:02:b4,e8:de:27:70:02:b5,f8:d1:11:88:3f:2e,00:2a:10:0d:6d:30,00:2a:10:0d:6d:31,18:8b:45:09:72:60,18:8b:45:09:72:61,00:2a:10:35:39:30
0,0.0,0.0,651 -88 685 -88 721 -88 757 -88 79...,241 -88 271 -88 298 -88 325 -88 35...,687 -85 723 -85 759 -85 794 -85 826 ...,14 -71 41 -66 70 -66 99 -67 13...,17 -71 44 -71 73 -71 102 -65 13...,8 -71 35 -72 64 -70 95 -72 12...,3 -74 30 -73 59 -69 90 -72 12...,26 -45 55 -45 86 -46 117 -46 14...,...,0 -47 27 -46 56 -46 87 -46 11...,4 -43 31 -43 60 -40 91 -41 12...,2 -44 29 -46 58 -46 89 -46 12...,5 -46 32 -41 61 -42 92 -43 12...,116 -88 147 -88 178 -88 209 -88 239 ...,,,,,
1,0.0,0.0,19 -85 95 -87 129 -87 163 -87 197 ...,21 -84 198 -85 230 -85 261 -85 29...,670 -85 699 -85 727 -85 754 -85 786 ...,10 -63 42 -63 73 -64 107 -64 14...,586 -65 613 -65 641 -65 667 -73 69...,7 -68 39 -66 70 -66 104 -67 138 ...,3 -68 35 -66 66 -66 100 -67 134 ...,31 -50 62 -48 96 -47 130 -46 164 ...,...,0 -46 32 -45 63 -47 97 -49 131 ...,4 -39 36 -43 67 -53 101 -45 135 ...,2 -52 34 -49 65 -52 99 -52 133 ...,5 -43 37 -43 68 -41 102 -41 136 ...,614 -89 642 -89 668 -89 697 -89 725 ...,758 -85 790 -85 828 -87 865 -87 90...,762 -84 794 -84 832 -84 869 -84 90...,757 -82 789 -82 827 -82 864 -82 899 ...,383 -80 414 -80 443 -80 471 -80 500 ...,
2,0.0,0.0,481 -82 513 -82 543 -82 573 -82 603 ...,482 -80 514 -80 544 -80 574 -84 604 ...,,21 -67 49 -70 80 -70 111 -70 142 ...,337 -64 360 -64 385 -64 411 -64 43...,7 -72 35 -70 66 -66 97 -69 128 ...,3 -72 31 -70 62 -66 93 -69 124 ...,27 -57 58 -54 89 -56 120 -51 152 ...,...,0 -47 28 -51 59 -48 90 -48 121 ...,4 -46 32 -42 63 -46 94 -49 125 ...,2 -47 30 -46 61 -45 92 -47 123 ...,5 -39 33 -36 64 -38 95 -37 126 ...,388 -85 414 -85 442 -85 473 -85 505 ...,608 -87 639 -87 670 -87 702 -87 735 ...,,24 -80 52 -80 83 -80 114 -80 145 ...,25 -81 53 -81 84 -81 115 -81 146 ...,855 -86 883 -86 911 -86 939 -86 968 ...


In [104]:
!ls real_data/test

location.txt [34mwifi_signal[m[m
