In [2]:
# Shell escape: list the contents of the real_data/ directory used by the loaders below.
!ls real_data/

[34mtest[m[m     [34mtraining[m[m


In [159]:
import os
import pandas as pd
import numpy as np
import progressbar
import time


def predict(train,test,n_ap):
    ''' Placeholder for the prediction step; not implemented yet.

        make_predictions calls this with the training frame, the test frame and
        len(mutual_mac_addresses). All three arguments are currently ignored and
        None is returned.
    '''
    return None


def get_training_data(path,calculate_means_and_variances=True):
    ''' Load the training data under ``path`` into a DataFrame with one row per feature file.

        Expected layout (as read by this function):
          * ``<path>/location.txt`` - header-less CSV with two columns, used as x and y.
          * ``<path>/wifi_signal/`` - header-less CSVs with the five columns
            timestamp, router_name, mac_address, channel, RSSI. The characters of
            each file name after the first six, up to the first '.', are parsed as
            a 1-based row number into location.txt.

        The result has the columns 'x', 'y' followed by one column per MAC address.
        Uniform noise in [0, 0.5) is added to every RSSI sample (in both modes) so
        that repeated identical readings do not produce a zero standard deviation.

        If calculate_means_and_variances=True each cell is ``[mean, std]`` of the
        noisy readings for that MAC address at that location (std is the pandas
        sample standard deviation, so a single reading gives NaN); otherwise the
        cell is the pandas Series of noisy readings. MAC addresses not seen at a
        location are NaN in that row.

        Rows are collected in a list and the frame is built once, instead of using
        ``DataFrame.append`` (removed in pandas 2.0 and quadratic when used per row).
    '''
    print(f'Loading training data from {path}')
    # Short pause kept from the original; presumably lets the print above flush
    # before the progress bar starts drawing.
    time.sleep(0.2)
    locations_df = pd.read_csv(os.path.join(path,'location.txt'),header=None)
    locations_df.columns = ['x','y']
    features_path = os.path.join(path,'wifi_signal')
    feature_csvs = os.listdir(features_path)

    records = []
    for feature_csv in progressbar.progressbar(feature_csvs,prefix=f'Loading training data from {path}'):
        # File names carry the 1-based row number of the matching location.
        location_index = int(feature_csv[6:].split('.')[0]) - 1
        location = locations_df.iloc[location_index]

        feature_df = pd.read_csv(os.path.join(features_path,feature_csv),header=None)
        feature_df.columns = ['timestamp','router_name','mac_address','channel','RSSI']
        record = {'x': location['x'], 'y': location['y']}

        # One pass over the file; sort=False keeps MAC addresses in order of
        # first appearance, so the column order is deterministic.
        for mac_address, rssi in feature_df.groupby('mac_address', sort=False)['RSSI']:
            # add artificial noise to avoid 0 standard deviation
            rssi_values = rssi + 0.5*np.random.random(size=len(rssi))
            if calculate_means_and_variances:
                record[mac_address] = [rssi_values.mean(), rssi_values.std()]
            else:
                record[mac_address] = rssi_values
        records.append(record)

    train_df = pd.DataFrame(records)
    mac_columns = [col for col in train_df.columns if col not in ['x','y']]
    return train_df.reindex(columns=['x','y'] + mac_columns)

def get_test_data(path):
    ''' Load the test data under ``path`` into a DataFrame with one row per feature file.

        Every file in ``<path>/wifi_signal/`` is read as a header-less CSV with the
        five columns timestamp, router_name, mac_address, channel, RSSI. The files
        are read with ``header=None``, matching get_training_data, so the first
        reading of each file is kept as data instead of being consumed as a header.

        The result has the columns 'x', 'y' followed by one column per MAC address.
        'x' and 'y' are placeholders set to 0 (no location file is read here); each
        MAC-address cell holds the pandas Series of raw RSSI readings for that
        address in that file, and is NaN where the address was not seen.

        Rows are collected in a list and the frame is built once, instead of using
        ``DataFrame.append`` (removed in pandas 2.0 and quadratic when used per row).
    '''
    features_path = os.path.join(path,'wifi_signal')

    records = []
    for feature_csv in os.listdir(features_path):
        feature_df = pd.read_csv(os.path.join(features_path,feature_csv),header=None)
        feature_df.columns = ['timestamp','router_name','mac_address','channel','RSSI']
        record = {'x': 0, 'y': 0}

        # One pass over the file; sort=False keeps MAC addresses in order of
        # first appearance, so the column order is deterministic.
        for mac_address, rssi_values in feature_df.groupby('mac_address', sort=False)['RSSI']:
            record[mac_address] = rssi_values
        records.append(record)

    test_df = pd.DataFrame(records)
    mac_columns = [col for col in test_df.columns if col not in ['x','y']]
    return test_df.reindex(columns=['x','y'] + mac_columns)

def make_predictions(train_data,test_data):
    ''' Restrict the training frame and the first test row to their shared MAC-address columns.

        Only the first row of ``test_data`` is processed (the original loop broke
        out after its first iteration). Both frames must contain 'x' and 'y'
        columns. The shared columns are taken in the order they appear in
        ``train_data`` and exclude 'x' and 'y', so the cropped frames do not
        contain duplicated location columns and the count handed to ``predict``
        covers MAC addresses only.

        Returns ``(cropped_train_df, cropped_test_df)``: ``train_data`` and the
        one-row test frame, each limited to 'x', 'y' and the shared MAC columns.

        Raises ValueError if ``test_data`` has no rows.
    '''
    if len(test_data) == 0:
        raise ValueError('test_data contains no rows to predict for')

    location_columns = ['x','y']

    # TODO: extend to every test row once predict() is implemented.
    test_features = test_data.iloc[[0]]
    test_columns = set(test_features.columns)

    mutual_mac_addresses = [
        col for col in train_data.columns
        if col in test_columns and col not in location_columns
    ]
    cropped_test_df = test_features[location_columns + mutual_mac_addresses]
    cropped_train_df = train_data[location_columns + mutual_mac_addresses]

    # NOTE(review): predict receives the full frames, not the cropped ones,
    # as in the original call - confirm which is intended once predict exists.
    predict(train_data,test_data,len(mutual_mac_addresses))
    return cropped_train_df,cropped_test_df

def preprocess_training_data(*args):
    ''' Placeholder for a training-data preprocessing step; not implemented yet.

        Accepts any positional arguments, ignores them and returns None.
    '''
    return None
    
# Dataset locations, given relative to the current working directory.
training_path = "./real_data/training/"
test_path = "./real_data/test/"

# get_training_data(training_path,calculate_means_and_variances=True).head()
# Load both splits; get_training_data runs with its default
# calculate_means_and_variances=True.
test_data = get_test_data(test_path)
train_data = get_training_data(training_path)

# make_predictions returns (cropped_train_df, cropped_test_df), bound here to a and b.
a, b = make_predictions(train_data,test_data)


Loading training data from ./real_data/training/


Loading training data from ./real_data/training/100% (15 of 15) || Elapsed Time: 0:00:00 Time:  0:00:00


In [162]:
# Preview the first rows of `a`, the first value returned by make_predictions
# (the training frame restricted to the shared columns).
a.head()
# b.head()

Unnamed: 0,x,y,40:18:b1:78:86:55,00:81:c4:85:07:a1,08:ea:44:96:ec:95,00:2a:10:1a:69:81,00:81:c4:85:07:a0,40:18:b1:7b:42:15,10:9a:dd:a0:fb:1e,00:2a:10:0d:6d:31,...,08:ea:44:96:ec:d5,00:2a:10:35:39:31,e8:de:27:6f:f5:d1,18:8b:45:09:70:ee,08:ea:44:96:ec:d6,e8:de:27:6f:e4:d9,00:2a:10:35:39:30,40:18:b1:7b:42:14,00:2a:10:0d:6d:30,40:18:b1:78:89:d5
0,1.35,6.9,"[-88.0695315172444, 0.4835542650189351]","[-65.18937274061513, 2.6700861325764635]","[-87.58131055417826, 2.7456596321015634]","[-82.81456133749508, 0.16565240359682698]","[-65.59009460597228, 1.4478348687624065]","[-88.53137602023655, 2.6487090758014586]","[-86.78358339322425, 0.9051470646636604]","[-81.65241669414266, 0.19550266237550293]",...,"[-72.3955341292033, 0.7497727752865594]","[-82.7468805690706, 0.11926815191435418]","[-37.40675477179919, 1.0566746180384032]","[-82.5793982119248, 0.6513910636755706]","[-72.21118178935325, 0.8648233794577511]","[-46.00863914199553, 2.7205416819497503]","[-82.74226955993667, 0.12222328726951857]","[-88.04338215588666, 1.7000392308530536]","[-82.87623875625623, 0.0958253922797544]","[-84.06181354553866, 2.034233554198401]"
1,7.95,8.2,"[-87.79981395665817, 1.358417872304841]","[-58.88981505095673, 3.7116075994479414]","[-89.6908292964003, 0.1260499601167279]",,"[-58.90340866465809, 3.5091257437432923]",,"[-81.42908338149485, 3.0743170352524687]",,...,"[-78.63493426935351, 3.5834422957202285]",,"[-42.079087601095246, 2.6417422310953755]","[-83.18034033218504, 1.417334806589257]","[-78.79351191748108, 2.765473448827172]","[-46.990514431466195, 2.703466835376569]","[-84.67723458627785, 0.08253167652840658]","[-89.87071336077634, 0.07453928584727333]",,"[-87.09246905415183, 2.425103130682954]"
2,7.95,6.67,"[-87.84432180615285, 1.3580031486285138]","[-59.60054807782298, 3.400595919640743]","[-89.6786890433623, 0.11286533715818951]",,"[-59.4377938427251, 3.4195418963442488]","[-88.73544021218073, 0.14720376991657]","[-83.9880600015654, 1.7309954259846838]","[-86.74453452386446, 0.1559016187349671]",...,"[-76.95918361734869, 2.817102370048486]",,"[-43.37755841278585, 3.329680214236346]","[-83.05318872462503, 1.724313390963708]","[-78.04568661793364, 3.9855920682761767]","[-40.231608104718795, 2.9176790937542267]",,"[-89.97383124863227, 0.8152486758286002]",,"[-86.72666413209205, 1.7502984823035674]"
3,1.35,8.8,"[-88.28865980213476, 0.7682399157432299]","[-61.31988223099, 2.8548908500490144]","[-88.75061632407825, 0.19919906228567863]",,"[-62.225355193405875, 0.8263951650064892]",,"[-86.65823924769457, 1.9657675571882722]",,...,"[-81.35251822479128, 2.3057826860374275]",,"[-36.883237892433996, 1.9357501705258748]","[-83.04108168840278, 1.2389654134414365]","[-81.66692507545456, 1.8691141640104298]","[-37.550317612646495, 1.4391837356108756]",,,,"[-87.86014207657269, 1.5250648616663274]"
4,4.7,2.8,"[-83.82023426224427, 1.4833491276859783]","[-52.01526680284891, 1.336826405838258]","[-86.43937527531463, 1.5628126677695227]","[-84.75923786759107, 0.7404117189043171]","[-53.069069464597945, 1.2718644799404295]","[-88.4796005068834, 0.6341772718087589]","[-86.40518963804463, 1.4514252211673597]","[-86.76810280162876, 0.12810629263436882]",...,"[-70.87974985017266, 1.6797281121995744]",,"[-43.420012444801145, 3.4133870001736177]","[-82.57237589314215, 0.9563974932928202]","[-70.49444467208394, 1.4588909443318812]","[-29.573229954376657, 4.224684051464831]",,"[-88.50498393847025, 1.25409236691601]","[-86.66799158946122, 0.1260931530497966]","[-84.83569076130787, 1.3306169279080582]"
