# Run these cells FIRST, every time, before using the rest of the notebook

In [1]:
import pandas as pd
import numpy as np
from csv import writer

In [2]:
def place_reg(df1, df2, df3, df4, df5):
    """Create a registration list from 5 SSID-RSSI measurement files.

    Parameters
    ----------
    df1, df2, df3, df4, df5 : pandas.DataFrame
        Measurement data. Each frame MUST have the columns ``SSID`` and ``RSSI``.

    Returns
    -------
    pandas.DataFrame
        Registration List (reg_list) with the columns ``SSID`` and ``mean``:
        one row per SSID that appears in ALL five measurements, sorted by SSID
        (A-Z) with a fresh 0..n-1 index. ``mean`` is the average RSSI over the
        five measurements. To be used in place_iden() or as a Comparison List.

    Notes
    -----
    * If an SSID occurs more than once inside a single measurement, its RSSI
      values are averaged first so that every file contributes exactly one
      value per SSID. (Previously the rows were glued together by position,
      which misaligned the RSSI values as soon as any file had a repeated SSID.)
    * Only the ``RSSI`` column is averaged; the string ``SSID`` column is never
      passed to ``mean()``, which raises ``TypeError`` on pandas >= 2.0.
    """
    dfs = [df1, df2, df3, df4, df5]

    #One mean RSSI per SSID for every measurement file (result is indexed by SSID)
    per_file_rssi = [
        df[['SSID', 'RSSI']].astype({'RSSI': float}).groupby('SSID')['RSSI'].mean()
        for df in dfs
    ]

    #Align the files on SSID; join='inner' keeps only SSIDs present in every file
    aligned = pd.concat(
        per_file_rssi, axis=1, join='inner', keys=list(range(len(dfs)))
    ).sort_index()

    #Creating the registration list: mean of each row (= mean over the files)
    reg_list = (
        aligned.mean(axis=1)
        .rename('mean')
        .rename_axis('SSID')
        .reset_index()
    )

    return reg_list

In [3]:
def place_reg2(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10):
    """Create a registration list from 10 SSID-RSSI measurement files.

    Parameters
    ----------
    df1 ... df10 : pandas.DataFrame
        Measurement data. Each frame MUST have the columns ``SSID`` and ``RSSI``.

    Returns
    -------
    pandas.DataFrame
        Registration List (reg_list) with the columns ``SSID`` and ``mean``:
        one row per SSID that appears in ALL ten measurements, sorted by SSID
        (A-Z) with a fresh 0..n-1 index. ``mean`` is the average RSSI over the
        ten measurements. To be used in place_iden() or as a Comparison List.

    Notes
    -----
    * If an SSID occurs more than once inside a single measurement, its RSSI
      values are averaged first so that every file contributes exactly one
      value per SSID. (Previously the rows were glued together by position,
      which misaligned the RSSI values as soon as any file had a repeated SSID.)
    * Only the ``RSSI`` column is averaged; the string ``SSID`` column is never
      passed to ``mean()``, which raises ``TypeError`` on pandas >= 2.0.
    """
    dfs = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10]

    #One mean RSSI per SSID for every measurement file (result is indexed by SSID)
    per_file_rssi = [
        df[['SSID', 'RSSI']].astype({'RSSI': float}).groupby('SSID')['RSSI'].mean()
        for df in dfs
    ]

    #Align the files on SSID; join='inner' keeps only SSIDs present in every file
    aligned = pd.concat(
        per_file_rssi, axis=1, join='inner', keys=list(range(len(dfs)))
    ).sort_index()

    #Creating the registration list: mean of each row (= mean over the files)
    reg_list = (
        aligned.mean(axis=1)
        .rename('mean')
        .rename_axis('SSID')
        .reset_index()
    )

    return reg_list

In [4]:
def place_iden(registrationList, comparisonList, R_margin=10, P_th=50, out_path='out.csv'):
    """Identify whether a place is registered or not.

    Prints the matching SSIDs, the concordance rate and the verdict, and
    appends one result row to a CSV file.

    Parameters
    ----------
    registrationList : pandas.DataFrame
        Registration List (RL) with the columns ``SSID`` and ``mean``.
    comparisonList : pandas.DataFrame
        Comparison List (CL) with the columns ``SSID`` and ``RSSI``.
    R_margin : number, default 10
        Allowed deviation of the CL RSSI from the RL mean RSSI.
    P_th : number, default 50
        Concordance-rate threshold in percent.
        (Default R_margin and P_th are based on the SmartLocService paper.)
    out_path : str, default 'out.csv'
        CSV file the result row ``[P_th, R_margin, matches, P, verdict]`` is
        appended to.

    Returns
    -------
    None

    Notes
    -----
    * An empty Comparison List gives a concordance rate of 0.0 (previously it
      raised ``ZeroDivisionError``).
    * NOTE(review): matches are counted per distinct SSID (a repeated SSID in
      either list keeps its last RSSI value), while the rate is divided by the
      number of CL rows, duplicates included -- confirm this is intended.
    """
    #Create 2 column table: SSID-RSSI
    r = registrationList.filter(['SSID', 'mean'])
    c = comparisonList.filter(['SSID', 'RSSI'])

    #SSID -> RSSI lookup tables (RSSI values converted to float)
    rgs_dict = dict(zip(r['SSID'].to_list(), [float(v) for v in r['mean'].to_list()]))
    cmp_SSID = c['SSID'].to_list()
    cmp_dict = dict(zip(cmp_SSID, [float(v) for v in c['RSSI'].to_list()]))

    #Common SSIDs that satisfy: R_ave-R_margin <= R_auth <= R_ave+R_margin
        #R_ave:  average RSSI of the SSID after multi-measurements @ RL
        #R_auth: instantaneous RSSI of the SSID @ CL
    P_num = [
        SSID
        for SSID, R_auth in cmp_dict.items()
        if SSID in rgs_dict
        and rgs_dict[SSID] - R_margin <= R_auth <= rgs_dict[SSID] + R_margin
    ]

    print(len(P_num), 'common SSID(s) satisfy the equation.')
    print('SSID name(s):', P_num)

    #Computing concordance rate P (nothing to compare -> 0 % instead of dividing by zero)
    P = len(P_num) / len(cmp_SSID) * 100 if cmp_SSID else 0.0
    print('The concordance rate is', P)

    #Comparing P and threshold P
    is_registered = bool(P >= P_th)
    if is_registered:
        print('This place is registered. I know this place.')
    else:
        print('This place is unregistered. Where am I?')

    #Save results to a csv file (appended as the last row)
    with open(out_path, 'a+', newline='') as write_obj:
        writer(write_obj).writerow([P_th, R_margin, len(P_num), P, is_registered])

---
### Example measurement

In [5]:
#LOAD the five SSID-RSSI measurement datasets (space-separated txt, no header row)
    #Measurements were taken with the iOS airport function via meas.sh (see READ_ME)
    #Columns are named SSID and RSSI on load, as required by place_reg()
data1, data2, data3, data4, data5 = [
    pd.read_csv(path, sep=' ', header=None, names=['SSID', 'RSSI'])
    for path in (
        'automeasure/result1.txt',
        'automeasure/result2.txt',
        'automeasure/result3.txt',
        'automeasure/result4.txt',
        'automeasure/result5.txt',
    )
]

In [6]:
#RUN place_reg() to get Registration List (RL)
    # rgs is a DataFrame with the columns 'SSID' and 'mean' (mean RSSI over the 5 measurement files)
rgs = place_reg(data1,data2,data3,data4,data5)

In [7]:
#OPTIONAL: Sanity check that place_reg() returned a pandas DataFrame
    # (the cell evaluates to True when it did)
isinstance(rgs,pd.DataFrame)

True

In [8]:
#LOAD Comparison List (CL) dataset (CL needs to be in csv format)
    # CL is created the same way as RL using place_reg
    # NOTE(review): place_iden() reads the 'SSID' and 'RSSI' columns of CL, while place_reg()
    #   returns 'SSID' and 'mean' -- confirm that this CSV really has an 'RSSI' header
cmp = pd.read_csv('data3(5-14).csv')

In [9]:
#RUN place_iden() with the default R_margin and P_th
    # Prints the verdict and appends one result row to out.csv
place_iden(rgs,cmp)

1 common SSID(s) satisfy the equation.
SSID name(s): ['vf300-f8fc15-gw']
The concordance rate is 8.333333333333332
This place is unregistered. Where am I?
