In [1]:
# defining functions
import json 
import math

def getmeasurementTimestamp(item):
    """Return the record's 'measurementTimestamp' field converted to int."""
    raw_timestamp = item['measurementTimestamp']
    return int(raw_timestamp)

def getProcessingTimestamp(item):
    """Return the record's 'processingTimestamp' field converted to int."""
    raw_timestamp = item['processingTimestamp']
    return int(raw_timestamp)

def get_x_error(item):
    """Return the error of the estimated x coordinate (the sample mean).

    The error stored in the record is the standard deviation of the sample,
    so it is divided by sqrt(nMeasurements) to obtain the error of the mean.
    """
    payload = item['value']
    sample_stdev = payload['averagecoordinate']['error']['coordinates'][0]
    n_measurements = payload['trackeeHistory']['nMeasurements']
    return sample_stdev / math.sqrt(n_measurements)

def get_y_error(item):
    """Return the error of the estimated y coordinate (the sample mean).

    The stored per-sample error (coordinates[1]) is divided by
    sqrt(nMeasurements) to obtain the error of the mean.
    """
    payload = item['value']
    sample_stdev = payload['averagecoordinate']['error']['coordinates'][1]
    n_measurements = payload['trackeeHistory']['nMeasurements']
    return sample_stdev / math.sqrt(n_measurements)

def get_fitted(item):
    """Return the 'fitStatus' entry of the record's trackee history."""
    history = item['value']['trackeeHistory']
    return history['fitStatus']
    

In [None]:
#reading data
# Each non-empty line of the input file is one JSON record. The file is streamed
# line by line instead of being loaded with readlines(), so the raw text is not
# kept in memory next to the parsed records.
json_lines = []    # every parsed record, in file order
mac_adresses = []  # the sourceMac of every record (one entry per line, duplicates included)

with open("arena/2015-07-05.json") as f:
#with open("arena/2015-07-05_raw.json") as f:
    for line in f:
        if not line.strip():
            # a blank (e.g. trailing) line is not a record and would make json.loads raise
            continue
        jsline = json.loads(line)
        json_lines.append(jsline)
        mac_adresses.append(jsline["value"]["sourceMac"])
        


In [None]:
# Order the records in place by ascending measurement timestamp.
json_lines.sort(key=getmeasurementTimestamp, reverse=False)

In [None]:
#computation of error etc
# Assumes json_lines has already been sorted by measurement timestamp, so the
# first and last records carry the smallest and largest timestamps.
minTime = getmeasurementTimestamp(json_lines[0])
maxTime = getmeasurementTimestamp(json_lines[-1])
print("minTime="+ str(minTime))
print("maxTime="+ str(maxTime))

# timestamps are in milliseconds: /1000 -> seconds, /60 -> minutes
timeMinutes = (maxTime - minTime)/1000/60

average_x_error = 0
average_y_error = 0
good_lines = []   # records whose x and y errors are both <= 2
mc_good = []      # sourceMac of every record in good_lines
number_of_addresses = len(set(mac_adresses)) #number of mac adresses detected. Not people, because of MAC randomization
delay = 0         # accumulated (processing - measurement) time, averaged below
number_of_fitted = 0
for jsline in json_lines:
    # compute each error once and reuse it for both the running sum and the filter
    x_error = get_x_error(jsline)
    y_error = get_y_error(jsline)
    average_x_error += x_error
    average_y_error += y_error
    delay += getProcessingTimestamp(jsline) - getmeasurementTimestamp(jsline)
    if get_fitted(jsline) == 'FITTED':
        number_of_fitted += 1
    if x_error <= 2 and y_error <= 2:
        good_lines.append(jsline)
        mc_good.append(jsline["value"]["sourceMac"])

number_of_lines = len(json_lines)
number_of_addresses_good = len(set(mc_good)) # number of distinct addresses with at least one record where both errors <= 2
average_x_error /= number_of_lines
average_y_error /= number_of_lines
delay /= number_of_lines

print ('time in minutes:' + str(timeMinutes))
print ('number of lines: ' + str(number_of_lines))
print ('average x error: ' + str(average_x_error))
print ('average y error: ' + str(average_y_error))
print('number of addresses the whole day:' + str(number_of_addresses))
print('number of addresses with stdev < 2m:' + str(number_of_addresses_good))
print('average delay in ms:' + str(delay))
print('number of fitted:' + str(number_of_fitted))

In [None]:
# For every source address, record the earliest and the latest measurement timestamp.
# concertFinishedTimestamp lies 8 hours (in ms) after the first measurement;
# the original note describes it as the "timestamp until 8 am".
concertFinishedTimestamp = minTime + 1000*60*60*8
FirstTimeSeen = {}
LastTimeSeen = {}

for jsline in json_lines:
    address = jsline["value"]["sourceMac"]
    time = getmeasurementTimestamp(jsline)
    # an address not seen before starts with this timestamp as both bounds
    FirstTimeSeen[address] = min(time, FirstTimeSeen.get(address, time))
    LastTimeSeen[address] = max(time, LastTimeSeen.get(address, time))

In [None]:
#Computing dwell time, number of persistent addresses
DwellTime = dict()               # address -> whole minutes between first and last sighting
DwellTimeDuringConcert = dict()  # same, restricted to addresses last seen before concertFinishedTimestamp
numberOfAdressesAtConcert=0
for address in LastTimeSeen.keys():
    DwellTime[address] = int((LastTimeSeen[address] - FirstTimeSeen[address]) /1000/60) # in minutes
    if LastTimeSeen[address] < concertFinishedTimestamp:
        numberOfAdressesAtConcert += 1
        DwellTimeDuringConcert[address] = DwellTime[address]
print('number of addresses detected during concert hours:')
print(numberOfAdressesAtConcert)

longTermAddresses=[]   # one entry per record of an address with dwell time > 10 minutes
AddressesInSec = []    # AddressesInSec[s] = distinct long-term addresses measured in second s after minTime
for jsline in json_lines:
    sec = int(math.floor((getmeasurementTimestamp(jsline)- minTime)/1000))
    address = jsline["value"]["sourceMac"]
    if DwellTime[address] > 10:#  i.e. >10 minutes
        longTermAddresses.append(address)
        # grow the per-second table until it has a slot for this second
        while len(AddressesInSec) <= sec:
            AddressesInSec.append([])
        if address not in AddressesInSec[sec]:
            AddressesInSec[sec].append(address)

numberOfAddresses = [len(present) for present in AddressesInSec]
longTermCount = len(set(longTermAddresses))
print("Long term persistent addresses (>10 min)= " + str(longTermCount))

# Average and maximum number of long-term addresses per second over all seconds in
# AddressesInSec. Guarded so that data without any long-term address yields 0
# instead of a ZeroDivisionError.
if numberOfAddresses:
    average = sum(numberOfAddresses) / len(numberOfAddresses)
    maxN = max(numberOfAddresses)
else:
    average = 0
    maxN = 0
print("averageNumberOfAddressesPerSecondAtConcert " + str(average))
print(maxN)


In [None]:
# Line plot of numberOfAddresses (one value per second), x axis limited to the first 33000 seconds.
import matplotlib.pyplot as plt
axes = plt.gca()
axes.plot(numberOfAddresses)
axes.set_ylabel('addresses present')
axes.set_xlabel('sec')
axes.set_xlim([0,33000])
axes.set_ylim([0,max(numberOfAddresses)])
plt.show()

In [None]:
#not used at the moment
# NOTE(review): the x axis is labelled 'min' but numberOfAddresses is plotted as-is;
# if that list holds one value per second, the label or the data needs adjusting — confirm.
import matplotlib.pyplot as plt
axes = plt.gca()
axes.plot(numberOfAddresses)
axes.set_ylabel('addresses present')
axes.set_xlabel('min')
axes.set_xlim([0,500])
axes.set_ylim([0,max(numberOfAddresses)])
plt.show()

In [None]:
# Histogram of dwell times (whole minutes) for the addresses in DwellTimeDuringConcert:
# n_addresses_staying_this_much[m] = number of those addresses with a dwell time of m minutes.
concert_dwell_times = list(DwellTimeDuringConcert.values())
# fall back to 0 so an empty selection does not make max() raise
maxDwellTime = max(concert_dwell_times) if concert_dwell_times else 0
# the maximum is taken over the concert-hours addresses only, so the message says so
print("maxDwellTime during concert hours=" + str(maxDwellTime) + " minutes")

n_addresses_staying_this_much = [0] * (maxDwellTime + 1)
for dwell_minutes in concert_dwell_times:
    n_addresses_staying_this_much[dwell_minutes] += 1

# bins 0..9 -> dwell time below 10 minutes; every remaining bin is 10 minutes or longer
n_addresses_staying_less_than_10min = sum(n_addresses_staying_this_much[:10])
n_addresses_staying_more_than_10min = sum(n_addresses_staying_this_much[10:])

print("number of addresses staying less than 10 min during concert hours  = " + str(n_addresses_staying_less_than_10min))
print("number of addresses staying more than 10 min during concert hours  = " + str(n_addresses_staying_more_than_10min))
 

In [None]:
# Dwell-time distribution: number of addresses per dwell time in minutes,
# shown for dwell times from 10 to 470 minutes with a fixed y range of 0..200.
import matplotlib.pyplot as plt
axes = plt.gca()
axes.plot(n_addresses_staying_this_much)
axes.set_ylabel('number of addresses 05 july')
axes.set_xlabel('dwell time in minutes')
axes.set_xlim([10,470])
axes.set_ylim([0,200])
#axes.set_ylim([0,max(n_addresses_staying_this_much)])
plt.show()

In [None]:
# Scratch arithmetic. NOTE(review): what 106 and 8 stand for is not recorded in this notebook — confirm or remove.
106/8

In [None]:
# Count records and addresses by the localMac flag of the trackee history.
# Randomized[address]           -> 1 if the address' last record had localMac == 1, else 0
# PersistentRandomized[address] -> same flag, only for addresses with a dwell time > 10 minutes
#                                  that were last seen before concertFinishedTimestamp
Randomized = {}
PersistentRandomized = {}
count0 = 0
count1 = 0
for line in json_lines:
    address = line["value"]["sourceMac"]
    flag = 1 if line["value"]["trackeeHistory"]["localMac"] == 1 else 0
    if flag == 1:
        count1 += 1
    else:
        count0 += 1
    Randomized[address] = flag
    if DwellTime[address] > 10 and LastTimeSeen[address] < concertFinishedTimestamp:
        PersistentRandomized[address] = flag

# per-address totals: entries flagged 0 versus all remaining entries
zeros = sum(1 for flag in Randomized.values() if flag == 0)
ones = len(Randomized) - zeros
zerosPersistent = sum(1 for flag in PersistentRandomized.values() if flag == 0)
onesPersistent = len(PersistentRandomized) - zerosPersistent

print("total number of lines with localMac == 1: " + str(count1))
print("total number of lines with localMac == 0: " + str(count0))
print("total number of addresses with localMac == 1: " + str(ones))
print("total number of addresses with localMac == 0: " + str(zeros))
print("total number of persistent addresses with localMac == 1: " + str(onesPersistent))
print("total number of persistent addresses with localMac == 0: " + str(zerosPersistent))
      


In [None]:
# Scratch arithmetic. NOTE(review): presumably adds two counts printed by an earlier cell — the source of 483 and 9769 is not recorded here; confirm or remove.
483+9769
    

In [None]:
#double-check:another approach to computing visible addresses per second
# Visible[s] = list of source addresses (duplicates included) measured in second s after minTime
Visible = dict()
for line in json_lines:
    address = line["value"]["sourceMac"]
    timeSec = int(math.floor((getmeasurementTimestamp(line)- minTime)/1000))
    Visible.setdefault(timeSec, []).append(address)

maxSec = max(Visible.keys())
print(maxSec)
# give every second up to and including maxSec an entry, so seconds without
# measurements count as zero visible addresses
for sec in range(0, maxSec + 1):
    if sec not in Visible:
        Visible[sec] = []
# The range is derived from the data rather than a hard-coded number of seconds,
# which raised KeyError whenever the data covered a different duration.
NVisible = []
for sec in range(0, maxSec + 1):
    NVisible.append(len(set(Visible[sec])))
    

In [None]:

# Scratch arithmetic. NOTE(review): presumably converts 86437 seconds of data to minutes — confirm or remove.
86437/60

In [None]:
# Line plot of NVisible (distinct addresses per second), x axis limited to the first 30000 seconds.
import matplotlib.pyplot as plt
axes = plt.gca()
axes.plot(NVisible)
axes.set_ylabel('addresses present')
axes.set_xlabel('sec')
axes.set_xlim([0,30000])
axes.set_ylim([0,max(NVisible)])
plt.show()

In [53]:
######################################################################## packet rate analysis
import fileinput
# Each raw record is reduced to [sourceMac, localMac, measurementTimestamp, droneId].
# The file is read through a context manager so the handle is closed even when a
# line fails to parse; fileinput.input() left a module-global input active in that
# case, which makes the next fileinput.input() call fail.
json_lines_raw = []
with open('arena/2015-07-05_raw.json') as raw_file:
    for line in raw_file:
        if not line.strip():
            # blank lines are not records and would make json.loads raise
            continue
        jsline = json.loads(line)
        data = [
            jsline["value"]["sourceMac"],
            jsline["value"]["localMac"],
            jsline["measurementTimestamp"],
            jsline["value"]["droneId"],
        ]
        json_lines_raw.append(data)
print(json_lines_raw[0])
        
      

['32bf72d3-d707-4c37-b9b5-6318187de63f', 0, 1436047299132, '107']


In [55]:
# Count the raw measurements per drone id (element 3 of each raw record) and
# report the drone with the most measurements.
numberOfMeasurementsForDrone = {}
for line in json_lines_raw:
    droneId = line[3]
    numberOfMeasurementsForDrone[droneId] = numberOfMeasurementsForDrone.get(droneId, 0) + 1

maxMeasurements = 0
maxDroneId = 0
for droneId, n_measured in numberOfMeasurementsForDrone.items():
    # strict comparison: on a tie the drone encountered first is kept
    if n_measured > maxMeasurements:
        maxDroneId, maxMeasurements = droneId, n_measured
print(maxDroneId)
print(maxMeasurements)

112
2920472


In [71]:
# timestamps112[address] = ascending measurement timestamps of that address as seen by drone "112"
timestamps112 = {}
for line in json_lines_raw:
    if line[3] != "112":
        continue
    address = line[0]
    timestamps112.setdefault(address, []).append(line[2])
for address in timestamps112:
    timestamps112[address].sort()

In [3]:
# timestamps[address] = measurement timestamps of that address over all drones (file order)
# localMac[address]   = localMac value of the first raw record seen for that address
timestamps = {}
localMac = {}
for line in json_lines_raw:
    address = line[0]
    timestamps.setdefault(address, []).append(line[2])
    localMac.setdefault(address, line[1])
        

In [4]:
# put every address' timestamp list into ascending order, in place
for address, stamps in timestamps.items():
    stamps.sort()

In [32]:
# delays[address] = differences between consecutive timestamps of that address
delays = {}
for address, stamps in timestamps.items():
    gaps = delays.setdefault(address, [])
    oldTimeStamp = 0  # 0 means "no previous timestamp yet"
    for timestamp in stamps:
        if oldTimeStamp > 0:
            gaps.append(timestamp - oldTimeStamp)
        oldTimeStamp = timestamp
                            

In [72]:
# delays112[address] = differences between consecutive timestamps in timestamps112[address]
delays112 = {}
for address, stamps in timestamps112.items():
    gaps = delays112.setdefault(address, [])
    oldTimeStamp = 0  # 0 means "no previous timestamp yet"
    for timestamp in stamps:
        if oldTimeStamp > 0:
            gaps.append(timestamp - oldTimeStamp)
        oldTimeStamp = timestamp

In [42]:
# Spot check: one raw record, the number of addresses with a delay list, and the
# localMac flag and delay list of two hand-picked addresses.
print(json_lines_raw[30000])
print(len(delays))
address1 = '32bf72d3-d707-4c37-b9b5-6318187de63f'
address2 = '3be779a3-1e1a-4b79-8ad0-c555f5124e7c'
for sample_address in (address1, address2):
    print(localMac[sample_address])
for sample_address in (address1, address2):
    print(delays[sample_address])

['3be779a3-1e1a-4b79-8ad0-c555f5124e7c', 0, 1436047321397]
126629
0
0
[9, 2, 31, 641, 0, 91, 21895, 94, 10, 98, 248791, 123, 6, 0, 2, 2, 3, 39, 34615, 41, 86, 41, 1, 42, 7, 9036, 41, 41, 55, 298, 100, 510, 61, 9, 16, 35, 6937, 43, 128, 1, 2, 558, 334, 8892, 43, 3, 207, 6, 17770, 65, 4, 1, 1, 6, 772, 33133, 1, 3, 8, 25, 2, 15, 65854, 0, 2, 11, 22, 107721, 33, 8, 1, 0, 41, 6, 5, 1843, 69, 80, 81, 1, 1, 6, 11, 2966, 3, 37, 2, 5, 0, 7, 4729, 7015, 20884, 7, 30776, 88032, 16495, 2138, 83102, 7, 19998, 0, 10394, 22178, 10, 43590, 13, 54991, 92, 7, 1, 4, 62679, 47048, 98150, 44253, 36885, 6, 7338, 2917, 6, 5589, 729, 58099, 21177, 7, 12, 24508, 54235, 36018, 13821, 83711, 5, 45959, 581848, 547, 119, 18, 6197, 22, 4709, 17, 38976, 8, 43, 31, 2, 3, 2, 35, 2, 2, 3, 0, 80483, 72, 2, 1, 4, 13, 3, 2, 2, 8571, 5, 2, 15, 13, 122, 718, 98857, 24, 42887, 31, 9, 32, 2, 1, 6, 34, 8265, 1, 2, 17, 1, 1, 17, 2, 1, 0, 6, 65, 18054, 20, 1, 2, 2, 3, 16, 2, 3, 86152, 42332, 234440, 44, 63605, 13, 24362, 3514, 1

In [34]:
# Per-address statistics of the inter-arrival delays, for all addresses.
import numpy as np
averageDelay = {}  # address -> mean delay
medianDelay = {}   # address -> median delay
stderrDelay = {}   # address -> np.std of the delays (a standard deviation, despite the name)
for address, gaps in delays.items():
    # addresses with fewer than two delays get no statistics
    if len(gaps) <= 1:
        continue
    delaysArray = np.array(gaps)
    averageDelay[address] = np.mean(delaysArray)
    medianDelay[address] = np.median(delaysArray)
    stderrDelay[address] = np.std(delaysArray)
            

In [44]:
# Per-address delay statistics, restricted to addresses whose localMac flag is 0.
averageDelay0 = {}  # address -> mean delay
medianDelay0 = {}   # address -> median delay
stderrDelay0 = {}   # address -> np.std of the delays
for address, gaps in delays.items():
    # skip addresses flagged otherwise and those with fewer than two delays
    if localMac[address] != 0 or len(gaps) <= 1:
        continue
    delaysArray = np.array(gaps)
    averageDelay0[address] = np.mean(delaysArray)
    medianDelay0[address] = np.median(delaysArray)
    stderrDelay0[address] = np.std(delaysArray)

In [45]:
#computing statistics of randomized addresses
# Per-address delay statistics, restricted to addresses whose localMac flag is 1.
# The former "address not in averageDelay0" guard looked at the non-randomized
# dict (copy/paste slip) and tied this cell to the previous one; no guard is
# needed because each address occurs once as a key of delays.
averageDelay1 = dict()  # address -> mean delay
medianDelay1 = dict()   # address -> median delay
stderrDelay1 = dict()   # address -> np.std of the delays
for address in delays.keys():
    if localMac[address] == 1:
        # addresses with fewer than two delays get no statistics
        if len(delays[address]) > 1:
            delaysArray = np.array(delays[address])
            averageDelay1[address] = np.mean(delaysArray)
            medianDelay1[address] = np.median(delaysArray)
            stderrDelay1[address] = np.std(delaysArray)

In [88]:
#computing statistics of non-randomized addresses for drone 112 (most overloaded drone)
averageDelay0_112 = dict()  # address -> mean delay
medianDelay0_112 = dict()   # address -> median delay
stderrDelay0_112 = dict()   # address -> np.std of the delays
for address in delays112.keys():
    if localMac[address] == 0:
        # addresses with fewer than two delays get no statistics
        if len(delays112[address]) > 1:
            delaysArray = np.array(delays112[address])
            averageDelay0_112[address] = np.mean(delaysArray)
            medianDelay0_112[address] = np.median(delaysArray)
            stderrDelay0_112[address] = np.std(delaysArray)

#computing statistics of randomized addresses for drone 112
# The former guard checked averageDelay0_112 (the non-randomized dict) instead of
# this section's own dict; no guard is needed because each address occurs once
# as a key of delays112.
averageDelay1_112 = dict()
medianDelay1_112 = dict()
stderrDelay1_112 = dict()
for address in delays112.keys():
    if localMac[address] == 1:
        if len(delays112[address]) > 1:
            delaysArray = np.array(delays112[address])
            averageDelay1_112[address] = np.mean(delaysArray)
            medianDelay1_112[address] = np.median(delaysArray)
            stderrDelay1_112[address] = np.std(delaysArray)
            # list the randomized addresses with more than 50 recorded delays
            if len(delays112[address]) > 50:
                print(address)

c5b498f0-85f3-46c7-bcdf-bfc68cc3307d
b157483f-40a2-4b89-aeaf-2915fdcfd051
5dc76ac2-d690-4071-9bc2-1f2f6bf322ba
16d40756-8b33-4407-af05-6ce94d5426de
27f4ffa9-8067-492e-9117-a88efb1a70d5
ffb55990-8320-4508-a570-754f8f1ac109
551bcd10-3317-45bd-b3cc-1da8f49af335
381e14b4-ac20-4943-925f-4fc3f0292c91
ac910ed5-6937-40db-b196-5142129baee6
443200e5-5752-4e5e-92f4-00af19a293be
befe9d03-2463-4257-a125-c8eb3678d172
0e9458db-92f8-40c0-9d87-cf5612c22aaf
1250424c-3989-4332-a82e-65cb3d69c5be
93a6d510-66be-4363-8fc8-04d44fede25d
ceb8f949-b81e-465c-a860-334a6cfa6623
9531accb-b185-438a-828c-ce560efabe9b
8001fdd4-ad4a-4b1d-becd-85e35c0873fe
5096cae8-d6a9-46a0-b60f-20f0f63cc72e
a5db01fb-d0f2-4b06-864f-4accc55c719e
474a4679-8d6a-4449-a389-318b894bdee1
edb59581-3ccc-41cc-b664-d5056365cf35
90d0860c-d4d1-416e-bc02-c1396f777a41
ae7eb61d-6eed-4b9b-bb81-4c7cedb056ba
08df02a4-dd3b-46fb-989b-9f19938f4b87
17c0e320-d78f-41ee-9720-0b948985ad03
94533f64-1815-4bec-aadd-12a6861f43a6
26090b75-20e9-4c2c-9894-935275fbf796
0

In [75]:
#drawing the delays of some non-randomized address
import matplotlib.pyplot as plt
plt.plot(delays112['32bf72d3-d707-4c37-b9b5-6318187de63f'])
# delays are differences of millisecond measurement timestamps, so the unit is ms, not sec
plt.ylabel('interarrival time (ms)')
plt.xlabel('packet')
axes = plt.gca()
#axes.set_ylim([0,200])

plt.show()

In [84]:

#drawing the delays of some randomized address
import matplotlib.pyplot as plt
plt.plot(delays112['7a43a795-b538-4563-b702-6f0256588479'])
# delays are differences of millisecond measurement timestamps, so the unit is ms, not sec
plt.ylabel('interarrival time (ms)')
plt.xlabel('packet')
axes = plt.gca()
#axes.set_ylim([0,200])
print(delays112['7a43a795-b538-4563-b702-6f0256588479'])
plt.show()

[800, 44199, 31, 44966, 23, 721, 44255, 25, 19, 679, 44276, 23, 462, 44512, 737, 44261, 23, 447, 214, 44314, 24, 666]


In [89]:
# print the drone-112 delay list of one randomized address
randomized_address = '443200e5-5752-4e5e-92f4-00af19a293be'
print(delays112[randomized_address])


[144882, 1390, 109, 1907, 453, 116, 562, 133927, 1666, 5170, 1128, 1028, 228, 111, 188, 374, 150, 522, 335, 714, 1534, 4045, 779, 901, 558, 905, 105, 1569, 92, 1263, 1229, 121, 1000, 450, 125, 1607, 402, 339, 328, 234, 230, 574279, 18344, 179882, 100, 1064, 3343, 680, 165, 279, 1690, 1008, 1122, 106, 347, 109, 82, 483, 446, 449, 3594, 1007, 3261, 170, 523, 204, 3466, 2611, 9, 1894, 1792, 1017, 551, 25915, 1048, 20, 503, 402, 2218, 935, 1685, 2038, 1336, 1340, 3036, 4160, 130, 19, 1046, 959, 2129, 449, 560, 97, 10, 1023, 1345, 113, 113, 17, 432, 2133, 1119, 3, 788, 677, 343, 213, 2179416, 226, 209, 685, 3027, 5731, 3469, 348, 565, 1124, 60, 724, 736, 48, 8, 56, 1194, 93, 562, 112, 669, 2025, 1911, 678, 103, 3040, 422, 17451, 903, 2698, 336, 114, 6852, 1117, 1014, 448, 451, 673, 682, 779, 2459, 579, 783, 794, 113, 107, 576, 208, 842, 395, 104, 25, 523, 14]


In [66]:
# delay list of one hand-picked address, as a numpy array
sample_delays = delays['32bf72d3-d707-4c37-b9b5-6318187de63f']
arr = np.array(sample_delays)

In [67]:
# Histogram of the delays in arr, x axis limited to 0..200000.
plt.hist(arr, bins = 500)
plt.ylabel('frequency')
plt.xlabel('delay')
# fetch the axes of THIS figure before setting the limit; calling set_xlim on
# the axes left over from an earlier cell does not affect the histogram
axes = plt.gca()
axes.set_xlim([0,200000])

plt.show()

In [50]:
#plotting histogram of average delay for non-randomized
# Take the values from averageDelay0 itself; the former lookup went through
# averageDelay (the all-addresses dict) for every key of averageDelay0.
averagedelaysList = list(averageDelay0.values())
averagedelays = np.array(averagedelaysList)
plt.hist(averagedelays, bins = 2000)
plt.ylabel('frequency')
plt.xlabel('average packet delay for non-randomized addresses')

#axes.set_xlim([0,200000])

axes = plt.gca()

plt.show()

In [49]:
#plotting histogram of average delay for randomized
# Take the values from averageDelay1 itself; the former lookup went through
# averageDelay (the all-addresses dict) for every key of averageDelay1.
averagedelaysList = list(averageDelay1.values())
averagedelays = np.array(averagedelaysList)
plt.hist(averagedelays, bins = 2000)
plt.ylabel('frequency')
plt.xlabel('average packet delay for randomized addresses')

#axes.set_xlim([0,200000])

axes = plt.gca()

plt.show()

In [51]:
def plotHistogramOfDictionary(dictionary, xlabel, ylabel, nbins):
    """Draw and show a histogram of the values of a dictionary.

    dictionary -- mapping whose values are histogrammed (the keys are ignored)
    xlabel     -- label of the x axis (the axis of the histogrammed values)
    ylabel     -- label of the y axis (the per-bin counts)
    nbins      -- passed to plt.hist as ``bins``
    """
    dictArray = np.array(list(dictionary.values()))
    plt.hist(dictArray, bins = nbins)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.show()

#def plotHistogramOfDictionaryFilteredByDroneId(dictionary, ylabel, xlabel, nbins, droneId)
#    for address in dictionary.keys():
#        if 


In [91]:
# keyword arguments: the delay belongs on the x axis and the count on the y axis
# (the positional call passed the two labels in swapped order)
plotHistogramOfDictionary(averageDelay0, xlabel='average packet delay for non-randomized addresses ', ylabel='freq', nbins=2000)

In [None]:
# keyword arguments: the delay belongs on the x axis and the count on the y axis
# (the positional call passed the two labels in swapped order)
plotHistogramOfDictionary(medianDelay0_112, xlabel='median packet delay for non-randomized addresses for dron 112', ylabel='freq', nbins=2000)

In [92]:
# keyword arguments: the delay belongs on the x axis and the count on the y axis
# (the positional call passed the two labels in swapped order)
plotHistogramOfDictionary(stderrDelay0_112, xlabel='stdev packet delay for non-randomized addresses for dron 112', ylabel='freq', nbins=2000)