In [1]:
import pymongo
from pymongo import MongoClient
import datetime
import dateutil.parser
import json
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import requests
from scipy.misc import imread
from StringIO import StringIO
import math
import pytz
import helpers as h
# Since I moved most of the functions into their own file I haven't cleaned up the imports section.
# There are probably unused imports here, but I'd imagine that it's not too much of a problem.

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size (width, height) in inches for every figure created below.
plt.rcParams['figure.figsize'] = (20, 10)

# Get the data we need

To do this analysis, we need to know about the stickers, base stations and detections.

Stickers and base stations are static data, they live in google spreadsheets.

Detections come from the MongoDB database.

---

# Fixed locations

## Stickers
The stickers are the locating items in the space. They are stuck to the ceiling at roughly 2m intervals.

TODO: put in the gif of me putting up a sticker and an image of one.

They have a QR code on them, so they can be scanned with a phone. That gives us a time stamp and a location.

In future we should be able to get orientation and position in field of view because QR codes have a top.

In [3]:
# Load the sticker positions from the published Google Sheet (CSV export).
stickerLocations = h.getDFfromCSVURL("https://docs.google.com/spreadsheets/d/1sijQZR2iFLo2FS_3r5gbsuAkaglRz557LWjdLxnPkpE/pub?gid=0&single=true&output=csv")
# Drop the "raw" and "z" columns; z is not needed because we aren't on multiple floors.
stickerLocations = stickerLocations.drop(["raw","z"], 1) # this is just because we aren't on multiple floors
# Preview the first three stickers: stickerID plus x/y coordinates.
stickerLocations.head(3)

Unnamed: 0,stickerID,x,y
0,1,63852.0944,-13561.61778
1,2,62985.85359,-11830.46044
2,3,62985.85359,-9830.460444


## Base Station (Agent) Locations

We use the terms agent and base station interchangeably at the moment. In the future we will probably use agent to mean a simulated occupant, so expect to see base station used more.

TODO: put in a base image

These are the Raspberry Pi computers that receive the beacon packets.

In [4]:
# Load the base station (agent) table from the published Google Sheet (CSV export).
baseStationData = h.getDFfromCSVURL("https://docs.google.com/spreadsheets/d/167pxh_NRq5e9sQV9Zb8Z-Q-kdN7Zy9FCzCp47eVXTuE/pub?gid=1446311565&single=true&output=csv")

# Only keep the columns we care about: the first five by position, minus "location".
# .iloc is the explicit positional indexer; .ix did the same thing here (the column
# labels are strings) but is deprecated and was removed in pandas 1.0.
baseStationData = baseStationData.iloc[:, 0:5]
baseStationData = baseStationData.drop(["location"], axis=1)
# NOTE(review): agentId values are hex strings (see the preview below), so this is not
# a numeric test. Presumably it works because under Python 2 any string compares
# greater than 0 while NaN does not, which drops the rows with no agentId --
# TODO confirm, then consider the explicit `baseStationData.agentId.notnull()`.
baseStationData = baseStationData[baseStationData.agentId > 0]
baseStationData.head(3)

Unnamed: 0,agentId,agentName,x,y
0,00000000e5bd0ae9,Ace,2953.206017,14427.45886
1,000000006c1a3b5a,Alfa,3323.405966,9086.502512
2,0000000020294bbd,Beer,3313.076869,2665.631392


# Variable data

## Known Locations

These are generated by me walking around wearing my badge. I scan each sticker as I pass under it. Because the stickers are so close, it's hard for me to take an ambiguous route between them.

The data sets here are test sets. 

TODO: replace these test sets with a series of 5 minute journeys in an array of URLs.

In [5]:
def getSampleWalkData(url):
    """Download one recorded walk and keep only the sticker ID and its time stamp.

    url: address of a published CSV export of a walk recording.

    Returns the frame produced by h.getDFfromCSVURL with the columns
    "xstickerID", "detectionType" and "xID" removed, which leaves
    "stickerID" and "timeStamp".
    """
    allColumns = ["stickerID","xstickerID","detectionType","xID","timeStamp"]
    unusedColumns = ["xstickerID","detectionType","xID"]
    walk = h.getDFfromCSVURL(url, columnNames=allColumns)
    return walk.drop(unusedColumns, axis=1)

In [6]:
# Published CSV exports of the recorded walks, one URL per walk.
# The position in this list is the walk number printed when the walks are loaded.
trainingWalkURLs = [
 "https://docs.google.com/spreadsheets/d/1uE_tUItRANypaWxCmZeXWsgFZq_JDG626v4Gg2UQfKg/pub?gid=952022876&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/1v20slmW6nz8GnA3u8cbWBYpTuT1UwaLHFSm8AYNNr8E/pub?gid=649191784&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/10QiLgS-oBha4mwwJlzrZb-8BZHb3MPJ7Q3FIrPuTpUA/pub?gid=1669457584&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/1bU-juDRDXl0mVWGCsPvTppaOf7td51R2cP1P6kxjAk4/pub?gid=1291732418&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/12YyeEIds7RSKfF1NVLqy3x8F_hidV3lhNwfsJ8UuBRE/pub?gid=2131252797&single=true&output=csv"
]

## Get the data and format it ready for use

Get all the data from the spreadsheets.

Give known positions time and coordinates

To make things easier to process later I've joined the coords to the sticker detections

`sydTime` is needed because my phone records the sticker detection times in Sydney local time, but everything else is in UTC. It's not used again unless it's needed for debugging. If daylight saving changes, uncomment it and check that the conversion still works properly. (I hate timezones!)

In [7]:
# Build one DataFrame per walk: sticker scans joined to sticker coordinates,
# with a parsed `time` column, sorted chronologically.
trainingWalks = []
for i, twURL in enumerate(trainingWalkURLs):
    thisData = getSampleWalkData(twURL)
    # Attach the x/y coordinates of each scanned sticker (merge on stickerID).
    richStickerData = thisData.merge(stickerLocations,on="stickerID")
    # Parse the day-first time stamp text and pass it through h.sydTimeToUTC.
    richStickerData['time'] = richStickerData.apply(lambda x: h.sydTimeToUTC(dateutil.parser.parse(x.timeStamp, dayfirst=True)) , axis=1)
    # Debugging aid: keeps the parsed time before the UTC conversion (see the note above this cell).
#     richStickerData['sydtime'] = richStickerData.apply(lambda x: dateutil.parser.parse(x.timeStamp, dayfirst=True) , axis=1)
    # The raw text time stamp is no longer needed once `time` exists.
    richStickerData = richStickerData.drop(["timeStamp"], 1)
    richStickerData = richStickerData.sort_values("time", ascending=1)

    # Summary line per walk: number, first scan, last scan, duration, row count.
    print "{}: {} TO {} ({}) {} rows".format(i,richStickerData.time.min(), 
                                 richStickerData.time.max(), 
                                 richStickerData.time.max() - richStickerData.time.min(),
                                 richStickerData.shape[0])
    
    trainingWalks.append(richStickerData)
    


0: 2016-10-17 00:21:32+00:00 TO 2016-10-17 00:23:50+00:00 (0 days 00:02:18) 23 rows
1: 2016-11-02 00:28:58+00:00 TO 2016-11-02 00:46:47+00:00 (0 days 00:17:49) 221 rows
2: 2016-11-06 02:20:09+00:00 TO 2016-11-06 02:22:17+00:00 (0 days 00:02:08) 28 rows
3: 2016-11-12 04:31:04+00:00 TO 2016-11-12 04:51:12+00:00 (0 days 00:20:08) 231 rows
4: 2016-11-09 05:13:43+00:00 TO 2016-11-09 05:20:47+00:00 (0 days 00:07:04) 76 rows


In [8]:
# Use the third walk (list position 2) for the rest of the notebook.
# NOTE: this binds a second name to the same DataFrame, it does not copy it, so
# columns added to liveSet later also appear on trainingWalks[2].
liveSet = trainingWalks[2]
liveSet.head()

Unnamed: 0,stickerID,x,y,time
27,72,42623.86053,29848.85705,2016-11-06 02:20:09+00:00
26,73,40623.86053,29848.85705,2016-11-06 02:20:15+00:00
25,52,38034.91657,28100.28274,2016-11-06 02:20:21+00:00
24,53,38034.91657,26100.28274,2016-11-06 02:20:26+00:00
23,54,38034.91657,24100.28274,2016-11-06 02:20:31+00:00


## Detections

This gets the detection data associated with each journey. This was a _huge_ struggle to get. There is so much data that most attempts failed. It was solved by requesting the data in five-second slices; any more than that and the server choked. I'm not sure why: the data returns from queries run on the server, and 5s worth returns almost immediately, but anything longer times out.

In [9]:
import signal
import sys

try:
    TimeoutError
except NameError:
    # Python 2 has no builtin TimeoutError; without this fallback the alarm
    # handler below would die with a NameError instead of the intended error.
    class TimeoutError(Exception):
        """Raised when the body of a `timeout` block runs for too long."""


class timeout(object):
    """Context manager that interrupts its body with TimeoutError after `seconds`.

    Built on signal.alarm / SIGALRM, so it only works on Unix and only in the
    main thread, and `seconds` must be a whole number.

    seconds: how long the body may run before the alarm fires.
    error_message: text carried by the TimeoutError that is raised.
    """
    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message
        # Handler that was installed before __enter__, restored by __exit__.
        self._previousHandler = None

    def handle_timeout(self, signum, frame):
        """SIGALRM handler: abort whatever is running by raising TimeoutError."""
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # signal.signal returns the handler it replaces; remember it so that
        # leaving the block does not leave our handler installed forever.
        self._previousHandler = signal.signal(signal.SIGALRM, self.handle_timeout)
        signal.alarm(self.seconds)
        return self

    def __exit__(self, type, value, traceback):
        # Cancel the pending alarm first so it cannot fire during cleanup.
        signal.alarm(0)
        # The previous handler is None when it was not installed from Python;
        # signal.signal cannot re-install that, so leave ours in place then.
        if self._previousHandler is not None:
            signal.signal(signal.SIGALRM, self._previousHandler)

In [10]:
def robustRequest(request, headers, tries=10, timeoutSeconds=20):    
        response = False
        with timeout(seconds=timeoutSeconds):
            for i in range(tries):
                if not response:
                    try:
                        response = requests.request("GET", request, headers=headers)
                        return response
                    except:
                        if i == tries-1:
                            print "{}\nfailed {} times".format(request, i+1), sys.exc_info()[1]
                            return False

In [11]:
def nSecondWindows(stickerRecordingsRow, personID=304, chunkLength=5.0, chatty=True):
    try:
        startTime = datetime.datetime.now()

        row = stickerRecordingsRow
        halfWindow = datetime.timedelta(seconds=chunkLength/2.0)
        ws = row.time - halfWindow
        we = row.time + halfWindow
        url = "http://ec2-52-65-111-92.ap-southeast-2.compute.amazonaws.com:3000"
        headers = {'authorization': "Basic ",# + key,
                   'content-type':  "application/json",
                   'cache-control': "no-cache"}

        request = "{}/find?minor[]={}&windowstart={}&windowend={}".format(
            url, personID, ws.isoformat(), we.isoformat())

        windowDetectionsResponse = robustRequest(request, headers)

        if windowDetectionsResponse:

            responses = json.loads(windowDetectionsResponse.text)[u'results'][0]
            responses = pd.DataFrame.from_dict(responses)
            responses['temptime'] = responses.apply(lambda x: dateutil.parser.parse(x.time, dayfirst=True) , axis=1)
            responses = responses.drop(['time'], 1)
            responses = responses.rename(columns = {'temptime':'time'})
            responses = responses.sort_values("time", ascending=1)
            
            responses = pd.merge(responses,baseStationData,on=["agentId"])
            responses = responses.drop(["agentId"], 1)
            # rescale rssi between 0 and max in positive numbers. Bigger number means more powerful signal
            minPower = -100 #min(responses.rssi)
            responses['rssiAdj'] = responses.rssi - minPower

            if chatty:
                print "index: {}\n{}\nstart: {}\ndetection: {}\nend: {}\ntook: {}\nresult rows: {}\n".format(
                    row.name,row, ws, row.time, we, datetime.datetime.now() - startTime, responses.shape[0])
            
            return responses

        else:
            #the request returned false, so so will we
            return False
    except:
        print "OH FUCK"
        print sys.exc_info()
        print row

In [12]:
# Fetch the detection window for every sticker scan of the walk, one request per row.
# t=liveSet[:2].apply(nSecondWindows, axis=1) ## can't for the life of me make this work. Maybe post SO question eventually?
detections = []
for index, row in liveSet.iterrows():
    detections.append( nSecondWindows(row, chatty=False) )

# Store each window's result next to the scan it belongs to. A cell holds whatever
# nSecondWindows returned: a DataFrame, or a falsy value when that fetch failed.
liveSet["detections"] = detections

In [13]:
# Each row now carries its own table of detections in the `detections` column.
liveSet.head()

Unnamed: 0,stickerID,x,y,time,detections
27,72,42623.86053,29848.85705,2016-11-06 02:20:09+00:00,minor rssi ti...
26,73,40623.86053,29848.85705,2016-11-06 02:20:15+00:00,minor rssi ti...
25,52,38034.91657,28100.28274,2016-11-06 02:20:21+00:00,minor rssi ti...
24,53,38034.91657,26100.28274,2016-11-06 02:20:26+00:00,minor rssi ti...
23,54,38034.91657,24100.28274,2016-11-06 02:20:31+00:00,minor rssi ti...


In [14]:
# [0] looks up the row whose index label is 0, not the first row. liveSet is
# sorted by time and keeps its original labels, so label 0 need not be the
# earliest scan (compare the times below with liveSet.head()).
liveSet.detections[0].head()

Unnamed: 0,minor,rssi,time,agentName,x,y,rssiAdj
0,304,-64,2016-11-06 02:22:14.935000+00:00,Bravo,12030.63638,12272.73506,36
1,304,-76,2016-11-06 02:22:15.890000+00:00,Bravo,12030.63638,12272.73506,24
2,304,-68,2016-11-06 02:22:17.797000+00:00,Bravo,12030.63638,12272.73506,32
3,304,-65,2016-11-06 02:22:18.757000+00:00,Bravo,12030.63638,12272.73506,35
4,304,-64,2016-11-06 02:22:14.938000+00:00,Alfa,3323.405966,9086.502512,36


## Preview Walk

In [None]:
def timeSliced(fullRow, savePlace, contextDF, personID=304):
    """Draw one animation frame for a single sticker scan and save it as a PNG.

    The frame shows the floor plan image, every base station, the whole walk,
    the detections received around this scan and the scanned sticker itself.

    fullRow: one row of the walk table; uses its `time`, `x`, `y` and
        `detections` (a DataFrame with x, y and rssiAdj columns).
    savePlace: path prefix the file name is appended to, so a folder needs its
        trailing separator.
    contextDF: the whole walk (stickerID, x, y), drawn as labelled dots and a path.
    personID: only used in the frame title.

    Reads the notebook-level baseStationData and stickerLocations frames and
    the image file "map.png". Writes `savePlace + title + ".png"`; returns nothing.
    """
    df = fullRow.detections

    fig, ax = plt.subplots()
    try:
        timeFormatString = "%H:%M:%S"
        title = "Detections for person {} around {}".format(personID, fullRow.time.strftime(timeFormatString))
        ax.set_title(title)

        #--- general vvv

        #put the image on the graph as an underlay
        img = imread("map.png")
        # extent places the image in the same coordinate space as the x/y data
        ax.imshow(img, zorder=0, extent=[-1300, 72000, -24000, 41000])
        #                                [left,  right, bottom, top  ]

        #put the baseStations on the map
        for index, row in baseStationData.iterrows():
            ax.text(row.x, row.y, row.agentName, fontsize=8)
        ax.scatter(baseStationData.x,baseStationData.y, marker='+')

        #Hide the axis numbers because they don't tell us much
        ax.xaxis.set_major_formatter(plt.NullFormatter())
        ax.yaxis.set_major_formatter(plt.NullFormatter())

        #--- specific to this animation vvv

        #the sticker labels
        for index, row in contextDF.iterrows():
            ax.text(row.x, row.y, "{0:03d}".format(row.stickerID), fontsize=8)

        #sticker dots
        ax.scatter(contextDF.x, contextDF.y, zorder=20)
        ax.scatter(stickerLocations.x,stickerLocations.y,s=1,c="y")

        #the path
        ax.plot(contextDF.x, contextDF.y, alpha=0.7, linewidth=1, c="g", solid_capstyle='round')

        #--- specific to this frame vvv

        # Plot all the detections onto the map as pale circles, sized by signal strength.
        # A failed fetch leaves False/None in this cell instead of a DataFrame;
        # still render the frame, just without detections, rather than crash.
        if isinstance(df, pd.DataFrame):
            ax.scatter(df.x, df.y, zorder=1, s=abs(df.rssiAdj)*100, alpha=0.2)

        #plot the current window's dot bigger
        ax.scatter(fullRow.x, fullRow.y, zorder=30, s=150, c="r")

        fig.savefig(savePlace+title+".png", bbox_inches='tight')
    finally:
        #--- clean up vvv
        # Always release this specific figure, even when drawing or saving
        # failed; this is called once per frame so leaked figures pile up.
        fig.clf()
        plt.close(fig)

In [17]:
# Folder prefix the frames are written to; timeSliced appends the file name to it.
savePlace = 'frames/test1/'
# NOTE(review): a "/" is prepended here, but the frames are saved to the relative
# savePlace -- confirm how h.clearFolder resolves this path.
h.clearFolder("/"+savePlace)
#this takes quite a long time if you have a lot of frames
startStopwatch = datetime.datetime.now()
print startStopwatch

# Render one frame per sticker scan of the walk.
for index, row in liveSet.iterrows():
    # Progress line whenever the row's index label is a multiple of 10.
    if index%10==0:
        print index, datetime.datetime.now() - startStopwatch
    timeSliced(row, savePlace, liveSet)

print "That took", datetime.datetime.now() - startStopwatch

2016-11-14 18:32:01.795693
20 0:00:04.623798
10 0:00:11.151391
0 0:00:17.623760
That took 0:00:18.316100


In [19]:
# Combine every saved frame into one animated GIF; the output below shows the
# `convert` command that was run.
h.makeGif(savePlace + "*.png", "SampleWalk4")

convert -delay 20 -loop 0 frames/test1/*.png SampleWalk4.gif


![](SampleWalk4.gif)

![](SampleWalk3.gif)

## Straight path

![](SampleWalk1.gif)

## Not straight path
![](SampleWalk2.gif)