In [1]:
import pymongo
from pymongo import MongoClient
import datetime
import dateutil.parser
import json
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import requests
from scipy.misc import imread
from StringIO import StringIO
import math
import pytz
import helpers
import sys
# Since I moved most of the functions into their own file I haven't cleaned up the imports section. 
# There is probably unused imports here, but I'd imagine that it's not too much of a problem.

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (20, 10)

# Get the data we need

To do this analysis, we need to know about the stickers, base stations and detections.

Stickers and base stations are static data, they live in google spreadsheets.

Detections is from the MongoDB database.

---

# Fixed locations

## Stickers
The stickers are the locating items in the space. They are stuck to the ceiling at roughly 2m intervals.

TODO: put in the gif of me putting up a sticker and an image of one.

They have a QR code on them, so they can be scaned with a phone. That gives us a time stamp and a location.

In future we should be able to get orientation and position in field of view because QR codes have a top.

In [3]:
stickerLocations = helpers.getDFfromCSVURL("https://docs.google.com/spreadsheets/d/1sijQZR2iFLo2FS_3r5gbsuAkaglRz557LWjdLxnPkpE/pub?gid=0&single=true&output=csv")
stickerLocations = stickerLocations.drop(["raw","z"], 1) # this is just because we aren't on multiple floors
stickerLocations.head(3)

Unnamed: 0,stickerID,x,y
0,1,63852.0944,-13561.61778
1,2,62985.85359,-11830.46044
2,3,62985.85359,-9830.460444


## Base Station (Agent) Locations

We use the term agent and base station interchangably at the moment. In the future we will probably use agent to mean a simulated occupant, so expect to see base station used more.

TODO: put in a base image

These are the Raspberry Pi computers that recieve the beacon packets.

In [4]:
baseStationData = helpers.getDFfromCSVURL("https://docs.google.com/spreadsheets/d/167pxh_NRq5e9sQV9Zb8Z-Q-kdN7Zy9FCzCp47eVXTuE/pub?gid=1446311565&single=true&output=csv")

#only keep the columns we care about
baseStationData = baseStationData.ix[:,0:5]
baseStationData = baseStationData.drop(["location"], 1)
baseStationData = baseStationData[baseStationData.agentId > 0] #not really sure how this works, maybe failure is falsy?
baseStationData.head(3)

Unnamed: 0,agentId,agentName,x,y
0,00000000e5bd0ae9,Ace,2953.206017,14427.45886
1,000000006c1a3b5a,Alfa,3323.405966,9086.502512
2,0000000020294bbd,Beer,3313.076869,2665.631392


# variable data

## Known Locations

These are generated by me walking around wearing my badge. I scan each sticker as I pass under it. Because the stickers are so close, it's hard for me to take an ambiguous route between them.

The data sets here are test sets. 

TODO: replace these test sets with a series of 5 minute journeys in an array of URLs.

In [5]:
trainingWalkURLs = [
 "https://docs.google.com/spreadsheets/d/1v20slmW6nz8GnA3u8cbWBYpTuT1UwaLHFSm8AYNNr8E/pub?gid=649191784&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/10QiLgS-oBha4mwwJlzrZb-8BZHb3MPJ7Q3FIrPuTpUA/pub?gid=1669457584&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/1bU-juDRDXl0mVWGCsPvTppaOf7td51R2cP1P6kxjAk4/pub?gid=1291732418&single=true&output=csv",
 "https://docs.google.com/spreadsheets/d/12YyeEIds7RSKfF1NVLqy3x8F_hidV3lhNwfsJ8UuBRE/pub?gid=2131252797&single=true&output=csv"
]

## Get the data and format it ready for use

Get all the data from the spreadsheets.

Give known positions time and coordinates

To make things easier to process later I've joined the coords to the sticker detections

`sydTime` is needed because my phone records the sticker detection times in Sydney local time, but everything else is in UTC. It's not used again unless it's needed for debugging. If datlight saving changes, uncomment and check if it works properly. (I hate timezones!)

In [6]:
trainingWalks = helpers.getTrainingWalkData(trainingWalkURLs, stickerLocations)

0: 2016-11-02 00:28:58+00:00 TO 2016-11-02 00:46:47+00:00 (0 days 00:17:49) 221 rows
1: 2016-11-06 02:20:09+00:00 TO 2016-11-06 02:22:17+00:00 (0 days 00:02:08) 28 rows
2: 2016-11-12 04:31:04+00:00 TO 2016-11-12 04:51:12+00:00 (0 days 00:20:08) 231 rows
3: 2016-11-09 05:13:43+00:00 TO 2016-11-09 05:20:47+00:00 (0 days 00:07:04) 76 rows


In [7]:
liveSet = trainingWalks[2]
liveSet.head()

Unnamed: 0,stickerID,x,y,time
230,163,37113.33215,14324.31273,2016-11-12 04:31:04+00:00
229,162,37113.33215,12324.31273,2016-11-12 04:31:09+00:00
228,161,37113.33215,10324.31273,2016-11-12 04:31:14+00:00
227,160,37113.33215,8324.312731,2016-11-12 04:31:18+00:00
226,159,36875.44152,6455.449251,2016-11-12 04:31:26+00:00


In [9]:
# t=liveSet[:2].apply(nSecondWindows, axis=1) ## can't for the life of me make this work. Maybe post SO question eventually?

detections = []
for index, row in liveSet.iterrows():
    detections.append( helpers.nSecondWindows(row, baseStationData, personID=304, chunkLength=5.0, chatty=False) )

liveSet["detections"] = detections


In [10]:
liveSet.head()

Unnamed: 0,stickerID,x,y,time,detections
230,163,37113.33215,14324.31273,2016-11-12 04:31:04+00:00,minor rssi tim...
229,162,37113.33215,12324.31273,2016-11-12 04:31:09+00:00,minor rssi tim...
228,161,37113.33215,10324.31273,2016-11-12 04:31:14+00:00,minor rssi tim...
227,160,37113.33215,8324.312731,2016-11-12 04:31:18+00:00,"no detections found, deal with this later"
226,159,36875.44152,6455.449251,2016-11-12 04:31:26+00:00,minor rssi ti...


## Detections

This is to get the detection data associated with each journey. This was a _huge_ struggle to get. There is so much data that most attempts failed. It was solved by shaving five second slices off the data. Any more than that and the server choked. I'm not sure why, the data returns from queries on the server, but 5s worth returns almost immediatly, but more times out.

In [23]:
# t=liveSet[:2].apply(nSecondWindows, axis=1) ## can't for the life of me make this work. Maybe post SO question eventually?


detections = []
for index, row in liveSet.iterrows():
    detections.append( nSecondWindows(row, baseStationData, chatty=False) )

liveSet["detections"] = detections

NameError: global name 'robustRequest' is not defined

In [9]:
liveSet.head()

Unnamed: 0,stickerID,x,y,time,detections
2,88,55964.18531,15890.67113,2016-11-02 00:28:58+00:00,
1,88,55964.18531,15890.67113,2016-11-02 00:29:00+00:00,
4,87,53964.18531,15890.67113,2016-11-02 00:29:03+00:00,
220,86,53236.03541,17162.52123,2016-11-02 00:29:09+00:00,
219,175,51770.01103,15890.67113,2016-11-02 00:29:12+00:00,


In [10]:
liveSet.detections[0].head()

AttributeError: 'NoneType' object has no attribute 'head'

## Preview Walk

In [None]:
savePlace = 'frames/test1/'
helpers.clearFolder("/"+savePlace)
#this takes quite a long time if you have a lot of frames
startStopwatch = datetime.datetime.now()
print startStopwatch

for index, row in liveSet.iterrows():
    if index%10==0:
        print index, datetime.datetime.now() - startStopwatch
    helpers.timeSliced(row, savePlace, liveSet, baseStationData, stickerLocations)

print "That took", datetime.datetime.now() - startStopwatch

In [None]:
helpers.makeGif(savePlace + "*.png", "SampleWalk4")

![](SampleWalk4.gif)

![](SampleWalk3.gif)

## straight path

![](SampleWalk1.gif)

## not straight path
![](SampleWalk2.gif)