In [1]:
import pandas as pd
import numpy as np

In [2]:
from pymongo import MongoClient



def _connect_mongo(host, port, db):
    mongo_str = 'mongodb://%s:%s/%s' % (host, port, db)
    client = MongoClient(mongo_str)
    return client[db]

def read_mongo(db, collection, query={}, host = 'localhost', port = 27017, no_id = True):
    """Read from MongoDB and store into DataFrame"""

    # Connect to MongoDB
    db = _connect_mongo(host = host, port = port, db = db)

    # Query to specific DB and collection
    collection = db[collection].find(query)

    # Create DataFrame
    global df
    df = pd.DataFrame(list(collection))

    # Delete the _id
    if no_id:
        del df['_id']

read_mongo('workshopdb','zapis1',{}, '192.168.84.17')


In [3]:
df.head(10)

Unnamed: 0,deviceAddress,proximity,rssi,scanType,sourceId,timestamp,trackingId
0,e2:02:00:04:d7:40,FAR,-103,BLE,rNt0R,1505214866,mabs
1,e2:02:00:04:d7:40,NEAR,-74,BLE,HNenF,1505214865,mabs
2,e2:02:00:04:d7:40,FAR,-103,BLE,rNt0R,1505214866,mabs
3,e2:02:00:04:d7:40,FAR,-103,BLE,rNt0R,1505214866,mabs
4,e2:02:00:04:d7:40,FAR,-96,BLE,rNt0R,1505214873,mabs
5,e2:02:00:04:d7:40,NEAR,-73,BLE,HNenF,1505214873,mabs
6,e2:02:00:04:d7:40,FAR,-96,BLE,ZN6Xd,1505214872,mabs
7,e2:02:00:04:d7:40,FAR,-88,BLE,9McaT,1505214873,mabs
8,e2:02:00:04:d7:40,FAR,-96,BLE,rNt0R,1505214873,mabs
9,e2:02:00:04:d7:40,NEAR,-73,BLE,HNenF,1505214873,mabs


In [4]:
# Create new df with only timestamp, sourceId and rssi
df_important = df[['timestamp','sourceId','rssi']].copy()

# Print some info about this df
df_important.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9779 entries, 0 to 9778
Data columns (total 3 columns):
timestamp    9779 non-null int64
sourceId     9779 non-null object
rssi         9779 non-null int64
dtypes: int64(2), object(1)
memory usage: 191.0+ KB


In [9]:
print("Number of rows: " + str(df_important.shape[0]))
print("Number of columns: " + str(df_important.shape[1]))

Number of rows: 9779
Number of columns: 3


In [13]:
# Delete duplicates of records where is same timestamp and sourceId
# Gateways send data to the cloud only if the RSSI changes. 
# If there were no changes in RSSI, MQTT takes the last record and print it, that is why timestamp do not change.
df_important_filtered = df_important.drop_duplicates(['timestamp', 'sourceId'])

# Get info about this data
df_important_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1778 entries, 0 to 9776
Data columns (total 3 columns):
timestamp    1778 non-null int64
sourceId     1778 non-null object
rssi         1778 non-null int64
dtypes: int64(2), object(1)
memory usage: 48.6+ KB


In [14]:
# Print first 10 elements
df_important_filtered.head(10)

Unnamed: 0,timestamp,sourceId,rssi
0,1505214866,rNt0R,-103
1,1505214865,HNenF,-74
4,1505214873,rNt0R,-96
5,1505214873,HNenF,-73
6,1505214872,ZN6Xd,-96
7,1505214873,9McaT,-88
25,1505214880,8JRGb,-84
29,1505214882,9McaT,-103
36,1505214887,QQhDc,-89
41,1505214894,9McaT,-93


In [54]:
def rssi_to_distance(rssi):

    ### Declare local variable TxPower
    # This is value of rssi @ 1m
    TxPower = -65
    ratio = rssi * 1.0 / TxPower
    
    # If rssi was 0
    if (rssi == 0):
      return -1
    
    if (ratio < 1.0):
        return ratio**10
    else:
      dist = (0.89976) * (ratio ** 7.7095) + 0.111
    return dist

In [25]:
# Test
rssi_to_distance(-77)

3.432784121672608

In [48]:
df_important_filtered["dist"] = df_important_filtered.apply(lambda row: rssi_to_distance(row['rssi']), axis=1)

df_important_filtered.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,timestamp,sourceId,rssi,dist
0,1505214866,rNt0R,-103,31.403463
1,1505214865,HNenF,-74,2.556193
4,1505214873,rNt0R,-96,18.299263
5,1505214873,HNenF,-73,2.312703
6,1505214872,ZN6Xd,-96,18.299263
7,1505214873,9McaT,-88,9.410515
25,1505214880,8JRGb,-84,6.607842
29,1505214882,9McaT,-103,31.403463
36,1505214887,QQhDc,-89,10.256966
41,1505214894,9McaT,-93,14.350359


In [53]:
# Sort by timestamp
df_important_filtered_sorted = df_important_filtered.sort_values('timestamp', ascending = True)

df_important_filtered_sorted.head(10)

Unnamed: 0,timestamp,sourceId,rssi,dist
1,1505214865,HNenF,-74,2.556193
0,1505214866,rNt0R,-103,31.403463
6,1505214872,ZN6Xd,-96,18.299263
4,1505214873,rNt0R,-96,18.299263
5,1505214873,HNenF,-73,2.312703
7,1505214873,9McaT,-88,9.410515
25,1505214880,8JRGb,-84,6.607842
29,1505214882,9McaT,-103,31.403463
36,1505214887,QQhDc,-89,10.256966
41,1505214894,9McaT,-93,14.350359
