In [1]:
!pip install pickle5

Collecting pickle5
  Downloading pickle5-0.0.11.tar.gz (132 kB)
[K     |████████████████████████████████| 132 kB 1.3 MB/s 
[?25hBuilding wheels for collected packages: pickle5
  Building wheel for pickle5 (setup.py) ... [?25l- \ | / - done
[?25h  Created wheel for pickle5: filename=pickle5-0.0.11-cp37-cp37m-linux_x86_64.whl size=245902 sha256=7e4339467247ea42425d9050083812087de72c6118d446d3b9065ce91da1ac12
  Stored in directory: /root/.cache/pip/wheels/7e/6a/00/67136a90d6aca437d806d1d3cedf98106e840c97a3e5188198
Successfully built pickle5
Installing collected packages: pickle5
Successfully installed pickle5-0.0.11


In [2]:
import glob
import pickle5 as pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import interpolate
import matplotlib.pyplot as plt

In [3]:
## input locations: folder with the model result pickles and the training waypoint pickle
modelOutputDir = '../input/idln-temp-files-version-1/MLP_v7_Results/MLP_v7_Results'
wayPointData_trainPath = '../input/idln-temp-files-version-1/wayPointData_train.pickle'

## every out-of-fold prediction pickle in the model output folder, in sorted (deterministic) order
modelWiFiOOF_files = glob.glob(f"{modelOutputDir}/*_OOF.pickle")
modelWiFiOOF_files.sort()

## Helper functions

In [4]:
def getBuildingName(inputFilePath):
    """
    Extract the building id from a model output file path.

    The id is the part of the file name (the text after the last '/') that
    precedes the first underscore; a file name without an underscore is
    returned whole.
    """
    fileName = inputFilePath.rsplit('/', 1)[-1]
    buildingName, _, _ = fileName.partition('_')
    return buildingName

def getModelName(inputFilePath):
    """
    Build the model name from a model output file path.

    The file name (the text after the last '/') is split on underscores and
    its second and third fields are joined back with an underscore, e.g.
    '<building>_MLP_v7_OOF.pickle' -> 'MLP_v7'. A file name with fewer than
    three fields raises IndexError.
    """
    fields = inputFilePath.rsplit('/', 1)[-1].split('_')
    return f"{fields[1]}_{fields[2]}"

def getDfRmse(OOF_preds):
    """
    Mean position error between predicted and target coordinates.

    For every row the Euclidean distance between (x_preds, y_preds) and
    (x_tgt, y_tgt) is computed and the mean of those distances is returned.
    Despite the name, this is the mean of per-row distances, not the square
    root of a mean squared error.
    """
    xError = OOF_preds['x_preds'] - OOF_preds['x_tgt']
    yError = OOF_preds['y_preds'] - OOF_preds['y_tgt']
    distance = np.sqrt(np.square(xError) + np.square(yError))
    return distance.mean()

def getBuildingWayPointData_train(buildingName : str):
    """
    Load the training waypoint table and return the rows of one building.

    The pickle at `wayPointData_trainPath` is read on every call, filtered to
    the rows whose 'building' column equals `buildingName`, and returned
    sorted by 'path' and then 'timestamp'. The shape of the filtered table is
    printed as a progress message.
    """
    ## NOTE: unpickling can execute arbitrary code; only load trusted files
    with open(wayPointData_trainPath, 'rb') as pickleFile:
        allWayPoints = pickle.load(pickleFile)

    ## keep only the requested building
    isRequestedBuilding = allWayPoints['building'] == buildingName
    buildingWayPointData = allWayPoints[isRequestedBuilding]
    print(f"buidling waypoint data shape = {buildingWayPointData.shape}")

    return buildingWayPointData.sort_values(by=['path', 'timestamp'])

def commonPaths_WiFiOOF_WaypointData(buildingWifiOOF, buildingWayPointData):
    """
    Return the sorted list of path ids present in both input tables.

    Parameters
    ----------
    buildingWifiOOF : table with a 'path' column (wifi out-of-fold predictions)
    buildingWayPointData : table with a 'path' column (waypoint data)

    Returns
    -------
    list
        Path ids that occur in both tables, each listed once, in ascending
        order. Empty when the tables share no path.
    """
    ## only the final result needs ordering, so the unique ids go straight into sets
    wifiPaths = set(buildingWifiOOF['path'].unique().tolist())
    waypointPaths = set(buildingWayPointData['path'].unique().tolist())
    return sorted(wifiPaths & waypointPaths)

In [5]:
def generate_WayPointPredictions(buildingName, modelName, buildingWifiOOF, buildingWayPointData):
    """
    Estimate waypoint positions from the positions predicted at wifi timestamps.

    For every path present in both inputs, the wifi predictions are treated as a
    piecewise linear function of time and evaluated at the waypoint timestamps
    with np.interp. Waypoints that lie before the first or after the last wifi
    timestamp of their path take the first / last predicted position: np.interp
    clamps to the end points, it does not extrapolate.

    The result is also written to
    "{buildingName}_{modelName}_wayPointPreds_train.pickle" in the current
    working directory.

    Parameters
    ----------
    buildingName, modelName : used only to build the output file name
    buildingWifiOOF : DataFrame with columns 'path', 'timestamp', 'x_preds', 'y_preds'
    buildingWayPointData : DataFrame with columns 'path', 'timestamp', 'x', 'y'

    Returns
    -------
    DataFrame with one row per waypoint of the common paths and the columns
    'timestamp', 'x_preds', 'y_preds', 'x_tgt', 'y_tgt', 'path'.

    Raises
    ------
    ValueError
        If the two inputs have no path in common.

    Extrapolation function reference (for the scipy alternative noted in the loop)
    --------------------------------
    https://stackoverflow.com/questions/2745329/how-to-make-scipy-interpolate-give-an-extrapolated-result-beyond-the-input-range
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interp1d.html#:~:text=Interpolate%20a%201%2DD%20function,the%20value%20of%20new%20points.
    """

    ## certain path files may not have wifi data, so considering only common path files
    commonPaths = commonPaths_WiFiOOF_WaypointData(buildingWifiOOF, buildingWayPointData)
    if not commonPaths:
        ## np.concatenate on empty lists would fail with a message that hides the real cause
        raise ValueError(f"no common paths between wifi OOF and waypoint data for building {buildingName}")

    ## placeholder for storing output, one entry per path
    timestamp = []
    interp_x = []
    interp_y = []
    x_groundTruth = []
    y_groundTruth = []
    pathList = []

    for path in commonPaths:
        pathWaypointData = buildingWayPointData[buildingWayPointData['path'] == path]

        ## np.interp needs increasing sample positions and does not verify it, so sort here
        ## instead of relying on the caller; the stable sort leaves already sorted rows untouched
        pathWifiData = buildingWifiOOF[buildingWifiOOF['path'] == path]
        pathWifiData = pathWifiData.sort_values(by='timestamp', kind='mergesort')

        ## piecewise linear interpolation of the wifi predictions at the waypoint timestamps
        ## (alternative that extrapolates beyond the wifi time range, usable when a path has
        ##  more than one wifi row: interpolate.interp1d(..., kind='slinear', fill_value='extrapolate'))
        waypointTimes = pathWaypointData['timestamp']
        wifiTimes = pathWifiData['timestamp']
        interpolatedXCoordinates = np.interp(waypointTimes, wifiTimes, pathWifiData['x_preds'])
        interpolatedYCoordinates = np.interp(waypointTimes, wifiTimes, pathWifiData['y_preds'])

        timestamp.append(waypointTimes)
        interp_x.append(interpolatedXCoordinates)
        interp_y.append(interpolatedYCoordinates)
        x_groundTruth.append(pathWaypointData['x'])
        y_groundTruth.append(pathWaypointData['y'])
        pathList.append([path] * len(interpolatedXCoordinates))

    ## flatten the per-path pieces into one column each
    timestamp = np.concatenate(timestamp, axis=0)
    interp_x = np.concatenate(interp_x, axis=0)
    interp_y = np.concatenate(interp_y, axis=0)
    x_groundTruth = np.concatenate(x_groundTruth, axis=0)
    y_groundTruth = np.concatenate(y_groundTruth, axis=0)
    pathList = np.concatenate(pathList, axis=0)

    modelWayPointPredictions = pd.DataFrame({'timestamp': timestamp, 'x_preds' : interp_x, 'y_preds': interp_y,
                                             'x_tgt': x_groundTruth, 'y_tgt': y_groundTruth, 'path': pathList})
    modelWayPointPredictions.to_pickle(f"{buildingName}_{modelName}_wayPointPreds_train.pickle")
    return modelWayPointPredictions

In [6]:
## the model name is read from the first OOF file only (presumably all files share one model)
modelName = getModelName(modelWiFiOOF_files[0])
buildingNameList, buildingOOFScoreList, buildingWayPtPredsScoreList = [], [], []

for buildingOOFPath in tqdm(modelWiFiOOF_files):
    buildingName = getBuildingName(buildingOOFPath)

    ## wifi out-of-fold predictions, ordered by path and time, and their score
    buildingWifiOOF = pd.read_pickle(buildingOOFPath).sort_values(by=['path', 'timestamp'])
    buildingOOFScore = getDfRmse(buildingWifiOOF)

    ## training waypoints of the same building
    buildingWayPointData = getBuildingWayPointData_train(buildingName)

    ## interpolate the wifi predictions to the waypoint timestamps and score the result
    modelWayPointPredictions = generate_WayPointPredictions(
        buildingName, modelName, buildingWifiOOF, buildingWayPointData)
    buildingWayPtPredsScore = getDfRmse(modelWayPointPredictions)

    ## collect one entry per building
    buildingNameList.append(buildingName)
    buildingOOFScoreList.append(buildingOOFScore)
    buildingWayPtPredsScoreList.append(buildingWayPtPredsScore)

  0%|          | 0/6 [00:00<?, ?it/s]

buidling waypoint data shape = (2491, 5)


 17%|█▋        | 1/6 [00:01<00:09,  1.97s/it]

buidling waypoint data shape = (1691, 5)


 33%|███▎      | 2/6 [00:03<00:07,  1.76s/it]

buidling waypoint data shape = (897, 5)


 50%|█████     | 3/6 [00:03<00:03,  1.13s/it]

buidling waypoint data shape = (1008, 5)


 67%|██████▋   | 4/6 [00:04<00:02,  1.01s/it]

buidling waypoint data shape = (3119, 5)


 83%|████████▎ | 5/6 [00:08<00:01,  1.91s/it]

buidling waypoint data shape = (3613, 5)


100%|██████████| 6/6 [00:09<00:00,  1.64s/it]


```python
## plt.plot(pathWaypointData['x'], pathWaypointData['y'], label='waypoint Data')
plt.plot(pathWifiData['x_preds'], pathWifiData['y_preds'], label='wifi preds')
plt.plot(interpolatedXCoordinates, interpolatedYCoordinates, label='interp wayPt Data')
plt.legend()
plt.grid(True)
plt.show()
```

In [7]:
## per-building comparison of the wifi OOF score and the interpolated waypoint score
modelWayPointResults = pd.DataFrame({
    'buildingName': buildingNameList,
    'OOFScore': buildingOOFScoreList,
    'wayPtScore': buildingWayPtPredsScoreList,
})
modelWayPointResults

Unnamed: 0,buildingName,OOFScore,wayPtScore
0,5a0546857ecc773753327266,8.771246,8.90819
1,5c3c44b80379370013e0fd2b,10.905769,10.801153
2,5d27099f03f801723c32511d,8.440701,8.619501
3,5d2709a003f801723c3251bf,7.574697,7.589286
4,5d2709b303f801723c327472,9.267596,10.075583
5,5d2709bb03f801723c32852c,12.517473,13.204245


In [8]:
## save the per-building score comparison to the current working directory, named after the model
modelWayPointResults.to_pickle(f"{modelName}_oof_wayPtScore_comparison.pickle")