## Indoor Localization - Capstone Project
Redo of Class 4 Task 3 in Python

by: Omar Chavez
Last Update

### Summary of Project:
Use different classification algorithms to discover where someone is inside a building based on the relative signal strength of wireless access points. The trainingData has been included to train the algorithms and can then test on the validationData

In [120]:
## Import all required modules
import numpy as np
import pandas as pd
import scipy
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns

# SKLearn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# models
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

### Import Datasets

In [121]:
# Training Data
LocCompOOB = pd.read_csv("trainingdata.csv")
# Data to Make Predictions on
LocIncompOOB = pd.read_csv("validationData.csv") 

### Evaluate Datasets

#### Examine Training Dataset

In [122]:
LocCompOOB.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7541.2643,4864921.0,2,1,106,2,2,23,1371713733
1,100,100,100,100,100,100,100,100,100,100,...,100,-7536.6212,4864934.0,2,1,106,2,2,23,1371713691
2,100,100,100,100,100,100,100,-97,100,100,...,100,-7519.1524,4864950.0,2,1,103,2,2,23,1371714095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7524.5704,4864934.0,2,1,102,2,2,23,1371713807
4,100,100,100,100,100,100,100,100,100,100,...,100,-7632.1436,4864982.0,0,0,122,2,11,13,1369909710


In [123]:
LocCompOOB.describe()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
count,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,...,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0,19937.0
mean,99.823644,99.820936,100.0,100.0,99.613733,97.130461,94.733661,93.820234,94.693936,99.163766,...,100.0,-7464.275947,4864871.0,1.674575,1.21282,148.429954,1.833024,9.068014,13.021869,1371421000.0
std,5.866842,5.798156,0.0,0.0,8.615657,22.93189,30.541335,33.010404,30.305084,12.634045,...,0.0,123.40201,66.93318,1.223078,0.833139,58.342106,0.372964,4.98872,5.36241,557205.4
min,-97.0,-90.0,100.0,100.0,-97.0,-98.0,-99.0,-98.0,-98.0,-99.0,...,100.0,-7691.3384,4864746.0,0.0,0.0,1.0,1.0,1.0,1.0,1369909000.0
25%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7594.737,4864821.0,1.0,0.0,110.0,2.0,5.0,8.0,1371056000.0
50%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7423.0609,4864852.0,2.0,1.0,129.0,2.0,11.0,13.0,1371716000.0
75%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7359.193,4864930.0,3.0,2.0,207.0,2.0,13.0,14.0,1371721000.0
max,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7300.81899,4865017.0,4.0,2.0,254.0,2.0,18.0,24.0,1371738000.0


In [124]:
LocCompOOB.info

<bound method DataFrame.info of        WAP001  WAP002  WAP003  WAP004  WAP005  WAP006  WAP007  WAP008  WAP009  \
0         100     100     100     100     100     100     100     100     100   
1         100     100     100     100     100     100     100     100     100   
2         100     100     100     100     100     100     100     -97     100   
3         100     100     100     100     100     100     100     100     100   
4         100     100     100     100     100     100     100     100     100   
...       ...     ...     ...     ...     ...     ...     ...     ...     ...   
19932     100     100     100     100     100     100     100     100     100   
19933     100     100     100     100     100     100     100     100     100   
19934     100     100     100     100     100     100     100     100     100   
19935     100     100     100     100     100     100     100     100     100   
19936     100     100     100     100     100     100     100     100     100

#### Examine Incomplete Dataset

In [125]:
LocIncompOOB.head()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
0,100,100,100,100,100,100,100,100,100,100,...,100,-7515.916799,4864890.0,1,1,0,0,0,0,1380872703
1,100,100,100,100,100,100,100,100,100,100,...,100,-7383.867221,4864840.0,4,2,0,0,0,13,1381155054
2,100,100,100,100,100,100,100,100,100,100,...,100,-7374.30208,4864847.0,4,2,0,0,0,13,1381155095
3,100,100,100,100,100,100,100,100,100,100,...,100,-7365.824883,4864843.0,4,2,0,0,0,13,1381155138
4,100,100,100,100,100,100,100,100,100,100,...,100,-7641.499303,4864922.0,2,0,0,0,0,2,1380877774


In [126]:
LocIncompOOB.describe()

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID,RELATIVEPOSITION,USERID,PHONEID,TIMESTAMP
count,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,...,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0,1111.0
mean,98.627363,100.0,99.665167,99.665167,100.0,100.0,100.0,94.29703,90.655266,98.811881,...,99.843384,-7529.197448,4864902.0,1.571557,0.758776,0.0,0.0,0.0,11.923492,1380604000.0
std,16.127245,0.0,7.888265,7.888607,0.0,0.0,0.0,32.148809,39.886991,14.931667,...,5.220261,120.209336,70.2728,1.001828,0.816139,0.0,0.0,0.0,6.556236,500322.2
min,-94.0,100.0,-87.0,-88.0,100.0,100.0,100.0,-99.0,-96.0,-98.0,...,-74.0,-7695.938755,4864748.0,0.0,0.0,0.0,0.0,0.0,0.0,1379578000.0
25%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7637.4238,4864843.0,1.0,0.0,0.0,0.0,0.0,9.0,1380189000.0
50%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7560.3763,4864915.0,1.0,1.0,0.0,0.0,0.0,13.0,1380873000.0
75%,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7420.539659,4864967.0,2.0,1.0,0.0,0.0,0.0,15.0,1380875000.0
max,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,-7299.786517,4865017.0,4.0,2.0,0.0,0.0,0.0,21.0,1381248000.0


In [127]:
LocIncompOOB.info

<bound method DataFrame.info of       WAP001  WAP002  WAP003  WAP004  WAP005  WAP006  WAP007  WAP008  WAP009  \
0        100     100     100     100     100     100     100     100     100   
1        100     100     100     100     100     100     100     100     100   
2        100     100     100     100     100     100     100     100     100   
3        100     100     100     100     100     100     100     100     100   
4        100     100     100     100     100     100     100     100     100   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
1106     100     100     100     100     100     100     100     100     100   
1107     100     100     100     100     100     100     100     100     100   
1108     100     100     100     100     100     100     100     100     100   
1109     100     100     100     100     100     100     100     100     100   
1110     100     100     100     100     100     100     100     100     100   

      W

In [128]:
LocIncompOOB.dtypes

WAP001              int64
WAP002              int64
WAP003              int64
WAP004              int64
WAP005              int64
                    ...  
SPACEID             int64
RELATIVEPOSITION    int64
USERID              int64
PHONEID             int64
TIMESTAMP           int64
Length: 529, dtype: object

### Preprocess Datasets 

#### Dataset 1 - Completed Dataset

In [129]:
# Remove features that have no impact on location
LocCompOOB = LocCompOOB.drop("USERID",  1)
LocCompOOB = LocCompOOB.drop("TIMESTAMP",  1)
LocCompOOB = LocCompOOB.drop("PHONEID",  1)
LocCompOOB = LocCompOOB.drop("RELATIVEPOSITION",  1)

In [130]:
LocCompOOB

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,FLOOR,BUILDINGID,SPACEID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7541.2643,4.864921e+06,2,1,106
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7536.6212,4.864934e+06,2,1,106
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,-7519.1524,4.864950e+06,2,1,103
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7524.5704,4.864934e+06,2,1,102
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7632.1436,4.864982e+06,0,0,122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19932,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7485.4686,4.864875e+06,3,1,1
19933,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7390.6206,4.864836e+06,1,2,140
19934,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7516.8415,4.864889e+06,3,1,13
19935,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-7537.3219,4.864896e+06,3,1,113


In [131]:
# Combine FLOOR and SPACEID into LOCATIONID to create dependent variable
LocCompOOB["LOCATIONID"] = LocCompOOB["SPACEID"].astype(str) + "-" + LocCompOOB["FLOOR"].astype(str)
# Remove FLOOR AND SPACEID
LocCompOOB = LocCompOOB.drop("FLOOR",  1)
LocCompOOB = LocCompOOB.drop("SPACEID",  1)

In [132]:
LocCompOOB

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7541.2643,4.864921e+06,1,106-2
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7536.6212,4.864934e+06,1,106-2
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,100,-7519.1524,4.864950e+06,1,103-2
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7524.5704,4.864934e+06,1,102-2
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7632.1436,4.864982e+06,0,122-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19932,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7485.4686,4.864875e+06,1,1-3
19933,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7390.6206,4.864836e+06,2,140-1
19934,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7516.8415,4.864889e+06,1,13-3
19935,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7537.3219,4.864896e+06,1,113-3


#### Dataset 2 - Incomplete Dataset

In [133]:
# Data to Make Predictions on
LocIncompOOB = pd.read_csv("validationData.csv") 

In [134]:
# Remove features that have no impact on location
LocIncompOOB = LocIncompOOB.drop("USERID",  1)
LocIncompOOB = LocIncompOOB.drop("TIMESTAMP",  1)
LocIncompOOB = LocIncompOOB.drop("PHONEID",  1)
LocIncompOOB = LocIncompOOB.drop("RELATIVEPOSITION",  1)

In [135]:
# Combine FLOOR and SPACEID into LOCATIONID to create dependent variable
LocIncompOOB["LOCATIONID"] = LocIncompOOB["SPACEID"].astype(str) + "-" + LocIncompOOB["FLOOR"].astype(str)

In [136]:
# Remove FLOOR AND SPACEID
LocIncompOOB = LocIncompOOB.drop("FLOOR",  1)
LocIncompOOB = LocIncompOOB.drop("SPACEID",  1)

In [137]:
LocIncompOOB

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7515.916799,4.864890e+06,1,0-1
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7383.867221,4.864840e+06,2,0-4
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7374.302080,4.864847e+06,2,0-4
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7365.824883,4.864843e+06,2,0-4
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7641.499303,4.864922e+06,0,0-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1106,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7317.344231,4.864796e+06,2,0-3
1107,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7313.731120,4.864792e+06,2,0-3
1108,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7637.535798,4.864903e+06,0,0-0
1109,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7636.654005,4.864905e+06,0,0-0


### Split Data Up Into Buildings

#### Dataset 1 

In [138]:
LocComp_0 = LocCompOOB[LocCompOOB["BUILDINGID"] == 0]

In [139]:
LocComp_0

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7632.1436,4.864982e+06,0,122-0
7119,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7637.2570,4.864950e+06,0,102-0
7120,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7628.1345,4.864977e+06,0,110-0
7121,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7622.0546,4.864976e+06,0,111-0
7122,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7636.2057,4.864967e+06,0,107-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18492,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7661.4735,4.864934e+06,0,228-3
18493,100,100,100,100,100,100,100,100,-75,100,...,100,100,100,100,100,100,-7656.7628,4.864938e+06,0,236-3
18494,100,100,100,100,100,100,100,100,-88,100,...,100,100,100,100,100,100,-7646.3076,4.864932e+06,0,235-3
18496,100,100,100,100,100,100,100,100,-66,100,...,100,100,100,100,100,100,-7644.9453,4.864949e+06,0,230-3


In [140]:
LocComp_1 = LocCompOOB[LocCompOOB["BUILDINGID"] == 1]

In [141]:
LocComp_1

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7541.2643,4.864921e+06,1,106-2
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7536.6212,4.864934e+06,1,106-2
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,100,-7519.1524,4.864950e+06,1,103-2
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7524.5704,4.864934e+06,1,102-2
5,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7533.8962,4.864939e+06,1,105-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19930,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7516.1984,4.864890e+06,1,4-3
19932,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7485.4686,4.864875e+06,1,1-3
19934,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7516.8415,4.864889e+06,1,13-3
19935,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7537.3219,4.864896e+06,1,113-3


In [142]:
LocComp_2 = LocCompOOB[LocCompOOB["BUILDINGID"] == 2]

In [143]:
LocComp_2

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
46,100,100,100,100,100,100,100,100,100,100,...,100,100,-93,100,100,100,-7331.748000,4.864767e+06,2,247-3
47,100,100,100,100,100,100,100,100,100,100,...,100,100,-89,100,100,100,-7331.400500,4.864768e+06,2,248-3
49,100,100,100,100,100,100,100,100,100,100,...,100,100,100,-91,100,100,-7327.492165,4.864767e+06,2,247-3
50,100,100,100,100,100,100,100,100,100,100,...,100,100,-90,100,100,100,-7336.700400,4.864764e+06,2,246-3
51,100,100,100,100,100,100,100,100,100,100,...,100,100,-92,100,100,100,-7337.394800,4.864763e+06,2,245-3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19926,100,100,100,100,100,100,100,100,100,100,...,100,100,-87,100,100,100,-7331.496600,4.864768e+06,2,206-1
19927,100,100,100,100,100,100,100,100,100,100,...,100,-84,-77,100,100,100,-7390.734700,4.864835e+06,2,141-1
19928,100,100,100,100,100,100,100,100,100,100,...,100,-90,-87,100,100,100,-7331.463800,4.864769e+06,2,207-1
19931,100,100,100,100,100,100,100,100,100,100,...,100,100,-91,100,100,100,-7394.577825,4.864837e+06,2,140-1


In [144]:
# Change LOCATIONID to categorical variable
LocComp_2["LOCATIONID"] = LocComp_2["LOCATIONID"].astype("category")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  LocComp_2["LOCATIONID"] = LocComp_2["LOCATIONID"].astype("category")


In [145]:
# Delete BUILDING ID since it's not needed in individual building subsets
LocComp_2 = LocComp_2.drop("BUILDINGID",  1)

In [146]:
LocComp_2.dtypes

WAP001           int64
WAP002           int64
WAP003           int64
WAP004           int64
WAP005           int64
                ...   
WAP519           int64
WAP520           int64
LONGITUDE      float64
LATITUDE       float64
LOCATIONID    category
Length: 523, dtype: object

#### Dataset 2

In [147]:
LocIncomp_0 = LocIncompOOB[LocIncompOOB["BUILDINGID"] == 0]
LocIncomp_1 = LocIncompOOB[LocIncompOOB["BUILDINGID"] == 1]
LocIncomp_2 = LocIncompOOB[LocIncompOOB["BUILDINGID"] == 2]

In [148]:
LocIncomp_0

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7641.499303,4.864922e+06,0,0-2
21,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7586.944816,4.864986e+06,0,0-1
28,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7674.785283,4.864934e+06,0,0-2
29,100,100,100,100,100,100,100,100,-79,100,...,100,100,100,100,100,100,-7656.475561,4.864938e+06,0,0-2
30,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7642.764986,4.865005e+06,0,0-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7641.787154,4.864902e+06,0,0-2
1049,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7592.050954,4.864978e+06,0,0-1
1108,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7637.535798,4.864903e+06,0,0-0
1109,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7636.654005,4.864905e+06,0,0-0


In [149]:
LocIncomp_1

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7515.916799,4.864890e+06,1,0-1
16,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7559.678074,4.864887e+06,1,0-3
20,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7558.859014,4.864871e+06,1,0-3
24,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7568.985890,4.864876e+06,1,0-1
25,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7562.717100,4.864866e+06,1,0-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1073,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7530.048346,4.864958e+06,1,0-3
1074,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7530.497510,4.864957e+06,1,0-3
1075,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7537.454233,4.864898e+06,1,0-3
1076,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7557.615859,4.864887e+06,1,0-3


In [150]:
LocIncomp_2

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7383.867221,4.864840e+06,2,0-4
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7374.302080,4.864847e+06,2,0-4
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7365.824883,4.864843e+06,2,0-4
5,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7338.807210,4.864825e+06,2,0-2
6,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7379.351683,4.864849e+06,2,0-3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1103,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7359.502802,4.864837e+06,2,0-3
1104,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7330.881524,4.864823e+06,2,0-3
1105,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7316.945210,4.864815e+06,2,0-3
1106,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7317.344231,4.864796e+06,2,0-3


In [151]:
# Delete BUILDINGID now that it's not needed
LocIncomp_0 = LocIncomp_0.drop("BUILDINGID",  1)
LocIncomp_1 = LocIncomp_1.drop("BUILDINGID",  1)
LocIncomp_2 = LocIncomp_2.drop("BUILDINGID",  1)

In [152]:
# Change LOCATIONID to categorical variable
LocIncomp_0["LOCATIONID"] = LocIncomp_0["LOCATIONID"].astype("category")
LocIncomp_1["LOCATIONID"] = LocIncomp_1["LOCATIONID"].astype("category")
LocIncomp_2["LOCATIONID"] = LocIncomp_2["LOCATIONID"].astype("category")

In [153]:
LocIncomp_0.dtypes

WAP001           int64
WAP002           int64
WAP003           int64
WAP004           int64
WAP005           int64
                ...   
WAP519           int64
WAP520           int64
LONGITUDE      float64
LATITUDE       float64
LOCATIONID    category
Length: 523, dtype: object

In [154]:
LocIncomp_1.dtypes

WAP001           int64
WAP002           int64
WAP003           int64
WAP004           int64
WAP005           int64
                ...   
WAP519           int64
WAP520           int64
LONGITUDE      float64
LATITUDE       float64
LOCATIONID    category
Length: 523, dtype: object

In [155]:
LocIncomp_2.dtypes

WAP001           int64
WAP002           int64
WAP003           int64
WAP004           int64
WAP005           int64
                ...   
WAP519           int64
WAP520           int64
LONGITUDE      float64
LATITUDE       float64
LOCATIONID    category
Length: 523, dtype: object

### Train/test sets

In [156]:
# Set Random Seed
seed = 123

#### Building 0 Dataset

In [157]:
LocComp_0

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE,BUILDINGID,LOCATIONID
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7632.1436,4.864982e+06,0,122-0
7119,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7637.2570,4.864950e+06,0,102-0
7120,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7628.1345,4.864977e+06,0,110-0
7121,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7622.0546,4.864976e+06,0,111-0
7122,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7636.2057,4.864967e+06,0,107-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18492,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,-7661.4735,4.864934e+06,0,228-3
18493,100,100,100,100,100,100,100,100,-75,100,...,100,100,100,100,100,100,-7656.7628,4.864938e+06,0,236-3
18494,100,100,100,100,100,100,100,100,-88,100,...,100,100,100,100,100,100,-7646.3076,4.864932e+06,0,235-3
18496,100,100,100,100,100,100,100,100,-66,100,...,100,100,100,100,100,100,-7644.9453,4.864949e+06,0,230-3


In [158]:
LocComp_0.shape

(5249, 524)

In [159]:
# Split up dataset into independent features (x)
# Note: BUILDINGID no longer needed in individual building datasets
xLocComp_0 = LocComp_0.iloc[:,0:522]
xLocComp_0

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7632.1436,4.864982e+06
7119,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7637.2570,4.864950e+06
7120,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7628.1345,4.864977e+06
7121,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7622.0546,4.864976e+06
7122,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7636.2057,4.864967e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18492,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7661.4735,4.864934e+06
18493,100,100,100,100,100,100,100,100,-75,100,...,100,100,100,100,100,100,100,100,-7656.7628,4.864938e+06
18494,100,100,100,100,100,100,100,100,-88,100,...,100,100,100,100,100,100,100,100,-7646.3076,4.864932e+06
18496,100,100,100,100,100,100,100,100,-66,100,...,100,100,100,100,100,100,100,100,-7644.9453,4.864949e+06


In [160]:
# Split into the dependent features (y) and make sure it's a categorical variable
yLocComp_0 = pd.DataFrame(LocComp_0["LOCATIONID"], dtype="category")
print(yLocComp_0)
print(yLocComp_0.dtypes)

      LOCATIONID
4          122-0
7119       102-0
7120       110-0
7121       111-0
7122       107-0
...          ...
18492      228-3
18493      236-3
18494      235-3
18496      230-3
18497      229-3

[5249 rows x 1 columns]
LOCATIONID    category
dtype: object


In [161]:
xtrain_0, xtest_0, ytrain_0, ytest_0 = train_test_split(xLocComp_0, yLocComp_0, 
            test_size=.30, random_state=seed) 
print(xtrain_0.shape, xtest_0.shape)
print(ytrain_0.shape, ytest_0.shape)

(3674, 522) (1575, 522)
(3674, 1) (1575, 1)


#### Building 1 Dataset

In [162]:
LocComp_1.shape

(5196, 524)

In [163]:
# Split up dataset into independent features (x)
# Note: BUILDINGID no longer needed in individual building datasets
xLocComp_1 = LocComp_1.iloc[:,0:522]
xLocComp_1

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7541.2643,4.864921e+06
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7536.6212,4.864934e+06
2,100,100,100,100,100,100,100,-97,100,100,...,100,100,100,100,100,100,100,100,-7519.1524,4.864950e+06
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7524.5704,4.864934e+06
5,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7533.8962,4.864939e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19930,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7516.1984,4.864890e+06
19932,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7485.4686,4.864875e+06
19934,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7516.8415,4.864889e+06
19935,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7537.3219,4.864896e+06


In [164]:
# Split into the dependent features (y) and make sure it's a categorical variable
yLocComp_1 = pd.DataFrame(LocComp_1["LOCATIONID"], dtype="category")
print(yLocComp_1)
print(yLocComp_1.dtypes)

      LOCATIONID
0          106-2
1          106-2
2          103-2
3          102-2
5          105-2
...          ...
19930        4-3
19932        1-3
19934       13-3
19935      113-3
19936      112-3

[5196 rows x 1 columns]
LOCATIONID    category
dtype: object


In [165]:
xtrain_1, xtest_1, ytrain_1, ytest_1 = train_test_split(xLocComp_1, yLocComp_1, 
            test_size=.30, random_state=seed) 
print(xtrain_1.shape, xtest_1.shape)
print(ytrain_1.shape, ytest_1.shape)

(3637, 522) (1559, 522)
(3637, 1) (1559, 1)


#### Building 2 Dataset

In [166]:
LocComp_2.shape

(9492, 523)

In [167]:
# Split up dataset into independent features (x)
# Note: BUILDINGID no longer needed in individual building datasets
xLocComp_2 = LocComp_2.iloc[:,0:522]
xLocComp_2

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
46,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-93,100,100,100,-7331.748000,4.864767e+06
47,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-89,100,100,100,-7331.400500,4.864768e+06
49,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,-91,100,100,-7327.492165,4.864767e+06
50,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-90,100,100,100,-7336.700400,4.864764e+06
51,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-92,100,100,100,-7337.394800,4.864763e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19926,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-87,100,100,100,-7331.496600,4.864768e+06
19927,100,100,100,100,100,100,100,100,100,100,...,100,100,100,-84,-77,100,100,100,-7390.734700,4.864835e+06
19928,100,100,100,100,100,100,100,100,100,100,...,100,100,100,-90,-87,100,100,100,-7331.463800,4.864769e+06
19931,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,-91,100,100,100,-7394.577825,4.864837e+06


In [168]:
# Split into the dependent features (y) and make sure it's a categorical variable
yLocComp_2 = pd.DataFrame(LocComp_2["LOCATIONID"], dtype="category")
print(yLocComp_2)
print(yLocComp_2.dtypes)

      LOCATIONID
46         247-3
47         248-3
49         247-3
50         246-3
51         245-3
...          ...
19926      206-1
19927      141-1
19928      207-1
19931      140-1
19933      140-1

[9492 rows x 1 columns]
LOCATIONID    category
dtype: object


In [169]:
xtrain_2, xtest_2, ytrain_2, ytest_2 = train_test_split(xLocComp_2, yLocComp_2, 
            test_size=.30, random_state=seed) 
print(xtrain_2.shape, xtest_2.shape)
print(ytrain_2.shape, ytest_2.shape)

(6644, 522) (2848, 522)
(6644, 1) (2848, 1)


### Modeling
Run datasets through 3 different models and evaluate performance metrics:
- Random Forest 
- K-Nearest Neighbor
- Gradient Boosting

#### Random Forest

##### Building 0

In [170]:
rf_0 = RandomForestClassifier(n_estimators=520)

In [171]:
# Note: need to use .values.ravel() here because for RF, ytrain needs to come in as
# in as 1D, or flattened. 
# .values gives the values in a numpy array shape (n,1)
# .ravel converts that array to (n,) - flattnened
rf_0.fit(xtrain_0, ytrain_0.values.ravel())

RandomForestClassifier(n_estimators=520)

In [172]:
rf_pred_0 = rf_0.predict(xtest_0)

In [173]:
rf_pred_0

array(['136-1', '124-1', '130-1', ..., '208-1', '222-0', '137-3'],
      dtype=object)

In [174]:
score_0 = cross_val_score(rf_0, xtrain_0, ytrain_0.values.ravel(), cv=3)
score_0

array([0.83510204, 0.85877551, 0.85784314])

In [175]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_0, rf_pred_0, zero_division=0))

              precision    recall  f1-score   support

       101-1       0.60      0.60      0.60         5
       101-2       0.83      1.00      0.91         5
       101-3       1.00      0.86      0.92         7
       102-0       0.75      1.00      0.86         6
       102-1       0.83      0.77      0.80        13
       102-2       1.00      1.00      1.00        10
       102-3       1.00      0.89      0.94         9
       103-1       0.67      0.67      0.67         3
       103-2       0.50      1.00      0.67         2
       103-3       1.00      1.00      1.00         5
       104-1       0.78      0.88      0.82         8
       104-2       1.00      0.67      0.80         6
       104-3       1.00      1.00      1.00         2
       105-1       1.00      0.83      0.91         6
       105-2       1.00      1.00      1.00         9
       105-3       0.80      1.00      0.89         4
       106-0       1.00      1.00      1.00         1
       106-1       0.75    

##### Building 1

In [176]:
rf_1 = RandomForestClassifier()

In [177]:
rf_1.fit(xtrain_1, ytrain_1.values.ravel())

RandomForestClassifier()

In [178]:
rf_pred_1 = rf_1.predict(xtest_1)
rf_pred_1

array(['8-2', '203-0', '203-2', ..., '107-1', '119-0', '9-0'],
      dtype=object)

In [179]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_1, rf_pred_1, zero_division=0))

              precision    recall  f1-score   support

         1-0       0.89      1.00      0.94         8
         1-3       1.00      0.92      0.96        12
        10-0       1.00      1.00      1.00        10
        10-2       1.00      1.00      1.00        14
       101-1       1.00      1.00      1.00        25
       101-2       1.00      1.00      1.00        16
       101-3       1.00      1.00      1.00        22
       102-1       1.00      1.00      1.00        26
       102-2       1.00      1.00      1.00         8
       102-3       0.94      1.00      0.97        16
       103-1       0.92      1.00      0.96        23
       103-2       1.00      1.00      1.00        13
       103-3       1.00      1.00      1.00        11
       104-1       1.00      0.94      0.97        32
       104-2       1.00      1.00      1.00        13
       104-3       1.00      0.95      0.98        21
       105-1       1.00      1.00      1.00        22
       105-2       1.00    

##### Building 2

In [180]:
rf_2 = RandomForestClassifier()

In [181]:
rf_2.fit(xtrain_2,ytrain_2.values.ravel())

RandomForestClassifier()

In [182]:
rf_pred_2 = rf_2.predict(xtest_2)
rf_pred_2

array(['137-3', '214-3', '109-1', ..., '126-3', '105-3', '203-4'],
      dtype=object)

In [183]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_2, rf_pred_2, zero_division=0))

              precision    recall  f1-score   support

       101-0       0.80      0.57      0.67         7
       101-1       0.89      1.00      0.94        17
       101-2       1.00      1.00      1.00         8
       101-3       0.77      1.00      0.87        10
       101-4       1.00      1.00      1.00         7
       102-1       1.00      0.75      0.86        12
       102-2       0.91      0.95      0.93        21
       102-3       0.89      0.94      0.92        18
       102-4       1.00      1.00      1.00         6
       103-0       1.00      0.94      0.97        18
       103-1       0.78      0.88      0.82         8
       103-2       0.88      0.78      0.82         9
       103-3       1.00      0.90      0.95        10
       103-4       1.00      1.00      1.00         5
       104-0       0.79      0.94      0.86        16
       104-1       1.00      0.86      0.92         7
       104-2       1.00      1.00      1.00        11
       104-3       1.00    

#### K-Nearest Neighbor

##### Building 0

In [733]:
knn_0 = KNeighborsClassifier(n_neighbors=5)

In [734]:
knn_0.fit(xtrain_0, ytrain_0.values.ravel())

KNeighborsClassifier()

In [737]:
knn_pred_0 = knn_0.predict(xtest_0)
knn_pred_0

array(['136-1', '124-1', '130-1', ..., '210-1', '234-0', '137-3'],
      dtype=object)

In [738]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_0, knn_pred_0, zero_division=0))

              precision    recall  f1-score   support

       101-1       0.22      0.40      0.29         5
       101-2       0.14      0.40      0.21         5
       101-3       0.20      0.29      0.24         7
       102-0       0.50      0.67      0.57         6
       102-1       0.62      0.62      0.62        13
       102-2       0.43      0.60      0.50        10
       102-3       0.40      0.44      0.42         9
       103-1       0.40      0.67      0.50         3
       103-2       0.14      1.00      0.25         2
       103-3       0.40      0.40      0.40         5
       104-1       0.50      0.75      0.60         8
       104-2       0.44      0.67      0.53         6
       104-3       0.08      0.50      0.13         2
       105-1       0.75      0.50      0.60         6
       105-2       0.46      0.67      0.55         9
       105-3       0.25      0.75      0.38         4
       106-0       0.33      1.00      0.50         1
       106-1       0.47    

##### Building 1

In [769]:
knn_1 = KNeighborsClassifier(n_neighbors=5)

In [771]:
knn_1.fit(xtrain_1, ytrain_1.values.ravel())

KNeighborsClassifier()

In [772]:
knn_pred_1 = knn_1.predict(xtest_1)
knn_pred_1

array(['9-2', '202-0', '203-2', ..., '107-1', '117-0', '10-0'],
      dtype=object)

In [773]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_1, knn_pred_1, zero_division=0))

              precision    recall  f1-score   support

         1-0       0.80      1.00      0.89         8
         1-3       0.37      0.58      0.45        12
        10-0       0.60      0.60      0.60        10
        10-2       0.65      0.79      0.71        14
       101-1       0.71      0.68      0.69        25
       101-2       0.92      0.75      0.83        16
       101-3       0.59      0.73      0.65        22
       102-1       0.83      0.58      0.68        26
       102-2       0.57      1.00      0.73         8
       102-3       0.59      0.81      0.68        16
       103-1       0.72      0.78      0.75        23
       103-2       0.77      0.77      0.77        13
       103-3       0.92      1.00      0.96        11
       104-1       0.79      0.72      0.75        32
       104-2       1.00      0.92      0.96        13
       104-3       0.62      0.38      0.47        21
       105-1       0.80      0.55      0.65        22
       105-2       1.00    

##### Building 2

In [774]:
knn_2 = KNeighborsClassifier(n_neighbors=5)

In [775]:
knn_2.fit(xtrain_2, ytrain_2.values.ravel())

KNeighborsClassifier()

In [776]:
knn_pred_2 = knn_2.predict(xtest_2)
knn_pred_2

array(['137-3', '214-3', '123-1', ..., '126-3', '106-3', '203-4'],
      dtype=object)

In [777]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_2, knn_pred_2, zero_division=0))

              precision    recall  f1-score   support

       101-0       0.50      0.43      0.46         7
       101-1       0.75      0.35      0.48        17
       101-2       0.37      0.88      0.52         8
       101-3       0.71      0.50      0.59        10
       101-4       1.00      1.00      1.00         7
       102-1       0.62      0.67      0.64        12
       102-2       0.50      0.48      0.49        21
       102-3       0.48      0.78      0.60        18
       102-4       1.00      1.00      1.00         6
       103-0       0.67      0.56      0.61        18
       103-1       0.38      0.38      0.38         8
       103-2       0.80      0.44      0.57         9
       103-3       0.75      0.60      0.67        10
       103-4       0.62      1.00      0.77         5
       104-0       0.58      0.88      0.70        16
       104-1       0.75      0.43      0.55         7
       104-2       0.48      0.91      0.62        11
       104-3       0.44    

#### Gradient Boosting 

##### Building 0

In [743]:
gb_0 = GradientBoostingClassifier()

In [745]:
gb_0.fit(xtrain_0, ytrain_0.values.ravel())

GradientBoostingClassifier()

In [746]:
gb_pred_0 = gb_0.predict(xtest_0)
gb_pred_0

array(['136-1', '124-1', '227-3', ..., '208-1', '233-0', '137-3'],
      dtype=object)

In [747]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_0, gb_pred_0, zero_division=0))

              precision    recall  f1-score   support

       101-1       1.00      0.20      0.33         5
       101-2       0.40      0.40      0.40         5
       101-3       0.75      0.43      0.55         7
       102-0       0.60      0.50      0.55         6
       102-1       1.00      0.69      0.82        13
       102-2       0.71      0.50      0.59        10
       102-3       0.67      0.22      0.33         9
       103-1       1.00      1.00      1.00         3
       103-2       0.67      1.00      0.80         2
       103-3       0.80      0.80      0.80         5
       104-1       1.00      0.75      0.86         8
       104-2       0.80      0.67      0.73         6
       104-3       1.00      0.50      0.67         2
       105-1       0.50      0.33      0.40         6
       105-2       0.75      0.33      0.46         9
       105-3       0.67      1.00      0.80         4
       106-0       0.00      0.00      0.00         1
       106-1       0.80    

##### Building 1

In [778]:
gb_1 = GradientBoostingClassifier()

In [779]:
gb_1.fit(xtrain_1, ytrain_1.values.ravel())

GradientBoostingClassifier()

In [780]:
gb_pred_1 = gb_1.predict(xtest_1)
gb_pred_1

array(['8-2', '203-0', '203-2', ..., '107-1', '119-0', '10-0'],
      dtype=object)

In [781]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_1, gb_pred_1, zero_division=0))

              precision    recall  f1-score   support

         1-0       1.00      1.00      1.00         8
         1-3       0.92      0.92      0.92        12
        10-0       0.64      0.90      0.75        10
        10-2       0.74      1.00      0.85        14
       101-1       0.96      0.92      0.94        25
       101-2       0.94      0.94      0.94        16
       101-3       0.95      0.95      0.95        22
       102-1       1.00      0.96      0.98        26
       102-2       0.88      0.88      0.88         8
       102-3       0.73      1.00      0.84        16
       103-1       1.00      1.00      1.00        23
       103-2       1.00      0.85      0.92        13
       103-3       1.00      1.00      1.00        11
       104-1       1.00      0.97      0.98        32
       104-2       1.00      0.77      0.87        13
       104-3       0.94      0.76      0.84        21
       105-1       0.85      1.00      0.92        22
       105-2       0.75    

##### Building 2

In [112]:
gb_2 = GradientBoostingClassifier()

In [113]:
gb_2.fit(xtrain_2, ytrain_2.values.ravel())

GradientBoostingClassifier()

In [114]:
gb_pred_2 = gb_2.predict(xtest_2)
gb_pred_2

array(['203-0', '212-3', '109-1', ..., '126-3', '105-3', '107-0'],
      dtype=object)

In [187]:
# Calculate precision, recall, f1-score, and accuracy
print(classification_report(ytest_2, gb_pred_2, zero_division=0))

              precision    recall  f1-score   support

       101-0       0.38      0.43      0.40         7
       101-1       1.00      1.00      1.00        17
       101-2       0.42      0.62      0.50         8
       101-3       0.80      0.40      0.53        10
       101-4       0.75      0.86      0.80         7
       102-1       0.73      0.92      0.81        12
       102-2       0.81      0.81      0.81        21
       102-3       0.63      0.67      0.65        18
       102-4       1.00      0.83      0.91         6
       103-0       0.83      0.83      0.83        18
       103-1       1.00      0.88      0.93         8
       103-2       0.53      1.00      0.69         9
       103-3       0.83      0.50      0.62        10
       103-4       1.00      1.00      1.00         5
       104-0       0.11      0.81      0.19        16
       104-1       1.00      0.86      0.92         7
       104-2       0.88      0.64      0.74        11
       104-3       0.75    

### Model Selection and Predicting Location for Incomplete Dataset 

The model with the highest average accuracy: is the Random Forest model with an average accuracy of 93% across the three buildings.

In [116]:
# Split up dataset into independent features for incomplete dataset
xLocIncomp_0 = LocIncomp_0.iloc[:,0:522]
xLocIncomp_0

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
4,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7641.499303,4.864922e+06
21,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7586.944816,4.864986e+06
28,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7674.785283,4.864934e+06
29,100,100,100,100,100,100,100,100,-79,100,...,100,100,100,100,100,100,100,100,-7656.475561,4.864938e+06
30,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7642.764986,4.865005e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7641.787154,4.864902e+06
1049,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7592.050954,4.864978e+06
1108,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7637.535798,4.864903e+06
1109,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7636.654005,4.864905e+06


In [117]:
xLocIncomp_1 = LocIncomp_1.iloc[:,0:522]
xLocIncomp_1

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
0,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7515.916799,4.864890e+06
16,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7559.678074,4.864887e+06
20,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7558.859014,4.864871e+06
24,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7568.985890,4.864876e+06
25,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7562.717100,4.864866e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1073,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7530.048346,4.864958e+06
1074,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7530.497510,4.864957e+06
1075,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7537.454233,4.864898e+06
1076,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7557.615859,4.864887e+06


In [118]:
xLocIncomp_2 = LocIncomp_2.iloc[:,0:522]
xLocIncomp_2

Unnamed: 0,WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP007,WAP008,WAP009,WAP010,...,WAP513,WAP514,WAP515,WAP516,WAP517,WAP518,WAP519,WAP520,LONGITUDE,LATITUDE
1,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7383.867221,4.864840e+06
2,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7374.302080,4.864847e+06
3,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7365.824883,4.864843e+06
5,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7338.807210,4.864825e+06
6,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7379.351683,4.864849e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1103,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7359.502802,4.864837e+06
1104,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7330.881524,4.864823e+06
1105,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7316.945210,4.864815e+06
1106,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,-7317.344231,4.864796e+06


In [184]:
# Building 0 Prediction
rfIncompPred_0 = rf_0.predict(xLocIncomp_0)
rfIncompPred_0

array(['214-2', '122-0', '228-2', '214-2', '123-1', '237-0', '229-1',
       '224-0', '106-0', '114-0', '118-0', '117-0', '120-0', '116-0',
       '110-1', '115-2', '222-1', '218-0', '216-0', '215-0', '213-0',
       '213-0', '229-1', '220-0', '212-0', '233-0', '211-0', '233-0',
       '232-0', '222-0', '232-0', '224-0', '224-0', '225-0', '235-0',
       '121-0', '110-0', '120-0', '123-0', '134-0', '101-1', '103-1',
       '114-1', '103-1', '104-1', '105-1', '110-1', '118-1', '108-1',
       '107-1', '103-1', '102-1', '222-1', '215-1', '215-1', '102-0',
       '225-0', '122-0', '125-0', '126-0', '133-0', '133-0', '128-0',
       '129-0', '130-0', '132-0', '102-0', '229-1', '233-1', '104-1',
       '105-1', '106-1', '106-1', '110-1', '111-2', '109-1', '109-1',
       '113-1', '112-1', '113-1', '110-1', '110-1', '118-1', '107-1',
       '116-0', '115-1', '117-1', '119-1', '121-1', '126-0', '123-1',
       '130-0', '122-1', '122-1', '117-1', '115-1', '138-1', '128-1',
       '102-0', '102

In [185]:
# Building 1 Prediction
rfIncompPred_1 = rf_1.predict(xLocIncomp_1)
rfIncompPred_1

array(['3-3', '108-3', '110-2', '107-1', '108-1', '106-2', '102-2',
       '105-2', '103-2', '201-2', '206-3', '216-3', '216-3', '217-3',
       '202-3', '203-3', '101-3', '104-3', '101-3', '103-3', '105-3',
       '10-0', '106-1', '106-1', '102-1', '102-1', '9-0', '2-2', '6-3',
       '1-3', '28-2', '6-2', '7-2', '6-3', '6-2', '4-3', '6-2', '7-2',
       '4-3', '6-3', '105-1', '104-1', '103-1', '103-1', '103-1', '103-1',
       '104-1', '102-1', '101-1', '106-1', '106-1', '117-1', '201-1',
       '202-1', '202-1', '204-1', '216-0', '245-0', '9-0', '116-0',
       '107-3', '110-1', '110-1', '112-2', '110-1', '110-1', '112-2',
       '110-1', '115-0', '107-1', '108-1', '2-2', '106-1', '107-1',
       '102-1', '109-1', '109-1', '105-1', '103-1', '202-1', '107-1',
       '203-1', '101-1', '28-2', '22-2', '2-2', '8-2', '10-2', '2-2',
       '17-2', '2-2', '210-1', '1-3', '209-1', '4-3', '207-1', '209-1',
       '209-1', '6-0', '207-1', '10-2', '105-1', '1-3', '206-2', '106-1',
       '26-1

In [186]:
# Building 2 Prediction
rfIncompPred_2 = rf_2.predict(xLocIncomp_2)
rfIncompPred_2

array(['101-4', '108-4', '111-4', '132-2', '107-3', '230-3', '205-2',
       '106-0', '208-2', '129-0', '132-0', '205-2', '133-0', '109-1',
       '206-4', '201-1', '110-1', '208-4', '208-4', '117-1', '143-1',
       '114-3', '129-3', '125-3', '136-1', '134-1', '133-1', '131-1',
       '112-1', '111-1', '106-1', '103-1', '143-1', '141-1', '101-1',
       '117-1', '214-1', '201-1', '204-1', '137-1', '103-2', '202-1',
       '140-3', '107-1', '209-1', '128-2', '224-1', '223-1', '126-3',
       '223-1', '218-1', '109-3', '218-1', '108-3', '208-0', '101-1',
       '102-1', '102-3', '106-4', '107-1', '116-1', '117-1', '116-1',
       '107-1', '203-3', '209-2', '107-1', '109-1', '124-0', '109-1',
       '208-2', '122-1', '133-4', '209-2', '140-3', '123-1', '140-1',
       '110-1', '125-1', '234-3', '133-4', '111-1', '101-1', '126-1',
       '126-1', '106-1', '205-2', '129-1', '129-1', '135-3', '103-1',
       '117-1', '217-2', '224-3', '105-1', '131-1', '123-1', '104-4',
       '131-1', '223