# WiSenseIndoorData, Cleaning of Data, Finding Error % in each node

# 1. Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 2. Reading WiSenseData

In [2]:
#Reading the Indoor dataset
dff = pd.read_csv('WiSenseIndoorData.csv' , header = None)  

In [3]:
#There are 14 columns
dff.columns

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], dtype='int64')

# 3. Renaming the Columns

In [4]:
df = dff.rename(columns={0: 'timeStamp', 1: 'nodeAddress' , 2: 'packteID', 3: 'nodeRSSI', 4: 'nodeVolt', 5: 'temperature1', 6: 'temperature2', 7: 'temperature3', 8: 'pressure', 9: 'luminosity' , 10: 'rainfall', 11: 'solarPanelVolt', 12: 'solarPanelBattVolt', 13: 'solarPanelCurr'})

In [5]:
# Getting some info about Dataset
df.info()   

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237962 entries, 0 to 237961
Data columns (total 14 columns):
timeStamp             237962 non-null object
nodeAddress           237962 non-null object
packteID              237962 non-null int64
nodeRSSI              237962 non-null int64
nodeVolt              237962 non-null float64
temperature1          237962 non-null float64
temperature2          67645 non-null float64
temperature3          237962 non-null float64
pressure              237962 non-null int64
luminosity            237962 non-null int64
rainfall              237962 non-null float64
solarPanelVolt        237962 non-null float64
solarPanelBattVolt    237962 non-null float64
solarPanelCurr        237962 non-null float64
dtypes: float64(8), int64(4), object(2)
memory usage: 25.4+ MB


In [6]:
#Copying the original dataset ('df') into data1
data1 = df.copy() 

In [7]:
#Describing different features of DataSet
data1.describe() 

Unnamed: 0,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr
count,237962.0,237962.0,237962.0,237962.0,67645.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0
mean,9366.141325,-59.811583,3.306243,23.064358,19.083289,24.076831,894.850295,1760.504908,3774.663749,0.856794,3.625573,-20.277049
std,9552.428998,21.894051,0.086869,14.662447,5.337975,19.842031,55.726966,9257.096017,16306.70267,1.425299,0.295458,28.345545
min,1.0,-111.0,2.76,-300.0,7.8,9.84,220.0,0.0,0.0,0.0,0.0,-110.5
25%,2384.0,-78.0,3.27,20.5,15.38,19.68,897.0,0.0,0.0,0.6,3.49,-30.8
50%,6119.0,-65.0,3.33,23.5,18.13,22.67,900.0,5.0,0.0,0.6,3.6,-30.3
75%,13008.0,-38.0,3.37,26.0,21.57,25.21,902.0,114.0,470.0,0.61,3.78,-5.8
max,62356.0,-20.0,3.4,43.37,46.34,320.0,908.0,65535.0,222968.0,8.75,4.24,854.3


In [8]:
#Getting total number of Unique Nodes from the DataSet
data1.groupby('nodeAddress').count()  

Unnamed: 0_level_0,timeStamp,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr
nodeAddress,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
fc:c2:3d:00:00:00:7a:5b,7076,7076,7076,7076,7076,1024,7076,7076,7076,7076,7076,7076,7076
fc:c2:3d:00:00:00:82:2d,12523,12523,12523,12523,12523,3393,12523,12523,12523,12523,12523,12523,12523
fc:c2:3d:00:00:00:89:e8,9211,9211,9211,9211,9211,4147,9211,9211,9211,9211,9211,9211,9211
fc:c2:3d:00:00:00:97:ad,33656,33656,33656,33656,33656,13294,33656,33656,33656,33656,33656,33656,33656
fc:c2:3d:00:00:01:10:8e,18015,18015,18015,18015,18015,6002,18015,18015,18015,18015,18015,18015,18015
fc:c2:3d:00:00:01:15:b3,15709,15709,15709,15709,15709,3704,15709,15709,15709,15709,15709,15709,15709
fc:c2:3d:00:00:01:17:ae,31113,31113,31113,31113,31113,13462,31113,31113,31113,31113,31113,31113,31113
fc:c2:3d:00:00:01:20:24,41325,41325,41325,41325,41325,12768,41325,41325,41325,41325,41325,41325,41325
fc:c2:3d:00:00:01:2f:2d,9803,9803,9803,9803,9803,3140,9803,9803,9803,9803,9803,9803,9803
fc:c2:3d:00:00:01:33:2a,23682,23682,23682,23682,23682,2855,23682,23682,23682,23682,23682,23682,23682


In [9]:
#Converting datatype of 'timeStamp' to datetime type
data1['timeStamp'] = pd.to_datetime(data1['timeStamp'])  

In [10]:
#Now we can se the data type of 'timeStamp', it is now datetime type
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237962 entries, 0 to 237961
Data columns (total 14 columns):
timeStamp             237962 non-null datetime64[ns]
nodeAddress           237962 non-null object
packteID              237962 non-null int64
nodeRSSI              237962 non-null int64
nodeVolt              237962 non-null float64
temperature1          237962 non-null float64
temperature2          67645 non-null float64
temperature3          237962 non-null float64
pressure              237962 non-null int64
luminosity            237962 non-null int64
rainfall              237962 non-null float64
solarPanelVolt        237962 non-null float64
solarPanelBattVolt    237962 non-null float64
solarPanelCurr        237962 non-null float64
dtypes: datetime64[ns](1), float64(8), int64(4), object(1)
memory usage: 25.4+ MB


# Functions to Clean the DataSet

In [11]:
# Now We will create two new columns in our Dataset namely, 'temp1_changed', 'temp2_changed' 'temp3_changed' and 'pressure_changed'
# These column will contain value '1' if corresponding values are changed changed else it will contain 0
data1['temp1_changed'] = 0
data1['temp2_changed'] = 0
data1['temp3_changed'] = 0
data1['pressure_changed'] = 0

In [12]:
# Grouping the DataSet by 'temp3_changed'.
data1.groupby('temp3_changed').count()

Unnamed: 0_level_0,timeStamp,nodeAddress,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr,temp1_changed,temp2_changed,pressure_changed
temp3_changed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,237962,237962,237962,237962,237962,237962,67645,237962,237962,237962,237962,237962,237962,237962,237962,237962,237962


In [13]:
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237962 entries, 0 to 237961
Data columns (total 18 columns):
timeStamp             237962 non-null datetime64[ns]
nodeAddress           237962 non-null object
packteID              237962 non-null int64
nodeRSSI              237962 non-null int64
nodeVolt              237962 non-null float64
temperature1          237962 non-null float64
temperature2          67645 non-null float64
temperature3          237962 non-null float64
pressure              237962 non-null int64
luminosity            237962 non-null int64
rainfall              237962 non-null float64
solarPanelVolt        237962 non-null float64
solarPanelBattVolt    237962 non-null float64
solarPanelCurr        237962 non-null float64
temp1_changed         237962 non-null int64
temp2_changed         237962 non-null int64
temp3_changed         237962 non-null int64
pressure_changed      237962 non-null int64
dtypes: datetime64[ns](1), float64(8), int64(8), object(1)
memory usage:

# Following Scripts will deal with first value of each node if it is outlier

1. We'll just check if first value of each node for a particular column is outlier (i.e temperature > 100 or temperature < 0), if it is outlier then we'll change its value to next row value

In [14]:
from tqdm import tqdm_notebook

nodes = data1['nodeAddress'].unique() # this line will create an array having total unique nodes

print('Checking Outlier for temperature1')
for n in tqdm_notebook(nodes , desc = 'Processing records for Outlier'):
    for i in range(data1.shape[0] - 1):
        if(data1.loc[i , 'nodeAddress'] == n):
            val0 = float(data1.loc[i,'temperature1'])
            if(val0 < 0 or val0 > 100):
                data1.loc[i,'temperature1'] = data1.loc[i+1,'temperature1']
                print('Outlier Found at', i , 'for node' , n)
                break
            else:
                break

print('Checking Outlier for temperature2')                
for n in tqdm_notebook(nodes , desc = 'Processing records for Outlier'):
    for i in range(data1.shape[0] - 1):
        if(data1.loc[i , 'nodeAddress'] == n):
            val0 = float(data1.loc[i,'temperature2'])
            if(val0 < 0 or val0 > 100):
                data1.loc[i,'temperature2'] = data1.loc[i+1,'temperature2']
                print('Outlier Found at',i, 'for node' , n)
                break
            else:
                break
                
                
print('Checking Outlier for temperature3')                
for n in tqdm_notebook(nodes , desc = 'Processing records for Outlier'):
    for i in range(data1.shape[0] - 1):
        if(data1.loc[i , 'nodeAddress'] == n):
            val0 = float(data1.loc[i,'temperature3'])
            if(val0 < 0 or val0 > 100):
                data1.loc[i,'temperature3'] = data1.loc[i+1,'temperature3']
                print('Outlier Found at',i, 'for node' , n)
                break
            else:
                break
                
                
print('Checking Outlier for pressure')
for n in tqdm_notebook(nodes , desc = 'Processing records for Outlier'):
    for i in range(data1.shape[0] - 1):
        if(data1.loc[i , 'nodeAddress'] == n):
            val0 = float(data1.loc[i,'pressure'])
            if(val0 < 750 or val0 > 1000):
                data1.loc[i,'pressure'] = data1.loc[i+1,'pressure']
                print('Outlier Found at',i, 'for node' , n)
                break
            else:
                break

                


Checking Outlier for temperature1


HBox(children=(IntProgress(value=0, description='Processing records for Outlier', max=13, style=ProgressStyle(…

Outlier Found at 2748 for node fc:c2:3d:00:00:00:97:ad

Checking Outlier for temperature2


HBox(children=(IntProgress(value=0, description='Processing records for Outlier', max=13, style=ProgressStyle(…


Checking Outlier for temperature3


HBox(children=(IntProgress(value=0, description='Processing records for Outlier', max=13, style=ProgressStyle(…


Checking Outlier for pressure


HBox(children=(IntProgress(value=0, description='Processing records for Outlier', max=13, style=ProgressStyle(…




# Following is the function to clean 'temp1', 'temp2', 'temp3' and 'pressure'

In [124]:
nodes = data1['nodeAddress'].unique() # this line will create an array having total unique nodes


#Function to clean 'temperature1'


def temperature1_clean(df):
    for n in nodes:
        k = 0
        for i in range(k , df.shape[0]-1):
          if(df.loc[i, 'nodeAddress'] == n):
            val0 = float(df.loc[i,'temperature1'])
            time0 = (df.loc[i,'timeStamp' ])
            for j in range(i+1, df.shape[0]-1):
              if(df.loc[j, 'nodeAddress'] == n):
                val1 = float(df.loc[j , 'temperature1'])
                time1 = (df.loc[j , 'timeStamp'])
                timedelta = time1 - time0
                minutes = timedelta.total_seconds() / 60
                
                if (abs(val1 - val0) > 10 and minutes < 30.0):
                  df.loc[j,'temperature1'] = val0
                  df.loc[j, 'temp1_changed'] = 1
                  k = j
                  break
                elif((val1) > 100  and minutes > 30.0):
                  df.loc[j,'temperature1'] = 'NaN'
                  k = j
                  break
                
                elif((val1) < 0  and minutes > 30.0):
                  df.loc[j,'temperature1'] = 'NaN'
                  k = j
                  break
                else:
                  k = j
                  break
                    
                 


#Function to clean 'temperature2'

def temperature2_clean(df):
    for n in nodes:
        k = 0
        for i in range(k , df.shape[0]-1):
          if(df.loc[i, 'nodeAddress'] == n):
            val0 = float(df.loc[i,'temperature2'])
            time0 = (df.loc[i,'timeStamp' ])
            for j in range(i+1, df.shape[0]-1):
              if(df.loc[j, 'nodeAddress'] == n):
                val1 = float(df.loc[j , 'temperature2'])
                time1 = (df.loc[j , 'timeStamp'])
                timedelta = time1 - time0
                minutes = timedelta.total_seconds() / 60
                
                if (abs(val1 - val0) > 10 and minutes < 30.0):
                  df.loc[j,'temperature2'] = val0
                  df.loc[j, 'temp2_changed'] = 1
                  k = j
                  break
                elif(((val1) > 100 or (val1) < 0 ) and minutes > 30.0):
                  df.loc[j,'temperature2'] = 'NaN'
                  k = j
                  break
                else:
                  k = j
                  break
                    
                 

#Function to clean 'temperature3'

def temperature3_clean(df):
    for n in nodes:
        k = 0
        for i in range(k , df.shape[0]-1):
          if(df.loc[i, 'nodeAddress'] == n):
            val0 = float(df.loc[i,'temperature3'])
            time0 = (df.loc[i,'timeStamp' ])
            for j in range(i+1, df.shape[0]-1):
              if(df.loc[j, 'nodeAddress'] == n):
                val1 = float(df.loc[j , 'temperature3'])
                time1 = (df.loc[j , 'timeStamp'])
                timedelta = time1 - time0
                minutes = timedelta.total_seconds() / 60
                
                if (abs(val1 - val0) > 10 and minutes < 30.0):
                  df.loc[j,'temperature3'] = val0
                  df.loc[j, 'temp3_changed'] = 1
                  k = j
                  break
                elif(((val1) > 100 or (val1) < 0 ) and minutes > 30.0):
                  df.loc[j,'temperature3'] = 'NaN'
                  k = j
                  break
                else:
                  k = j
                  break
                    
                    
                   
# Function to clean 'pressure'

def pressure_clean(df):
    for n in nodes:
        k = 0
        for i in range(k , df.shape[0]-1):
          if(df.loc[i, 'nodeAddress'] == n):
            val0 = float(df.loc[i,'pressure'])
            time0 = (df.loc[i,'timeStamp' ])
            for j in range(i+1, df.shape[0]-1):
              if(df.loc[j, 'nodeAddress'] == n):
                val1 = float(df.loc[j , 'pressure'])
                time1 = (df.loc[j , 'timeStamp'])
                timedelta = time1 - time0
                minutes = timedelta.total_seconds() / 60
                
                if (abs(val1 - val0) > 10 and minutes < 30.0):
                  df.loc[j,'pressure'] = val0
                  df.loc[j, 'pressure_changed'] = 1
                  k = j
                  break
                elif(((val1) > 1000 or (val1) < 750 ) and minutes > 30.0):
                  df.loc[j,'pressure'] = 'NaN'
                  k = j
                  break
                else:
                  k = j
                  break

"\n\n#Function to clean 'temperature2'\n\ndef temperature2_clean(df):\n    for n in nodes:\n        k = 0\n        for i in range(k , df.shape[0]-1):\n          if(df.loc[i, 'nodeAddress'] == n):\n            val0 = float(df.loc[i,'temperature2'])\n            time0 = (df.loc[i,'timeStamp' ])\n            for j in range(i+1, df.shape[0]-1):\n              if(df.loc[j, 'nodeAddress'] == n):\n                val1 = float(df.loc[j , 'temperature2'])\n                time1 = (df.loc[j , 'timeStamp'])\n                timedelta = time1 - time0\n                minutes = timedelta.total_seconds() / 60\n                \n                if (abs(val1 - val0) > 10 and minutes < 30.0):\n                  df.loc[j,'temperature2'] = val0\n                  df.loc[j, 'temp2_changed'] = 1\n                  k = j\n                  break\n                elif(((val1) > 100 or (val1) < 0 ) and minutes > 30.0):\n                  df.loc[j,'temperature2'] = 'NaN'\n                  k = j\n             

In [125]:
temperature1_clean(data1)
temperature2_clean(data1)
temperature3_clean(data1)
pressure_clean(data1)

In [126]:
data1.describe()

Unnamed: 0,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr,temp1_changed,temp2_changed,temp3_changed,pressure_changed
count,237962.0,237962.0,237962.0,237962.0,67645.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0
mean,9366.141325,-59.811583,3.306243,23.740546,19.083289,24.076831,894.850295,1760.504908,3774.663749,0.856794,3.625573,-20.277049,0.009199,0.0,0.0,0.0
std,9552.428998,21.894051,0.086869,4.526312,5.337975,19.842031,55.726966,9257.096017,16306.70267,1.425299,0.295458,28.345545,0.095469,0.0,0.0,0.0
min,1.0,-111.0,2.76,0.0,7.8,9.84,220.0,0.0,0.0,0.0,0.0,-110.5,0.0,0.0,0.0,0.0
25%,2384.0,-78.0,3.27,20.5,15.38,19.68,897.0,0.0,0.0,0.6,3.49,-30.8,0.0,0.0,0.0,0.0
50%,6119.0,-65.0,3.33,23.5,18.13,22.67,900.0,5.0,0.0,0.6,3.6,-30.3,0.0,0.0,0.0,0.0
75%,13008.0,-38.0,3.37,26.0,21.57,25.21,902.0,114.0,470.0,0.61,3.78,-5.8,0.0,0.0,0.0,0.0
max,62356.0,-20.0,3.4,39.12,46.34,320.0,908.0,65535.0,222968.0,8.75,4.24,854.3,1.0,0.0,0.0,0.0


In [69]:
df.describe()

Unnamed: 0,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr
count,237962.0,237962.0,237962.0,237962.0,67645.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0,237962.0
mean,9366.141325,-59.811583,3.306243,23.064358,19.083289,24.076831,894.850295,1760.504908,3774.663749,0.856794,3.625573,-20.277049
std,9552.428998,21.894051,0.086869,14.662447,5.337975,19.842031,55.726966,9257.096017,16306.70267,1.425299,0.295458,28.345545
min,1.0,-111.0,2.76,-300.0,7.8,9.84,220.0,0.0,0.0,0.0,0.0,-110.5
25%,2384.0,-78.0,3.27,20.5,15.38,19.68,897.0,0.0,0.0,0.6,3.49,-30.8
50%,6119.0,-65.0,3.33,23.5,18.13,22.67,900.0,5.0,0.0,0.6,3.6,-30.3
75%,13008.0,-38.0,3.37,26.0,21.57,25.21,902.0,114.0,470.0,0.61,3.78,-5.8
max,62356.0,-20.0,3.4,43.37,46.34,320.0,908.0,65535.0,222968.0,8.75,4.24,854.3


In [127]:
np.where(df.temperature1 == 0)

(array([190686, 190687, 190688, 190689, 190690, 190692, 190693, 190694,
        190695, 190696, 190697, 190698, 190700, 190701, 190703, 190704,
        190705, 190706, 190707, 190708, 190709, 190710, 190711, 190712,
        190715, 190716, 190717, 190718, 190719, 190720, 190721, 190722,
        190723, 190724, 190725, 190726, 190727, 190728, 190729, 190730,
        190731, 190733, 190734, 190736, 190737, 190738, 190739, 190740,
        190741, 190742, 190743, 190745, 190746, 190747, 190748, 190749,
        190750, 190751, 190752, 190753, 190754, 190755, 190756, 190757,
        190758, 190759, 190761, 190762, 190763, 190764, 190765, 190766,
        190767, 190769, 190770, 190771, 190772, 190773, 190774, 190775,
        190776, 190777, 190778, 190779, 190780, 190781, 190782, 190783,
        190784, 190786, 190840, 190841, 190844, 190845, 190846, 190848,
        190849, 190850, 190853, 190859, 190861, 190862, 190871, 190872,
        190873, 190875, 190884, 190885, 190886, 190887, 190888, 

In [56]:
data1[2748:2750]

Unnamed: 0,timeStamp,nodeAddress,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr,temp1_changed,temp2_changed,temp3_changed,pressure_changed
2748,2019-01-07 14:51:25,fc:c2:3d:00:00:00:97:ad,5527,-59,3.13,-300.0,16.06,19.05,903,0,0.0,0.56,3.14,-5.2,0,0,0,0
2749,2019-01-07 14:51:54,fc:c2:3d:00:00:01:79:b2,5528,-71,3.1,20.12,16.06,19.36,903,468,0.0,0.55,3.15,-5.0,0,0,0,0


In [100]:
data1.head()

Unnamed: 0,timeStamp,nodeAddress,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr
0,2019-01-04 16:27:08,fc:c2:3d:00:00:01:17:ae,2686,-75,2.97,20.87,18.81,20.22,901,244,0.0,0.55,3.05,-5.4
1,2019-01-04 16:27:58,fc:c2:3d:00:00:00:89:e8,2687,-74,3.23,19.87,18.81,19.85,902,290,0.0,0.6,3.62,-5.7
2,2019-01-04 16:28:09,fc:c2:3d:00:00:01:17:ae,2688,-82,2.97,21.0,18.81,20.23,901,243,0.0,0.55,3.04,-5.2
3,2019-01-04 16:29:10,fc:c2:3d:00:00:01:17:ae,2689,-80,2.97,20.87,19.52,20.24,901,249,0.0,0.55,3.05,-5.4
4,2019-01-04 16:29:59,fc:c2:3d:00:00:01:10:8e,2690,-75,3.28,20.87,19.52,20.78,902,274,0.0,0.6,3.66,-5.4


In [101]:
np.where(data1.temperature1 == -300)

(array([2748, 2750, 2752, 2756, 2759, 2762, 2764, 2766, 2771, 2773, 2776,
        2778, 2780, 2785, 2787, 2790, 2792, 2794, 2799, 2801, 2804, 2806,
        2809, 2813, 2815, 2818, 2820, 2822, 2827, 2829, 2832, 2834, 2837,
        2841, 2843, 2846, 2848, 2851, 2855, 2857, 2860, 2862, 2865, 2869,
        2871, 2874, 2876, 2880, 2883, 2885, 2888, 2890, 2894, 2897, 2899,
        2902, 2904, 2908, 2911, 2913, 2916, 2919, 2923, 2925, 2928, 2930,
        2933, 2937, 2939, 2942, 2944, 2947, 2951, 2953, 2956, 2958, 2962,
        2965, 2967, 2970, 2972, 2976, 2979, 2981, 2984, 2986, 2989, 2993,
        2995, 2998, 3000, 3003, 3007, 3009, 3012, 3014, 3017, 3021, 3023,
        3026, 3028, 3032, 3035, 3037, 3040, 3042, 3046, 3049, 3051, 3054,
        3056, 3060, 3063, 3065, 3068, 3071, 3074, 3077, 3079, 3082, 3085,
        3089, 3091, 3094, 3096, 3100, 3103, 3105, 3108, 3110, 3115, 3119,
        3122, 3126, 3129, 3134, 3138, 3141, 3145, 3149, 3154, 3159, 3162,
        3166, 3170, 3174, 3179, 3182, 

In [112]:
for n in nodes:
    for i in range(data1.shape[0] - 1):
        if(data1.loc[i , 'nodeAddress'] == n):
            val0 = float(data1.loc[i,'temperature1'])
            if(val0 < -5 or val0 > 100):
                data1.loc[i,'temperature1'] = data1.loc[i+1,'temperature1']
                break

In [113]:
np.where(data1.temperature1 == -300)

(array([2750, 2752, 2756, 2759, 2762, 2764, 2766, 2771, 2773, 2776, 2778,
        2780, 2785, 2787, 2790, 2792, 2794, 2799, 2801, 2804, 2806, 2809,
        2813, 2815, 2818, 2820, 2822, 2827, 2829, 2832, 2834, 2837, 2841,
        2843, 2846, 2848, 2851, 2855, 2857, 2860, 2862, 2865, 2869, 2871,
        2874, 2876, 2880, 2883, 2885, 2888, 2890, 2894, 2897, 2899, 2902,
        2904, 2908, 2911, 2913, 2916, 2919, 2923, 2925, 2928, 2930, 2933,
        2937, 2939, 2942, 2944, 2947, 2951, 2953, 2956, 2958, 2962, 2965,
        2967, 2970, 2972, 2976, 2979, 2981, 2984, 2986, 2989, 2993, 2995,
        2998, 3000, 3003, 3007, 3009, 3012, 3014, 3017, 3021, 3023, 3026,
        3028, 3032, 3035, 3037, 3040, 3042, 3046, 3049, 3051, 3054, 3056,
        3060, 3063, 3065, 3068, 3071, 3074, 3077, 3079, 3082, 3085, 3089,
        3091, 3094, 3096, 3100, 3103, 3105, 3108, 3110, 3115, 3119, 3122,
        3126, 3129, 3134, 3138, 3141, 3145, 3149, 3154, 3159, 3162, 3166,
        3170, 3174, 3179, 3182, 3186, 

In [115]:
data1[2748:2752]

Unnamed: 0,timeStamp,nodeAddress,packteID,nodeRSSI,nodeVolt,temperature1,temperature2,temperature3,pressure,luminosity,rainfall,solarPanelVolt,solarPanelBattVolt,solarPanelCurr
2748,2019-01-07 14:51:25,fc:c2:3d:00:00:00:97:ad,5527,-59,3.13,20.12,16.06,19.05,903,0,0.0,0.56,3.14,-5.2
2749,2019-01-07 14:51:54,fc:c2:3d:00:00:01:79:b2,5528,-71,3.1,20.12,16.06,19.36,903,468,0.0,0.55,3.15,-5.0
2750,2019-01-07 14:52:30,fc:c2:3d:00:00:00:97:ad,5529,-60,3.13,-300.0,16.76,19.08,903,0,0.0,0.56,3.14,-5.3
2751,2019-01-07 14:52:56,fc:c2:3d:00:00:01:79:b2,5530,-74,3.1,20.12,16.06,19.38,903,463,0.0,0.55,3.14,-5.0
