In [1073]:
import pandas as pd
import matplotlib.pyplot as plt

In [1074]:

def renameColumns(df):
    """Return the column labels of ``df`` with any leading ``<prefix>-`` removed.

    Only the text up to and including the first hyphen is dropped; labels
    without a hyphen are returned unchanged. ``df`` itself is not modified --
    the caller assigns the returned list to ``df.columns``.
    """
    cleaned = []
    for label in df.columns:
        if '-' in label:
            # Keep only what follows the first hyphen.
            label = label.split('-', 1)[1]
        cleaned.append(label)
    return cleaned

def convertTime(df):
    """Parse the ``Time`` column into datetimes and sort the rows by it.

    An integer ``Time`` column of any width (not only ``int64``) is read as
    Unix epoch milliseconds; any other dtype is parsed as text in the
    ``'%Y-%m-%d %H:%M:%S'`` format.

    Note: ``df`` is modified in place (the column is replaced and the rows are
    re-ordered) and the same object is returned.
    """
    if pd.api.types.is_integer_dtype(df["Time"]):
        df["Time"] = pd.to_datetime(df["Time"], unit='ms')
    else:
        df["Time"] = pd.to_datetime(df["Time"], format='%Y-%m-%d %H:%M:%S')
    df.sort_values(by=['Time'], inplace=True)
    return df

In [1075]:
def readAndProcessCSV(filename):
    """Load one CSV file and normalise it for analysis.

    The file is read with ``pd.read_csv``, its column labels are replaced by
    the output of ``renameColumns``, and the frame is then passed through
    ``convertTime`` (datetime ``Time`` column, rows sorted by time).
    """
    frame = pd.read_csv(filename)
    frame.columns = renameColumns(frame)
    return convertTime(frame)

In [1076]:
def isWeekend(dateObj):
    """Return True when ``dateObj.weekday()`` is 5 or 6 (Saturday or Sunday)."""
    # weekday() numbers Monday as 0, which puts Saturday/Sunday at 5/6.
    dayNumber = dateObj.weekday()
    return dayNumber in (5, 6)

def isWeeekday(dateObj):
    """Return the logical negation of ``isWeekend(dateObj)``.

    The triple-'e' spelling is kept on purpose: other cells in this notebook
    call the function by this exact name.
    """
    if isWeekend(dateObj):
        return False
    return True

# The mask marks the rows to REMOVE: pass the weekend mask to keep the weekdays,
# or the weekday mask to keep the weekends.
def applyMask(df, mask):
    """Return the rows of ``df`` for which ``mask`` is False, re-indexed from 0.

    ``mask`` is a boolean Series aligned with ``df``; rows where it is True
    are dropped. ``df`` itself is left untouched.
    """
    return df[~mask].reset_index(drop=True)

def eliminateNightHours(df):
    """Keep only the rows whose ``Time`` hour lies in 6..23 (both inclusive).

    In effect this drops everything recorded between 00:00 and 05:59; the
    original index labels of the surviving rows are preserved.
    """
    hourOfDay = df['Time'].dt.hour
    return df[hourOfDay.between(6, 23)]

def getDataOneHour(hour, df):
    """Return the rows of ``df`` recorded during the clock hour ``hour``.

    For ``hour=8`` this selects timestamps from 08:00:00 up to 08:59:59.

    The previous condition (``dt.hour > hour`` and ``dt.hour <= hour + 1``)
    compared against the integer hour field, so it actually selected the
    *following* clock hour (09:xx for ``hour=8``). The half-open interval
    ``[hour, hour + 1)`` used here fixes that off-by-one.
    """
    hourOfDay = df['Time'].dt.hour
    return df[(hourOfDay >= hour) & (hourOfDay < hour + 1)]

def getDataOneDay(date, df):
    """Return the rows of ``df`` whose ``Time`` falls on the calendar day of ``date``.

    ``date`` only needs ``day``, ``month`` and ``year`` attributes; the three
    fields are compared individually against the ``Time`` column.
    """
    stamps = df['Time'].dt
    sameDay = stamps.day == date.day
    sameMonth = stamps.month == date.month
    sameYear = stamps.year == date.year
    return df[sameDay & sameMonth & sameYear]

def getDataThreeDays(date, df):
    """Return the rows of ``df`` from the three calendar days starting at ``date``.

    The window runs from midnight of ``date`` (inclusive) to midnight three
    days later (exclusive). ``date`` only needs ``year``, ``month`` and
    ``day`` attributes.

    The earlier version compared day-of-month numbers within a single month,
    so a window that crossed a month boundary (e.g. Oct 30 -> Nov 1) silently
    lost the days in the following month. Real timestamps are compared here.
    """
    times = df['Time']
    if times.dt.tz is not None:
        # Compare wall-clock values, as the per-field comparison used to do.
        times = times.dt.tz_localize(None)
    windowStart = pd.Timestamp(year=date.year, month=date.month, day=date.day)
    windowEnd = windowStart + pd.Timedelta(days=3)
    return df[(times >= windowStart) & (times < windowEnd)]

def eliminateDayHours(df):
    """Drop the daytime rows and keep only the night-time ones.

    "Day" is the same window that ``eliminateNightHours`` keeps (hours 6..23
    inclusive), so the rows returned here are those with an hour below 6,
    i.e. 00:00-05:59.

    The earlier condition (``hour > 6`` and ``hour < 23``) kept hours 7..22,
    which is daytime data -- the opposite of what the name promises.
    """
    hourOfDay = df['Time'].dt.hour
    return df[hourOfDay < 6]

In [1077]:
def makeDfSensorSingle(df, sensorName):
    """Extract the complete readings of one sensor from ``df``.

    Selects ``Time`` plus every column whose label contains ``sensorName``
    (substring match), drops columns that are entirely NaN, then drops every
    row that still has a NaN, and finally renumbers the rows from 0.
    """
    matching = [label for label in df.columns if sensorName in label]
    subset = df[['Time'] + matching]
    return (
        subset
        .dropna(axis=1, how='all')
        .dropna()
        .reset_index(drop=True)
    )

In [1078]:
#DF that has all the data from October and November
fileNames = ["08_46_27", "08_47_03", "08_48_14", "08_48_51", "08_49_32", "08_50_13", "08_50_46"]

# Read every export first and concatenate once: calling pd.concat inside the
# loop re-copies the accumulated frame on each iteration.
sensorFrames = [readAndProcessCSV("sensorData/08_45_18.csv")]
for fileName in fileNames:
    df = readAndProcessCSV("sensorData/" + fileName + ".csv")
    sensorFrames.append(df)

# ignore_index=True gives the combined frame one unique 0..n-1 index instead of
# repeating each file's own row labels.
dfOctNov = pd.concat(sensorFrames, ignore_index=True)

#Time range of dfOctNov is 2023-10-01 to 2023-11-25
#dfOctNov["Time"].dt.date

In [1079]:
#DF that has only daytime data (06:00 through 23:59), renumbered from 0
dfDay = eliminateNightHours(dfOctNov).reset_index(drop=True)

In [1080]:

#dfWeekend - df that contains only the data from the weekend
weekendMask = dfOctNov['Time'].apply(isWeekend)
# applyMask DROPS the rows where the mask is True, so the inverted mask is
# passed in order to keep (rather than remove) the weekend rows.
dfWeekend = applyMask(dfOctNov, ~weekendMask)

In [1081]:
#dfWeekday - df that contains only the data from the week
weekdayMask = dfOctNov['Time'].apply(isWeeekday)
# applyMask DROPS the rows where the mask is True, so the inverted mask is
# passed in order to keep (rather than remove) the weekday rows.
dfWeekday = applyMask(dfOctNov, ~weekdayMask)

In [1082]:
# dfFinal: the working data set -- weekend-flagged rows removed (applyMask drops
# the rows where the mask is True), hours before 06:00 removed, rows renumbered.
dfFinal = eliminateNightHours(applyMask(dfOctNov, weekendMask)).reset_index(drop=True)

In [1083]:
def getTemperatureMean(df, sensorName, measurementName, startDate='2023-10-16'):
    """Return the mean of one sensor measurement over a three-day window.

    Parameters
    ----------
    df : DataFrame with a datetime ``Time`` column and sensor columns.
    sensorName, measurementName : joined as ``'<sensorName>.<measurementName>'``
        to form the column that is averaged.
    startDate : first day of the window handed to ``getDataThreeDays``;
        anything ``pd.to_datetime`` accepts. Defaults to ``'2023-10-16'``,
        the value that used to be hard-coded.
    """
    columnName = sensorName + '.' + measurementName
    window = getDataThreeDays(pd.to_datetime(startDate), df)
    return window[columnName].mean()

def getTemperatureStd(df, sensorName, measurementName, startDate='2023-10-16'):
    """Return the standard deviation of one sensor measurement over a three-day window.

    Parameters
    ----------
    df : DataFrame with a datetime ``Time`` column and sensor columns.
    sensorName, measurementName : joined as ``'<sensorName>.<measurementName>'``
        to form the column whose ``.std()`` is returned.
    startDate : first day of the window handed to ``getDataThreeDays``;
        anything ``pd.to_datetime`` accepts. Defaults to ``'2023-10-16'``,
        the value that used to be hard-coded.
    """
    columnName = sensorName + '.' + measurementName
    window = getDataThreeDays(pd.to_datetime(startDate), df)
    return window[columnName].std()

# Mean temperature of every sensor, computed on dfFinal via getTemperatureMean.
# The name list reproduces the original insertion order
# (eye01..eye09, eye11, eye10, gas01..gas06, voc01..voc05).
meanSensorNames = (
    ['eye0' + str(n) for n in range(1, 10)]
    + ['eye11', 'eye10']
    + ['gas0' + str(n) for n in range(1, 7)]
    + ['voc0' + str(n) for n in range(1, 6)]
)

temperatureMean = {}
for sensorName in meanSensorNames:
    temperatureMean[sensorName] = getTemperatureMean(dfFinal, sensorName, 'temperature')

# Re-order the dictionary by ascending mean value.
temperatureMean = {
    name: temperatureMean[name]
    for name in sorted(temperatureMean, key=temperatureMean.get)
}

# for sensorName in temperatureMean:  
#     print(sensorName, temperatureMean[sensorName])  

In [1084]:
# Temperature standard deviation of every sensor, computed on dfFinal via
# getTemperatureStd. The name list reproduces the original insertion order
# (eye01..eye09, eye11, eye10, gas01..gas06, voc01..voc05).
stdSensorNames = (
    ['eye0' + str(n) for n in range(1, 10)]
    + ['eye11', 'eye10']
    + ['gas0' + str(n) for n in range(1, 7)]
    + ['voc0' + str(n) for n in range(1, 6)]
)

temperatureStd = {}
for sensorName in stdSensorNames:
    temperatureStd[sensorName] = getTemperatureStd(dfFinal, sensorName, 'temperature')

# Re-order the dictionary by ascending standard deviation.
temperatureStd = {
    name: temperatureStd[name]
    for name in sorted(temperatureStd, key=temperatureStd.get)
}

# for sensorName in temperatureStd:  
#     print(sensorName, temperatureStd[sensorName])  

In [1090]:
# Sensors placed side by side with eye02 in one wide frame.
# Fix: a comma was missing between 'voc01' and 'voc02'. Python joined the two
# adjacent literals into the single name 'voc01voc02', which matches no column,
# so both sensors were silently left out.
selectedSensors = ['eye03', 'eye04', 'eye05', 'eye09', 'eye11', 'gas01', 'gas02', 'gas03', 'gas04', 'gas05', 'gas06', 'voc01', 'voc02', 'voc03', 'voc04', 'voc05']

# Build every per-sensor frame first and concatenate once, instead of growing
# the result with pd.concat inside a loop.
sensorFrames = [makeDfSensorSingle(dfFinal, name) for name in ['eye02'] + selectedSensors]

# NOTE(review): makeDfSensorSingle drops NaN rows and renumbers each sensor
# independently, so axis=1 pairs the rows by position, not by timestamp.
# Sensors that report at different rates will not line up in time -- confirm
# this is acceptable, or join on 'Time' instead.
sensorEye01 = pd.concat(sensorFrames, axis=1)

# Every per-sensor frame brought its own 'Time' column; drop them all.
sensorEye01 = sensorEye01.drop('Time', axis=1)
sensorEye01 = sensorEye01.dropna().reset_index(drop=True)
sensorEye01.head()

TypeError: unsupported operand type(s) for +: 'float' and 'str'

In [1086]:
# Preview of the combined sensor frame.
# NOTE(review): the disabled filter below refers to 'eye01.occupancy', but the
# combined frame is seeded from 'eye02' and 'eye01' is not among the selected
# sensors, so that column would be missing if the line were re-enabled.
#sensorEye01 = sensorEye01[sensorEye01['eye01.occupancy'] == 1]
sensorEye01.head()

Unnamed: 0,eye02.humidity,eye02.light,eye02.motion,eye02.occupancy,eye02.temperature,eye02.vdd,eye03.humidity,eye03.light,eye03.motion,eye03.occupancy,...,voc04.motion,voc04.temperature,voc04.tvoc,voc04.vdd,voc05.humidity,voc05.light,voc05.motion,voc05.temperature,voc05.tvoc,voc05.vdd
0,42.0,1.0,0.0,0.0,23.5,3661.0,39.0,0.0,0.0,0.0,...,0.0,25.3,109.0,3652.0,38.0,0.0,0.0,25.8,147.0,3657.0
1,42.0,1.0,0.0,0.0,23.6,3659.0,39.0,0.0,0.0,0.0,...,0.0,25.3,117.0,3652.0,38.0,0.0,0.0,25.8,153.0,3654.0
2,42.0,1.0,0.0,0.0,23.5,3659.0,39.0,0.0,0.0,0.0,...,0.0,25.2,109.0,3652.0,38.0,0.0,0.0,25.8,161.0,3654.0
3,42.0,1.0,0.0,0.0,23.5,3659.0,39.0,0.0,0.0,0.0,...,0.0,25.3,113.0,3649.0,38.0,0.0,0.0,25.8,143.0,3654.0
4,42.0,1.0,0.0,0.0,23.5,3659.0,39.0,0.0,0.0,0.0,...,0.0,25.2,106.0,3652.0,38.0,0.0,0.0,25.8,148.0,3654.0


In [1087]:
# NOTE(review): sensorVOC01 is not assigned in any cell shown here; presumably
# it came from makeDfSensorSingle(dfFinal, 'voc01') in a removed or
# out-of-order cell -- TODO confirm, this fails on Restart & Run All.
sensorVOC01.head()

Unnamed: 0,Time,voc01.humidity,voc01.light,voc01.motion,voc01.temperature,voc01.tvoc,voc01.vdd
0,2023-10-02 06:05:18,35.0,0.0,0.0,24.7,65.0,3648.0
1,2023-10-02 06:10:18,35.0,0.0,0.0,24.6,59.0,3648.0
2,2023-10-02 06:15:18,35.0,0.0,0.0,24.7,51.0,3648.0
3,2023-10-02 06:20:25,36.0,0.0,0.0,24.6,66.0,3648.0
4,2023-10-02 06:25:18,36.0,13.0,0.0,24.6,63.0,3648.0


In [1088]:
# NOTE(review): sensorGas01 is not assigned in any cell shown here; presumably
# it came from makeDfSensorSingle(dfFinal, 'gas01') in a removed or
# out-of-order cell -- TODO confirm, this fails on Restart & Run All.
sensorGas01.head()

Unnamed: 0,Time,gas01.co2,gas01.humidity,gas01.light,gas01.motion,gas01.temperature,gas01.vdd
0,2023-10-02 06:06:11,410.0,37.0,0.0,0.0,25.5,3667.0
1,2023-10-02 06:16:11,404.0,37.0,0.0,0.0,25.5,3667.0
2,2023-10-02 06:26:10,409.0,37.0,0.0,0.0,25.5,3670.0
3,2023-10-02 06:36:10,410.0,37.0,0.0,0.0,25.4,3667.0
4,2023-10-02 06:46:10,401.0,37.0,0.0,0.0,25.5,3670.0
